websphinx
Class RobotExclusion

java.lang.Object
  |
  +--websphinx.RobotExclusion

public class RobotExclusion
extends java.lang.Object


Field Summary
(package private)  java.util.Hashtable entries
           
(package private)  java.lang.String fieldName
           
(package private)  java.lang.String fieldValue
           
(package private)  java.lang.StringBuffer linebuf
           
(package private) static final int MAX_LINE_LENGTH
           
(package private)  java.lang.String myUserAgent
           
(package private)  java.util.Vector rulebuf
           
 
Constructor Summary
RobotExclusion(java.lang.String userAgent)
          Make a RobotExclusion object.
 
Method Summary
 void clear()
          Clear the cache of robots.txt entries.
 boolean disallowed(java.net.URL url)
          Check whether a URL is disallowed by robots.txt.
(package private)  java.lang.String[] getRobotsTxt(java.lang.String website, java.lang.String userAgent)
           
(package private)  java.lang.String getWebSite(java.net.URL url)
           
static void main(java.lang.String[] argv)
           
(package private)  boolean readField(java.io.PushbackInputStream in)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

myUserAgent

java.lang.String myUserAgent

entries

java.util.Hashtable entries

rulebuf

java.util.Vector rulebuf

fieldName

java.lang.String fieldName

fieldValue

java.lang.String fieldValue

MAX_LINE_LENGTH

static final int MAX_LINE_LENGTH

linebuf

java.lang.StringBuffer linebuf
Constructor Detail

RobotExclusion

public RobotExclusion(java.lang.String userAgent)
Make a RobotExclusion object.
Parameters:
userAgent - name of the robot using this object, as shown in the User-Agent header field of its HTTP requests. Use null for anonymous robots.
Method Detail

disallowed

public boolean disallowed(java.net.URL url)
Check whether a URL is disallowed by robots.txt.
Parameters:
url - URL to test
Returns:
true if url's Web site denies robot access to the url

clear

public void clear()
Clear the cache of robots.txt entries.

getWebSite

java.lang.String getWebSite(java.net.URL url)

getRobotsTxt

java.lang.String[] getRobotsTxt(java.lang.String website,
                                java.lang.String userAgent)

readField

boolean readField(java.io.PushbackInputStream in)
            throws java.lang.Exception

main

public static void main(java.lang.String[] argv)
                 throws java.lang.Exception