|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--websphinx.Region | +--websphinx.Element | +--websphinx.Link
Link to a Web page.
Page
Field Summary | |
static int |
GET
Use the HTTP GET method to download this link. |
static int |
POST
Use the HTTP POST method to access this link. |
protected java.net.URL |
url
|
Fields inherited from class websphinx.Element |
child, endTag, parent, sibling, startTag |
Fields inherited from class websphinx.Region |
end, names, source, start, TRUE |
Constructor Summary | |
Link(java.io.File file)
Make a Link from a File. |
|
Link(java.lang.String href)
Make a Link from a string URL. |
|
Link(websphinx.Tag startTag,
websphinx.Tag endTag,
java.net.URL base)
Make a Link from a start tag and end tag and a base URL (for relative references). |
|
Link(java.net.URL url)
Make a Link from a URL. |
Method Summary | |
void |
discardContent()
Eliminate all references to page content. |
void |
disconnect()
Disconnect this link from its downloaded page (throwing away the page). |
static java.net.URL |
FileToURL(java.io.File file)
Convert a local filename to a URL. |
int |
getDepth()
Get depth of link in crawl. |
java.lang.String |
getDirectory()
Get the directory part of the link, like "/home/dir/". |
java.net.URL |
getDirectoryURL()
Get the URL of a page's directory. |
static java.net.URL |
getDirectoryURL(java.net.URL url)
Get the URL of a page's directory. |
websphinx.DownloadParameters |
getDownloadParameters()
Get the download parameters used for this link. |
java.lang.String |
getFile()
Get the information part of the link, like "/home/dir/index.html?query". |
java.lang.String |
getFilename()
Get the filename part of the link, like "index.html". |
java.lang.String |
getHost()
Get the hostname of the link, like "www.cs.cmu.edu". |
int |
getMethod()
Get the method used to access this link. |
websphinx.Page |
getPage()
Get the downloaded page to which the link points. |
java.net.URL |
getPageURL()
Get the URL of a page, omitting any anchor reference (like #ref). |
static java.net.URL |
getPageURL(java.net.URL url)
Get the URL of a page, omitting any anchor reference (like #ref). |
java.net.URL |
getParentURL()
Get the URL of a page's parent directory. |
static java.net.URL |
getParentURL(java.net.URL url)
Get the URL of a page's parent directory. |
int |
getPort()
Get the port number of the link. |
float |
getPriority()
Get the priority of the link in the crawl. |
java.lang.String |
getProtocol()
Get the network protocol of the link, like "ftp" or "http". |
java.lang.String |
getQuery()
Get the query part of the link, like "?query". |
java.lang.String |
getRef()
Get the anchor reference of the link, like "#ref". |
java.net.URL |
getServiceURL()
Get the URL of a Web service, omitting any query or anchor reference. |
static java.net.URL |
getServiceURL(java.net.URL url)
Get the URL of a Web service, omitting any query or anchor reference. |
int |
getStatus()
Get the status of the link. |
java.net.URL |
getURL()
Get the URL. |
static java.lang.String |
relativeTo(java.net.URL here,
java.lang.String there)
|
static java.lang.String |
relativeTo(java.net.URL here,
java.net.URL there)
|
websphinx.Tag |
replaceHref(java.lang.String newHref)
Copy the link's start tag, replacing the URL. |
void |
setDepth(int depth)
|
void |
setDownloadParameters(websphinx.DownloadParameters dp)
Set the download parameters used for this link. |
void |
setPage(websphinx.Page page)
Set the page corresponding to this link. |
void |
setPriority(float priority)
Set the priority of the link in the crawl. |
void |
setStatus(int event)
Set the status of the link. |
void |
setText(java.lang.String text)
Set the tagless-text representation of this region. |
java.lang.String |
toDescription()
Generate a human-readable description of the link. |
java.lang.String |
toText()
Convert the region to tagless text. |
java.lang.String |
toURL()
Convert the link's URL to a String. |
static java.lang.String |
toURLDelimiters(java.lang.String path)
|
protected java.net.URL |
urlFromHref(websphinx.Tag tag,
java.net.URL base)
Construct the URL for a link element, from its start tag and a base URL (for relative references). |
static java.io.File |
URLToFile(java.net.URL url)
Convert a file: URL to a filename appropriate to the current system platform. |
Methods inherited from class websphinx.Element |
enumerateHTMLAttributes, getChild, getEndTag, getHTMLAttribute, getHTMLAttribute, getNext, getParent, getSibling, getStartTag, getTagName, hasHTMLAttribute |
Methods inherited from class websphinx.Region |
enumerateObjectLabels, findEnd, findStart, getEnd, getField, getFields, getLabel, getLabel, getLength, getNumericLabel, getObjectLabel, getObjectLabels, getRootElement, getSource, getStart, hasAllLabels, hasAllLabels, hasAnyLabels, hasAnyLabels, hasLabel, removeLabel, setField, setFields, setLabel, setLabel, setObjectLabel, span, toHTML, toString, toTags |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
protected java.net.URL url
public static final int GET
public static final int POST
Constructor Detail |
public Link(websphinx.Tag startTag, websphinx.Tag endTag, java.net.URL base) throws java.net.MalformedURLException
startTag
- Start tag of element
endTag
- End tag of element
base
- Base URL used for relative references
public Link(java.net.URL url)
public Link(java.io.File file) throws java.net.MalformedURLException
public Link(java.lang.String href) throws java.net.MalformedURLException
java.net.MalformedURLException
- if the URL is invalid
Method Detail |
public void setDepth(int depth)
public void discardContent()
public void disconnect()
public int getDepth()
public java.net.URL getURL()
public java.lang.String getProtocol()
public java.lang.String getHost()
public int getPort()
public java.lang.String getFile()
public java.lang.String getDirectory()
public java.lang.String getFilename()
public java.lang.String getQuery()
public java.lang.String getRef()
public java.net.URL getPageURL()
public static java.net.URL getPageURL(java.net.URL url)
public java.net.URL getServiceURL()
public static java.net.URL getServiceURL(java.net.URL url)
public java.net.URL getDirectoryURL()
public static java.net.URL getDirectoryURL(java.net.URL url)
public java.net.URL getParentURL()
public static java.net.URL getParentURL(java.net.URL url)
public static java.lang.String relativeTo(java.net.URL here, java.net.URL there)
public static java.lang.String relativeTo(java.net.URL here, java.lang.String there)
public static java.net.URL FileToURL(java.io.File file) throws java.net.MalformedURLException
file
- File to convert
java.net.MalformedURLException
public static java.io.File URLToFile(java.net.URL url) throws java.net.MalformedURLException
url
- URL to convert
java.net.MalformedURLException
- if url is not a file: URL.
public static java.lang.String toURLDelimiters(java.lang.String path)
public websphinx.Page getPage()
public void setPage(websphinx.Page page)
page
- Page to which this link points
public int getMethod()
public java.lang.String toURL()
public java.lang.String toDescription()
public java.lang.String toText()
Overrides: toText in class Region
public void setText(java.lang.String text)
text
- a string consisting of the text in the page contained by this region
protected java.net.URL urlFromHref(websphinx.Tag tag, java.net.URL base) throws java.net.MalformedURLException
tag
- Start tag of link, such as <A HREF="/foo/index.html">.
base
- Base URL used for relative references
java.net.MalformedURLException
public websphinx.Tag replaceHref(java.lang.String newHref)
newHref
- New URL or relative reference; e.g. "http://www.cs.cmu.edu/" or "/foo/index.html".
public int getStatus()
public void setStatus(int event)
event
- the event that just happened to this link
public float getPriority()
Specified by: getPriority in interface websphinx.util.Prioritized
public void setPriority(float priority)
public websphinx.DownloadParameters getDownloadParameters()
public void setDownloadParameters(websphinx.DownloadParameters dp)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |