|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--websphinx.Region | +--websphinx.Element | +--websphinx.Link
Link to a Web page.
Page
Field Summary | |
private int |
depth
|
private java.lang.String |
directory
|
private DownloadParameters |
dp
|
private java.lang.String |
filename
|
static int |
GET
Use the HTTP GET method to download this link. |
private Page |
page
|
static int |
POST
Use the HTTP POST method to access this link. |
private float |
priority
|
private java.lang.String |
query
|
private java.lang.String |
ref
|
private int |
status
|
private java.lang.String |
text
|
protected java.net.URL |
url
|
Fields inherited from class websphinx.Element |
child, endTag, parent, sibling, startTag |
Fields inherited from class websphinx.Region |
end, INITIAL_SIZE, names, source, start, TRUE |
Constructor Summary | |
Link(java.io.File file)
Make a Link from a File. |
|
Link(java.lang.String href)
Make a Link from a string URL. |
|
Link(Tag startTag,
Tag endTag,
java.net.URL base)
Make a Link from a start tag and end tag and a base URL (for relative references). |
|
Link(java.net.URL url)
Make a Link from a URL. |
Method Summary | |
void |
discardContent()
Eliminate all references to page content. |
void |
disconnect()
Disconnect this link from its downloaded page (throwing away the page). |
static java.net.URL |
FileToURL(java.io.File file)
Convert a local filename to a URL. |
int |
getDepth()
Get depth of link in crawl. |
java.lang.String |
getDirectory()
Get the directory part of the link, like "/home/dir/". |
java.net.URL |
getDirectoryURL()
Get the URL of a page's directory. |
static java.net.URL |
getDirectoryURL(java.net.URL url)
Get the URL of a page's directory. |
DownloadParameters |
getDownloadParameters()
Get the download parameters used for this link. |
java.lang.String |
getFile()
Get the information part of the link, like "/home/dir/index.html?query". |
java.lang.String |
getFilename()
Get the filename part of the link, like "index.html". |
java.lang.String |
getHost()
Get the hostname of the link, like "www.cs.cmu.edu". |
private static java.lang.String |
getHrefAttributeName(Tag tag)
|
int |
getMethod()
Get the method used to access this link. |
Page |
getPage()
Get the downloaded page to which the link points. |
java.net.URL |
getPageURL()
Get the URL of a page, omitting any anchor reference (like #ref). |
static java.net.URL |
getPageURL(java.net.URL url)
Get the URL of a page, omitting any anchor reference (like #ref). |
java.net.URL |
getParentURL()
Get the URL of a page's parent directory. |
static java.net.URL |
getParentURL(java.net.URL url)
Get the URL of a page's parent directory. |
int |
getPort()
Get the port number of the link. |
float |
getPriority()
Get the priority of the link in the crawl. |
java.lang.String |
getProtocol()
Get the network protocol of the link, like "ftp" or "http". |
java.lang.String |
getQuery()
Get the query part of the link, like "?query". |
java.lang.String |
getRef()
Get the anchor reference of the link, like "#ref". |
java.net.URL |
getServiceURL()
Get the URL of a Web service, omitting any query or anchor reference. |
static java.net.URL |
getServiceURL(java.net.URL url)
Get the URL of a Web service, omitting any query or anchor reference. |
int |
getStatus()
Get the status of the link. |
java.net.URL |
getURL()
Get the URL. |
private void |
parseURL()
|
private static java.lang.String |
relativeTo(java.lang.String here,
java.lang.String there)
|
static java.lang.String |
relativeTo(java.net.URL here,
java.lang.String there)
|
static java.lang.String |
relativeTo(java.net.URL here,
java.net.URL there)
|
Tag |
replaceHref(java.lang.String newHref)
Copy the link's start tag, replacing the URL. |
void |
setDownloadParameters(DownloadParameters dp)
Set the download parameters used for this link. |
void |
setPage(Page page)
Set the page corresponding to this link. |
void |
setPriority(float priority)
Set the priority of the link in the crawl. |
void |
setStatus(int event)
Set the status of the link. |
void |
setText(java.lang.String text)
Set the tagless-text representation of this region. |
java.lang.String |
toDescription()
Generate a human-readable description of the link. |
java.lang.String |
toText()
Convert the region to tagless text. |
java.lang.String |
toURL()
Convert the link's URL to a String. |
static java.lang.String |
toURLDelimiters(java.lang.String path)
|
protected java.net.URL |
urlFromHref(Tag tag,
java.net.URL base)
Construct the URL for a link element, from its start tag and a base URL (for relative references). |
static java.io.File |
URLToFile(java.net.URL url)
Convert a file: URL to a filename appropriate to the current system platform. |
Methods inherited from class websphinx.Element |
enumerateHTMLAttributes, getChild, getEndTag, getHTMLAttribute, getHTMLAttribute, getNext, getParent, getSibling, getStartTag, getTagName, hasHTMLAttribute |
Methods inherited from class websphinx.Region |
enumerateObjectLabels, findEnd, findStart, getEnd, getField, getFields, getLabel, getLabel, getLength, getNumericLabel, getObjectLabel, getObjectLabels, getRootElement, getSource, getStart, hasAllLabels, hasAllLabels, hasAnyLabels, hasAnyLabels, hasLabel, removeLabel, setField, setFields, setLabel, setLabel, setObjectLabel, span, toHTML, toString, toTags |
Methods inherited from class java.lang.Object |
|
Field Detail |
protected java.net.URL url
private java.lang.String directory
private java.lang.String filename
private java.lang.String query
private java.lang.String ref
private Page page
private int depth
private java.lang.String text
private int status
private float priority
private DownloadParameters dp
public static final int GET
public static final int POST
Constructor Detail |
public Link(Tag startTag, Tag endTag, java.net.URL base) throws java.net.MalformedURLException
startTag - Start tag of element
endTag - End tag of element
base - Base URL used for relative references
public Link(java.net.URL url)
public Link(java.io.File file) throws java.net.MalformedURLException
public Link(java.lang.String href) throws java.net.MalformedURLException
java.net.MalformedURLException - if the URL is invalid
Method Detail |
public void discardContent()
public void disconnect()
public int getDepth()
public java.net.URL getURL()
public java.lang.String getProtocol()
public java.lang.String getHost()
public int getPort()
public java.lang.String getFile()
public java.lang.String getDirectory()
public java.lang.String getFilename()
public java.lang.String getQuery()
public java.lang.String getRef()
public java.net.URL getPageURL()
public static java.net.URL getPageURL(java.net.URL url)
public java.net.URL getServiceURL()
public static java.net.URL getServiceURL(java.net.URL url)
public java.net.URL getDirectoryURL()
public static java.net.URL getDirectoryURL(java.net.URL url)
public java.net.URL getParentURL()
public static java.net.URL getParentURL(java.net.URL url)
public static java.lang.String relativeTo(java.net.URL here, java.net.URL there)
public static java.lang.String relativeTo(java.net.URL here, java.lang.String there)
private static java.lang.String relativeTo(java.lang.String here, java.lang.String there)
public static java.net.URL FileToURL(java.io.File file) throws java.net.MalformedURLException
file - File to convert
public static java.io.File URLToFile(java.net.URL url) throws java.net.MalformedURLException
url - URL to convert
java.net.MalformedURLException - if url is not a file: URL.
public static java.lang.String toURLDelimiters(java.lang.String path)
public Page getPage()
public void setPage(Page page)
page - Page to which this link points
public int getMethod()
public java.lang.String toURL()
public java.lang.String toDescription()
public java.lang.String toText()
Overrides: toText in class Region
public void setText(java.lang.String text)
text - a string consisting of the text in the page contained by this region
private void parseURL()
protected java.net.URL urlFromHref(Tag tag, java.net.URL base) throws java.net.MalformedURLException
tag - Start tag of link, such as <A HREF="/foo/index.html">.
base - Base URL used for relative references
public Tag replaceHref(java.lang.String newHref)
newHref - New URL or relative reference; e.g. "http://www.cs.cmu.edu/" or "/foo/index.html".
private static java.lang.String getHrefAttributeName(Tag tag)
public int getStatus()
public void setStatus(int event)
event - the event that just happened to this link
public float getPriority()
Specified by: getPriority in interface Prioritized
public void setPriority(float priority)
public DownloadParameters getDownloadParameters()
public void setDownloadParameters(DownloadParameters dp)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: INNER | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |