import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Holds the set of words found in the web page referred to by a URL.
 *
 * The words are read once, in the constructor, through an
 * HTMLWordParser and are stored in lower case. I/O problems are reported on
 * System.out and are not propagated to the caller; in that case the
 * object simply contains the words collected before the problem occurred
 * (possibly none).
 */
public class URLWords {

    /**
     * The URL as a string.
     */
    protected String urlString;

    /**
     * The HashSet containing the lower-cased words in the web page referred
     * to by the URL.
     */
    protected HashSet words;

    /**
     * Creates a URLWords object for the specified URL by downloading the page
     * and collecting every word the parser returns.
     *
     * Words are lower-cased with a fixed locale so that the stored form does
     * not depend on the default locale of the machine running the code.
     *
     * @param url the URL to download HTML from.
     */
    public URLWords(URL url) {
        InputStream is;

        urlString = url.toString();
        words = new HashSet();

        try {
            is = url.openStream();
            try {
                // The parser is created inside the finally-guarded region so
                // that the stream is closed even when construction fails.
                HTMLWordParser parser = new HTMLWordParser(is);
                try {
                    String word;
                    while ((word = parser.nextWord()) != null) {
                        // Locale.ENGLISH keeps the case mapping independent of
                        // the JVM default locale (e.g. Turkish dotless i).
                        words.add(word.toLowerCase(Locale.ENGLISH));
                    }
                } catch (IOException e) {
                    System.out.println("Error while reading from URL: " + urlString);
                }
            } finally {
                try {
                    is.close();
                } catch (IOException e) {
                    System.out.println("Cannot close stream for URL: " + urlString);
                }
            }
        } catch (IOException e) {
            System.out.println("Cannot open stream for URL: " + urlString);
        }
    }

    /**
     * Accessor method for the urlString field.
     *
     * @return the urlString field.
     */
    public String getURL() {
        return urlString;
    }

    /**
     * Determines whether all of the given words are contained in the web page.
     *
     * Search words are compared exactly as given, without case conversion.
     * Because the page words are stored in lower case, callers should pass
     * lower-case search words. An empty Vector yields true.
     *
     * @param searchWords the Vector of words to search for; its elements are
     *                    cast to String, so a non-String element causes a
     *                    ClassCastException when it is reached.
     * @return true if all of the words are contained in the web
     *         page, otherwise false.
     */
    public boolean containsWords(Vector searchWords) {
        Iterator i = searchWords.iterator();
        while (i.hasNext()) {
            if (!words.contains((String) i.next())) {
                // One missing word is enough to reject the page.
                return false;
            }
        }
        return true;
    }
}