// CSE 143, Homework 2: HTML Manager
// Instructor-provided code. You should not modify this file!
// Resource file for HTMLManager. Put this file in the same directory
// as HTMLManager.java, HTMLTag.java, HTMLTagType.java and HTMLMain.java.
import java.net.URL;
import java.net.HttpURLConnection;
import java.io.Reader;
import java.io.File;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.util.*;
/**
 * Reads HTML from a {@link File}, a {@link URL}, or a {@link String} and
 * splits it into a queue of {@code HTMLTag}s.
 *
 * <p>The raw text is kept in {@link #unparsedPage}; {@link #parse()} lexes it
 * on demand. The File and URL constructors print a message to
 * {@code System.err} and terminate the JVM with status 1 when the source
 * cannot be read.
 */
public class HTMLParser {
    /** The raw page text that {@link #parse()} tokenizes. */
    public String unparsedPage;

    /**
     * Reads the whole stream as UTF-8 text into {@link #unparsedPage} and
     * closes the stream. On an I/O error the source is reported as invalid on
     * {@code System.err} and the JVM exits with status 1.
     *
     * @param name   description of the source, used only in the error message
     * @param stream the bytes to decode; closed once reading finishes or fails
     */
    private void parseStream(String name, InputStream stream) {
        // try-with-resources releases the file handle / socket on every path.
        try (InputStream raw = stream;
             Reader in = new BufferedReader(new InputStreamReader(raw, "UTF-8"))) {
            StringBuilder response = new StringBuilder();
            char[] buffer = new char[8192];
            int count = in.read(buffer);
            while (count != -1) {
                response.append(buffer, 0, count);
                count = in.read(buffer);
            }
            this.unparsedPage = response.toString();
        } catch (IOException e) {
            System.err.println("The " + name + " is invalid.");
            System.exit(1);
        }
    }

    /**
     * Creates a parser for the HTML served at the given URL, fetched with an
     * HTTP GET. Prints an error and exits with status 1 if the page cannot be
     * fetched.
     *
     * @param url the address of the page to download
     */
    public HTMLParser(URL url) {
        try {
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            // A GET carries no request body, so the connection's output flag
            // is left at its default (false).
            conn.setRequestMethod("GET");
            parseStream("URL '" + url.toString() + "'", conn.getInputStream());
        } catch (IOException e) {
            System.err.println("The URL " + url.toString() + " is invalid.");
            System.exit(1);
        }
    }

    /**
     * Creates a parser for the HTML stored in the given file. Prints an error
     * and exits with status 1 if the file cannot be opened or read.
     *
     * @param file the file to read
     */
    public HTMLParser(File file) {
        String filename = file.toString();
        try {
            parseStream("file '" + filename + "'", new FileInputStream(file));
        } catch (FileNotFoundException ee) {
            System.err.println("The file '" + filename + "' is invalid.");
            System.exit(1);
        }
    }

    /**
     * Creates a parser for HTML that is already in memory.
     *
     * @param str the page text
     */
    public HTMLParser(String str) {
        this.unparsedPage = str;
    }

    /**
     * Walks a page from left to right and produces one {@code HTMLTag} per
     * tag or comment found. Text between a tag and the next tag is attached
     * to the earlier tag as its contents; text that follows the last tag of
     * the page is not returned.
     */
    private static class HTMLLexer implements Iterator<HTMLTag> {
        private static final String COMMENT_OPEN = "!--";
        private static final String COMMENT_CLOSE = "-->";
        private static final String SCRIPT_NAME = "script";
        private static final String SCRIPT_CLOSE = "</script>";

        private final String page;
        /** Position the next scan starts from; never negative. */
        private int index;

        /**
         * Creates a lexer positioned at the start of the given page.
         *
         * @param page the text to tokenize
         */
        public HTMLLexer(String page) {
            this.page = page;
            this.index = 0;
        }

        /**
         * Returns the next tag of the page and advances past it.
         *
         * <ul>
         * <li>{@code <!-- ... -->} becomes a SELF_CLOSING tag whose element is
         *     the text between {@code <!} and {@code -->}.</li>
         * <li>{@code </x>} becomes a CLOSING tag, {@code <x/>} a SELF_CLOSING
         *     tag, and anything else an OPENING tag.</li>
         * <li>For an opening tag whose text starts with {@code script}, the
         *     contents run up to the next {@code </script>} so that a
         *     {@code <} inside the script is not mistaken for a tag.</li>
         * </ul>
         * An unterminated comment or tag extends to the end of the page.
         *
         * @return the next tag
         * @throws NoSuchElementException if the page holds no further tag
         */
        @Override
        public HTMLTag next() {
            // Also parks index on the '<' that opens the tag.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            int begin = this.index;

            if (this.page.startsWith(COMMENT_OPEN, begin + 1)) {
                int close = this.page.indexOf(COMMENT_CLOSE, begin);
                if (close == -1) {
                    close = this.page.length();
                }
                this.index = close;
                return new HTMLTag(this.page.substring(begin + 2, close),
                        HTMLTagType.SELF_CLOSING);
            }

            int end = findTagEnd(begin + 1);
            if (end == -1) {
                end = this.page.length();
            }

            // Text between tags is plain prose: quotes and apostrophes in it
            // carry no meaning, so the next tag is simply the next '<'.
            int nextTag = this.page.indexOf('<', end + 1);
            String contents = "";
            if (nextTag != -1) {
                contents = this.page.substring(end + 1, nextTag);
                this.index = nextTag;
            } else {
                // Never store -1 here: String.indexOf treats a negative start
                // as 0, which would restart lexing from the top of the page.
                this.index = end;
            }

            if (begin + 1 < end && this.page.charAt(begin + 1) == '/') {
                String element = this.page.substring(begin + 2, end);
                return new HTMLTag(element, HTMLTagType.CLOSING, contents);
            }
            if (begin + 1 < end && this.page.charAt(end - 1) == '/') {
                String element = this.page.substring(begin + 1, end - 1);
                return new HTMLTag(element, HTMLTagType.SELF_CLOSING, contents);
            }

            String element = this.page.substring(begin + 1, end);
            if (element.startsWith(SCRIPT_NAME)) {
                int scriptClose = this.page.indexOf(SCRIPT_CLOSE, end);
                if (scriptClose != -1) {
                    contents = this.page.substring(end + 1, scriptClose);
                    this.index = scriptClose;
                }
            }
            return new HTMLTag(element, HTMLTagType.OPENING, contents);
        }

        /**
         * Reports whether another tag remains, moving the position to the
         * {@code <} that opens it when there is one.
         *
         * @return true if a {@code <} exists at or after the current position
         */
        @Override
        public boolean hasNext() {
            int potentialNextIndex = this.page.indexOf('<', this.index);
            if (potentialNextIndex == -1) {
                return false;
            }
            this.index = potentialNextIndex;
            return true;
        }

        /**
         * Finds the {@code >} that ends the tag starting just before
         * {@code from}, skipping any {@code >} inside a single- or
         * double-quoted attribute value. A quote is closed only by the same
         * quote character that opened it. If an unbalanced quote hides every
         * {@code >}, the first {@code >} at or after {@code from} is used.
         *
         * @param from index of the first character after the opening {@code <}
         * @return index of the closing {@code >}, or -1 if there is none
         */
        private int findTagEnd(int from) {
            char openQuote = 0;
            for (int i = from; i < this.page.length(); i++) {
                char c = this.page.charAt(i);
                if (openQuote != 0) {
                    if (c == openQuote) {
                        openQuote = 0;
                    }
                } else if (c == '"' || c == '\'') {
                    openQuote = c;
                } else if (c == '>') {
                    return i;
                }
            }
            return this.page.indexOf('>', from);
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Tokenizes {@link #unparsedPage} and returns its tags in document order.
     *
     * @return a new queue holding every tag found, first tag at the head
     */
    public Queue<HTMLTag> parse() {
        Queue<HTMLTag> parsed = new LinkedList<HTMLTag>();
        HTMLLexer lexer = new HTMLLexer(this.unparsedPage);
        while (lexer.hasNext()) {
            parsed.add(lexer.next());
        }
        return parsed;
    }
}