// CSE 143, Winter 2009, Marty Stepp
// An HtmlTag object represents an HTML tag, such as <b> or </b>.
// In this version of HtmlTag we added a compareTo method and made the
// class implement Comparable.
import java.util.*;
/**
 * An immutable representation of a single HTML tag, such as {@code <b>} or
 * {@code </b>}. A tag consists of a lower-cased element name and a flag
 * telling whether it is an opening or a closing tag.
 *
 * Tags are ordered by element name first, with opening tags sorting before
 * closing tags of the same element. The ordering is consistent with
 * {@link #equals(Object)}.
 */
public class HtmlTag implements Comparable<HtmlTag> {
    // a set of tags that don't need to be matched (self-closing)
    private static final Set<String> NON_MATCHING_TAGS = new HashSet<String>(
            Arrays.asList("!doctype", "!--", "area", "base", "basefont",
                          "br", "col", "frame", "hr", "img", "input",
                          "link", "meta", "param"));

    // all whitespace characters; used in text parsing
    private static final String WHITESPACE = " \f\n\r\t";

    // fields
    private final String element;
    private final boolean isOpenTag;

    /**
     * Constructs an HTML "opening" tag with the given element (e.g. "table").
     *
     * @param element the element name; stored in lower case
     * @throws NullPointerException if element is null
     */
    public HtmlTag(String element) {
        this(element, true);
    }

    /**
     * Constructs an HTML tag with the given element (e.g. "table") and type.
     * Self-closing tags like {@code <br />} are considered to be "opening"
     * tags, but return false from the requiresClosingTag method.
     *
     * @param element   the element name; stored in lower case
     * @param isOpenTag true for an opening tag, false for a closing tag
     * @throws NullPointerException if element is null
     */
    public HtmlTag(String element, boolean isOpenTag) {
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. under a Turkish locale "IMG" would otherwise not become "img").
        this.element = element.toLowerCase(Locale.ROOT);
        this.isOpenTag = isOpenTag;
    }

    /**
     * Compares tags by their element first ("body" comes before "head"),
     * breaking ties by putting opening tags before closing tags.
     *
     * @param other the tag to compare against
     * @return a value &lt; 0 for "less", 0 for "equal", and &gt; 0 for "greater than"
     * @throws NullPointerException if other is null
     */
    @Override
    public int compareTo(HtmlTag other) {
        int compare = element.compareTo(other.element);
        if (compare != 0) {
            // different elements; just use String's compareTo result
            return compare;
        }
        if (isOpenTag == other.isOpenTag) {
            return 0;   // exactly the same kind of tag
        }
        // same element, opposite types: the opening tag comes first
        return isOpenTag ? -1 : 1;
    }

    /**
     * Returns true if the given object is an HtmlTag with the same element
     * and type as this tag.
     */
    @Override
    public boolean equals(Object o) {
        if (o instanceof HtmlTag) {
            HtmlTag other = (HtmlTag) o;
            return element.equals(other.element) && isOpenTag == other.isOpenTag;
        } else {
            return false;
        }
    }

    /**
     * Returns a hash code computed from the same state that equals compares
     * (element and type), so that equal tags always hash alike.
     */
    @Override
    public int hashCode() {
        return 31 * element.hashCode() + (isOpenTag ? 1 : 0);
    }

    /** Returns this HTML tag's element (always lower case). */
    public String getElement() {
        return element;
    }

    /**
     * Returns true if this HTML tag is an "opening" (starting) tag and false
     * if it is a closing tag.
     * Self-closing tags like {@code <br />} are considered to be "opening"
     * tags, but they return false from the requiresClosingTag method.
     */
    public boolean isOpenTag() {
        return isOpenTag;
    }

    /**
     * Returns true if the given other tag is non-null and matches this tag;
     * that is, if they have the same element but opposite types,
     * such as {@code <body>} and {@code </body>}.
     */
    public boolean matches(HtmlTag other) {
        return other != null && element.equals(other.element) && isOpenTag != other.isOpenTag;
    }

    /**
     * Returns true if this tag requires a matching closing tag; usually this
     * is true, except for certain elements such as br and img.
     */
    public boolean requiresClosingTag() {
        return !NON_MATCHING_TAGS.contains(element);
    }

    /**
     * Returns a string representation of this HTML tag, such as
     * {@code "<table>"} or {@code "</table>"}. Comment tags are printed as
     * {@code "<!-- -->"}.
     */
    @Override
    public String toString() {
        return "<" + (isOpenTag ? "" : "/") + (element.equals("!--") ? "!-- --" : element) + ">";
    }

    /**
     * Splits the given HTML text into its tags, in order of appearance.
     * Text between tags and tag attributes are discarded; each HTML comment
     * becomes a single "!--" tag. Tokenizing stops at the first "&lt;" that has
     * no closing "&gt;" (or at an unterminated comment).
     * You don't need to call this method in your homework code.
     *
     * @param text the HTML text to tokenize
     * @return a queue of the tags found, possibly empty
     * @throws NullPointerException if text is null
     */
    public static Queue<HtmlTag> tokenize(String text) {
        StringBuilder buf = new StringBuilder(text);
        Queue<HtmlTag> queue = new LinkedList<HtmlTag>();
        HtmlTag tag = nextTag(buf);
        while (tag != null) {
            queue.add(tag);
            tag = nextTag(buf);
        }
        return queue;
    }

    // Removes and returns the next tag from the front of the buffer (along
    // with any text preceding it), or returns null if no complete tag remains.
    // Probably not a perfect HTML tag tokenizer, but it will do for this HW.
    private static HtmlTag nextTag(StringBuilder buf) {
        int start = buf.indexOf("<");
        if (start < 0) {
            return null;
        }
        // Search for '>' only after the '<', so that a stray '>' in the
        // preceding text does not end tokenizing early.
        int end = buf.indexOf(">", start + 1);
        if (end < 0) {
            return null;
        }

        // check for HTML comments: <!-- ... -->
        if (start + 4 <= buf.length() && buf.substring(start + 1, start + 4).equals("!--")) {
            int commentEnd = buf.indexOf("-->", start + 4);
            if (commentEnd < 0) {
                return null;   // unterminated comment
            }
            // Separate "!--" from the comment text so that things like
            // <!--comment--> still yield the element "!--".
            buf.insert(start + 4, " ");
            // +1 for the inserted space, +2 to move from "--" to the closing '>'
            end = commentEnd + 3;
        }

        String element = stripAttributes(buf.substring(start + 1, end).trim());

        // determine whether opening or closing tag
        boolean isOpenTag = true;
        if (element.startsWith("/")) {
            isOpenTag = false;
            element = element.substring(1);
        }
        // self-closing syntax without a space, e.g. <br/>: drop the trailing slash
        if (element.endsWith("/")) {
            element = element.substring(0, element.length() - 1);
        }

        buf.delete(0, end + 1);
        return new HtmlTag(element, isOpenTag);
    }

    // Returns the part of the given tag text before its first whitespace
    // character, i.e. the element name without any attributes.
    private static String stripAttributes(String tagText) {
        for (int i = 0; i < tagText.length(); i++) {
            if (WHITESPACE.indexOf(tagText.charAt(i)) >= 0) {
                return tagText.substring(0, i);
            }
        }
        return tagText;
    }
}