import java.io.*;

/**
 * Splits a character stream into whitespace-separated words while discarding
 * anything that appears between a {@code '<'} and the next {@code '>'}.
 *
 * <p>Tokenizing is delegated to a {@link StreamTokenizer} whose syntax table
 * is configured by the constructor: control characters and the space are
 * white space, {@code '<'} and {@code '>'} are ordinary (single-character)
 * tokens, and the remaining characters are word constituents.
 */
public class HTMLWordParser {
    /**
     * Start HTML tag character constant.
     */
    protected static final char startHTMLTag = '<';

    /**
     * End HTML tag character constant.
     */
    protected static final char endHTMLTag = '>';

    /**
     * The stream tokenizer constructed from the input.
     */
    protected StreamTokenizer st;

    /**
     * Creates an HTMLWordParser for the specified InputStream.
     *
     * <p>The bytes are decoded by an {@link InputStreamReader} created without
     * an explicit charset, i.e. with the platform default charset. Use
     * {@link #HTMLWordParser(Reader)} to control the encoding.
     *
     * @param is  the input stream to read from.
     * @throws NullPointerException  if {@code is} is null.
     */
    public HTMLWordParser(InputStream is) {
        this(new InputStreamReader(is));
    }

    /**
     * Creates an HTMLWordParser for the specified Reader.
     *
     * <p>This lets the caller choose the character encoding (by supplying an
     * appropriately configured reader) or parse text that is already
     * available as characters, e.g. through a {@link StringReader}.
     *
     * @param reader  the character stream to read from; it is wrapped in a
     *                {@link BufferedReader} unless it already is one.
     * @throws NullPointerException  if {@code reader} is null.
     */
    public HTMLWordParser(Reader reader) {
        if (reader == null) {
            throw new NullPointerException("reader");
        }

        Reader buffered = (reader instanceof BufferedReader)
                ? reader
                : new BufferedReader(reader);
        st = new StreamTokenizer(buffered);

        // Reset the parsing table.
        st.resetSyntax();

        // Set all printing characters to be part of words.
        st.wordChars('\u0020', '\uFFFF');

        // Control characters and the space character are all white space.
        // This must come after wordChars() because it overrides the word
        // setting made above for the space character.
        st.whitespaceChars('\u0000', '\u0020');

        // The HTML delimiters are not parts of words.
        st.ordinaryChar(startHTMLTag);
        st.ordinaryChar(endHTMLTag);
    }

    /**
     * Tests if the character is the start of an HTML tag.
     *
     * @param c  the character to test.
     * @return   true if the character is the start of an HTML tag, otherwise
     *           false.
     */
    protected boolean isStartHTMLTag(char c) {
        return c == startHTMLTag;
    }

    /**
     * Tests if the character is the end of an HTML tag.
     *
     * @param c  the character to test.
     * @return   true if the character is the end of an HTML tag, otherwise
     *           false.
     */
    protected boolean isEndHTMLTag(char c) {
        return c == endHTMLTag;
    }

    /**
     * Reads and discards tokens from the input up to and including the next
     * end-of-tag character.
     *
     * @return                      false if the end of the file is reached
     *                              before an end-of-tag character is seen,
     *                              otherwise true.
     * @throws java.io.IOException  an I/O error occurred.
     */
    protected boolean skipHTMLTag() throws IOException {
        int ttype;
        while ((ttype = st.nextToken()) != StreamTokenizer.TT_EOF) {
            // Word tokens carry a negative token type; narrowing that to a
            // char never yields '>', so only a real '>' ends the tag.
            if (isEndHTMLTag((char) ttype)) {
                return true; // The end of the HTML tag has been seen.
            }
        }
        return false;
    }

    /**
     * Reads the next word from the input, ignoring HTML tags.
     *
     * <p>An end-of-tag character that is not preceded by a start-of-tag
     * character is returned as the one-character word {@code ">"}.
     *
     * @return                 the next word from the input (excluding HTML
     *                         tags) or null if the end of the input is
     *                         reached, including when the input ends inside
     *                         an unterminated tag.
     * @exception IOException  if an I/O error occurs.
     */
    public String nextWord() throws IOException {
        int ttype;
        while ((ttype = st.nextToken()) != StreamTokenizer.TT_EOF) {
            if (ttype == StreamTokenizer.TT_WORD) {
                // A word has been seen.
                return st.sval;
            }

            if (isStartHTMLTag((char) ttype)) {
                // The start of an HTML tag has been seen.
                if (!skipHTMLTag()) {
                    // The end of the input has been seen without seeing
                    // the end of an HTML tag.
                    return null;
                }
                // Else keep scanning the input.
            } else if (isEndHTMLTag((char) ttype)) {
                // A rogue '>' has been seen.
                return ">";
            }
        }

        // The end of the input has been seen.
        return null;
    }
}