# CSE 143, Winter 2009, Marty Stepp
# Homework 2: HTML Validator (Python)
#
# Instructor-provided code.
# This program tests your HTML validator object on any file or URL you want.
#
# When it prompts you for a file name, if you type a simple string such
# as "test1.html" (without the quotes) it will just look on your hard disk
# in the same directory as your code or Eclipse project.
#
# If you type a string such as "http://www.google.com/index.html", it will
# connect to that URL and download the HTML content from it.

from htmltag import *
from htmlvalidator import *
import re
import urllib


# Tokenizes the given HTML text and returns a new list containing one
# HtmlTag object for each tag found in the text, in order of appearance.
# You don't need to call this function in your homework code.
# Precondition: text is a string holding the HTML content of a page
def tokenize(text):
    """Split HTML text into a list of HtmlTag objects, in document order.

    Every "<...>" span found in text becomes one tag.  The tag name is
    whatever precedes the first whitespace character inside the brackets,
    so attributes are discarded:  '<foo bar="baz" />'  -->  'foo'.

    A name that contains "/" (e.g. from "</p>" or "<br/>") is built as
    HtmlTag(name_without_slashes, False); any other name is built as
    HtmlTag(name, True).  (Presumably the flag marks an opening tag --
    confirm against htmltag.)

    text -- the HTML source, as a string
    Returns a new list; it is empty when text contains no tags.
    """
    # Removes the angle brackets plus everything from the first whitespace
    # character onward.  "." does not match "\n", but a tag spread over
    # several lines is still reduced to its name because each newline
    # starts a fresh match.  "\r" is part of the class so that a tag broken
    # across a CRLF line ending does not keep a carriage return in its name.
    junk = re.compile(r"[<>]|[ \t\n\f\r].*")

    tags = []
    for raw in re.findall(r"<[^>]+>", text):
        name = junk.sub("", raw)
        has_slash = "/" in name
        if has_slash:
            name = name.replace("/", "")
        tags.append(HtmlTag(name, not has_slash))
    return tags


# main
#
# Interactive driver: repeatedly asks the user for a command and either
# loads a new page, prints the raw page text, prints the validator's tags,
# or runs validate().  Any other input (including an empty line) quits.

def _read_page(address):
    """Return the full text of the page at address.

    address -- an "http:"/"https:" URL, which is downloaded, or otherwise
               the name of a local file, which is opened and read

    The underlying file/connection object is always closed, even if the
    read fails.  Errors from opening or reading (e.g. IOError for a missing
    file) are not caught here.
    """
    if address.startswith(("http:", "https:")):
        print("Downloading from " + address + "...")
        source = urllib.urlopen(address)
    else:
        source = open(address)
    try:
        return source.read()
    finally:
        source.close()


validator = None
page_text = ""
choice = "s"    # the first pass always loads a page, so validator gets set
while True:
    if choice.startswith("s"):
        # prompt for page, then download it if it's a URL
        url = raw_input("Page URL or file name: ")
        page_text = _read_page(url)

        tags = tokenize(page_text)

        # create/update the HTML validator; one validator object is reused
        # for every page so only its tags attribute is replaced later on
        if validator is None:
            validator = HtmlValidator(tags)
        else:
            validator.tags = tags

    elif choice.startswith("p"):
        print(page_text)
    elif choice.startswith("g"):
        # str() of a list of strings quotes each element; strip the quotes
        # so the output reads like  [<a>, </a>]
        tag_strings = [str(tag) for tag in validator.tags]
        print("tags: " + re.sub("'", "", str(tag_strings)))
    elif choice.startswith("v"):
        result = validator.validate()
        print("")
        print("validate() returned " + str(result))
    else:
        break

    print("")
    choice = raw_input("(g)etTags, (v)alidate, (s)et URL, (p)rint HTML, or (q)uit? ").lower()