001package hw6;
002import java.io.*;
003import java.util.*;
004
005/**
006 * Parser utility to load the Marvel Comics dataset.
007 */
public class MarvelParser {
    /**
     * A checked exception signalling that a data file is not well-formed.
     */
    @SuppressWarnings("serial")
    public static class MalformedDataException extends Exception {
        /** Creates an exception with no detail message and no cause. */
        public MalformedDataException() { }

        /**
         * Creates an exception with the given detail message.
         *
         * @param message description of what was wrong with the data
         */
        public MalformedDataException(String message) {
            super(message);
        }

        /**
         * Creates an exception wrapping the given cause.
         *
         * @param cause the underlying problem
         */
        public MalformedDataException(Throwable cause) {
            super(cause);
        }

        /**
         * Creates an exception with the given detail message and cause.
         *
         * @param message description of what was wrong with the data
         * @param cause the underlying problem
         */
        public MalformedDataException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    /**
     * Reads the Marvel Universe dataset.
     * Each non-comment line of the input file contains a character name and
     * a comic book the character appeared in, separated by a single tab
     * character. Lines starting with a # symbol are comments and are skipped.
     * Double-quote characters are removed from a line before it is split.
     *
     * If an I/O error occurs while opening or reading the file, the error
     * and its stack trace are written to System.err and the method returns
     * normally. In that case, and also when a MalformedDataException is
     * thrown, every pair parsed before the problem was hit remains in
     * characters and books.
     *
     * @requires filename is a valid file path
     * @param filename the file that will be read
     * @param characters set in which all character names will be stored;
     *          typically empty when the routine is called
     * @param books map from titles of comic books to characters that
     *          appear in them; typically empty when the routine is called
     * @modifies characters, books
     * @effects adds every character name found in the file to characters
     * @effects for each comic book in the file, appends the characters
     *          appearing in it to that book's list in books, in file order
     *          (a list is created if the book has no entry yet)
     * @throws MalformedDataException if some line is neither a comment line
     *          (starting with a # symbol) nor exactly two tokens separated
     *          by exactly one tab
     */
    public static void parseData(String filename, Set<String> characters,
            Map<String, List<String>> books) throws MalformedDataException {
        // The collections to fill are passed in as parameters, rather than
        // returned, because that lets the method "return" two different
        // collections. If only one collection (or neither) is needed by the
        // caller, returning it directly would generally be better style.
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));

            // Build the collections one <character, book> pair at a time.
            String inputLine;
            while ((inputLine = reader.readLine()) != null) {

                // Ignore comment lines.
                if (inputLine.startsWith("#")) {
                    continue;
                }

                // Strip quotation marks, then split on tabs. The negative
                // limit keeps trailing empty fields, so a line with extra
                // trailing tabs ("name\tbook\t") is detected as malformed
                // instead of having those tabs silently discarded.
                inputLine = inputLine.replace("\"", "");
                String[] tokens = inputLine.split("\t", -1);

                // A line ending right after its tab ("name\t") has an empty
                // second field; it has always been rejected and still is.
                if (tokens.length != 2 || tokens[1].length() == 0) {
                    throw new MalformedDataException("Line should contain exactly one tab: "
                                                     + inputLine);
                }

                String character = tokens[0];
                String book = tokens[1];

                // Record the pair in both collections.
                characters.add(character);
                List<String> cast = books.get(book);
                if (cast == null) {
                    cast = new ArrayList<String>();
                    books.put(book, cast);
                }
                cast.add(character);
            }
        } catch (IOException e) {
            // Best effort: report the failure and return whatever was
            // parsed so far rather than propagating the I/O error.
            System.err.println(e.toString());
            e.printStackTrace(System.err);
        } finally {
            // Always release the file, including when a malformed line
            // aborts parsing.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    System.err.println(e.toString());
                    e.printStackTrace(System.err);
                }
            }
        }
    }

}