001package hw6;
002import java.io.*;
003import java.util.*;
004
005/**
006 * Parser utility to load the Marvel Comics dataset.
007 * @author Krysta Yousoufian
008 *
009 */
public class MarvelParser {

  /**
   * Reads the Marvel Universe dataset.
   * Each line of the input file contains a character name and a comic
   * book the character appeared in, separated by a tab character.
   * Double-quote characters are removed from every line before it is
   * split into tokens.
   *
   * @requires file is well-formed, with each line containing exactly two
   *          tokens separated by a tab, or else starting with a # symbol
   *          to indicate a comment line.
   * @param filename the file that will be read
   * @param characters set in which all character names will be stored;
   *          typically empty when the routine is called
   * @param books map from titles of comic books to characters that
   *          appear in them; typically empty when the routine is called
   * @modifies characters, books
   * @effects adds every character name found in the file to characters
   * @effects fills books with a map from each comic book to all characters
   *          appearing in it; a character is appended once per input line
   *          that pairs it with the book, in file order
   * @throws Exception if a non-comment line does not split into exactly
   *          two tab-separated tokens. I/O failures (including a file that
   *          cannot be opened) are NOT rethrown: they are reported on
   *          System.err and whatever was parsed before the failure is left
   *          in characters and books.
   */
  public static void parseData(String filename, Set<String> characters,
      Map<String, List<String>> books) throws Exception {
    // Why does this method accept the Collections to be filled as
    // parameters rather than making them a return value? To allow us to
    // "return" two different Collections. If only one or neither Collection
    // needs to be returned to the caller, feel free to rewrite this method
    // without the parameters. Generally this is better style.
    BufferedReader reader = null;
    try {
      reader = new BufferedReader(new FileReader(filename));

      // Construct the collections of characters and books, one
      // <character, book> pair at a time.
      String inputLine;
      while ((inputLine = reader.readLine()) != null) {

        // Ignore comment lines.
        if (inputLine.startsWith("#")) {
          continue;
        }

        // Parse the data, stripping out quotation marks and throwing
        // an exception for malformed lines.
        // NOTE: String.split drops trailing empty tokens, so a line that
        // ends in extra tabs still yields two tokens and is accepted.
        inputLine = inputLine.replace("\"", "");
        String[] tokens = inputLine.split("\t");
        if (tokens.length != 2) {
          throw new Exception("Line should contain exactly one tab: " + inputLine);
        }

        String character = tokens[0];
        String book = tokens[1];

        // Add the parsed data to the character and book collections,
        // creating the book's character list on first sight of the book.
        characters.add(character);
        List<String> appearing = books.get(book);
        if (appearing == null) {
          appearing = new ArrayList<String>();
          books.put(book, appearing);
        }
        appearing.add(character);
      }
    } catch (IOException e) {
      reportIoError(e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          // Treat a failed close like any other I/O failure. Letting it
          // escape from the finally block would replace a malformed-line
          // exception already propagating out of the try body.
          reportIoError(e);
        }
      }
    }
  }

  /** Reports an I/O failure on System.err without rethrowing it. */
  private static void reportIoError(IOException e) {
    System.err.println(e.toString());
    e.printStackTrace(System.err);
  }

}