001 package hw5;
002 import java.io.BufferedReader;
003 import java.io.File;
004 import java.io.FileReader;
005 import java.io.IOException;
006 import java.util.ArrayList;
007 import java.util.List;
008 import java.util.Map;
009 import java.util.Set;
010
/**
 * Parser utility to load the Marvel Comics dataset.
 *
 * @author Krysta Yousoufian
 */
public class MarvelParser {

    /**
     * Reads the Marvel Universe dataset.
     * Each line of the input file contains a character name and a comic
     * book the character appeared in, separated by a tab character.
     * Lines starting with a # symbol are skipped as comments, and every
     * double-quote character is removed from a line before it is split.
     *
     * <p>An I/O failure (for example, the file cannot be opened or read) is
     * NOT rethrown: it is reported on System.err and the method returns
     * normally, leaving in the collections whatever was added before the
     * failure occurred.
     *
     * @requires file is well-formed, with each line containing exactly two
     *           tokens separated by a tab, or else starting with a # symbol
     *           to indicate a comment line.
     * @param filename the file that will be read
     * @param characters set in which all character names will be stored;
     *                   typically empty when the routine is called
     * @param books map from titles of comic books to characters that
     *              appear in them; typically empty when the routine is called
     * @modifies characters, books
     * @effects fills characters with the set of all unique character names
     * @effects fills books with a map from each comic book to all characters
     *          appearing in it, in the order they are listed in the file
     * @throws Exception if a non-comment line does not split into exactly
     *                   two tab-separated tokens
     */
    public static void parseData(String filename, Set<String> characters,
            Map<String, List<String>> books) throws Exception {
        // Why does this method accept the Collections to be filled as
        // parameters rather than making them a return value? To allow us to
        // "return" two different Collections. If only one or neither Collection
        // needs to be returned to the caller, feel free to rewrite this method
        // without the parameters. Generally this is better style.

        // try-with-resources guarantees the reader is closed on every exit
        // path, including when a malformed line aborts parsing below.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {

            // Construct the collections of characters and books, one
            // <character, book> pair at a time.
            String inputLine;
            while ((inputLine = reader.readLine()) != null) {

                // Ignore comment lines.
                if (inputLine.startsWith("#")) {
                    continue;
                }

                // Parse the data, stripping out quotation marks and throwing
                // an exception for malformed lines.
                inputLine = inputLine.replace("\"", "");
                String[] tokens = inputLine.split("\t");
                if (tokens.length != 2) {
                    throw new Exception("Line should contain exactly one tab: " + inputLine);
                }

                String character = tokens[0];
                String book = tokens[1];

                // Add the parsed data to the character and book collections.
                characters.add(character);

                // Look the book up once; create its character list on first sight.
                List<String> appearing = books.get(book);
                if (appearing == null) {
                    appearing = new ArrayList<String>();
                    books.put(book, appearing);
                }
                appearing.add(character);
            }

        } catch (IOException e) {
            // Existing contract: I/O problems are logged, not propagated.
            System.err.println(e.toString());
            e.printStackTrace(System.err);
        }
    }

}