package hw5;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Parser utility to load the Marvel Comics dataset.
 * @author Krysta Yousoufian
 *
 */
public class MarvelParser {

    /**
     * Reads the Marvel Universe dataset.
     * Each line of the input file contains a character name and a comic
     * book the character appeared in, separated by a tab character.
     * Double-quote characters are stripped from every data line before it
     * is split, and lines starting with a # symbol are skipped as comments.
     *
     * <p>I/O failures (for example a missing or unreadable file) are not
     * propagated: they are reported on {@code System.err} and the method
     * returns normally, leaving the collections with whatever was parsed
     * before the failure. The underlying reader is always closed, whether
     * parsing finishes, hits an I/O error, or rejects a malformed line.
     *
     * @requires file is well-formed, with each line containing exactly two
     *          tokens separated by a tab, or else starting with a # symbol
     *          to indicate a comment line.
     * @param filename the file that will be read
     * @param characters set in which all character names will be stored;
     *          typically empty when the routine is called
     * @param books map from titles of comic books to characters that
     *          appear in them; typically empty when the routine is called
     * @modifies characters, books
     * @effects fills characters with a set of all unique character names
     * @effects fills books with a map from each comic book to all characters
     *          appearing in it, in the order their lines occur in the file
     * @throws Exception if a non-comment line does not split into exactly
     *          two tab-separated tokens; entries parsed from earlier lines
     *          remain in characters and books
     */
    public static void parseData(String filename, Set<String> characters,
            Map<String, List<String>> books) throws Exception {
        // Why does this method accept the Collections to be filled as
        // parameters rather than making them a return value? To allow us to
        // "return" two different Collections. If only one or neither Collection
        // needs to be returned to the caller, feel free to rewrite this method
        // without the parameters. Generally this is better style.

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));

            // Construct the collections of characters and books, one
            // <character, book> pair at a time.
            String inputLine;
            while ((inputLine = reader.readLine()) != null) {

                // Ignore comment lines.
                if (inputLine.startsWith("#")) {
                    continue;
                }

                // Parse the data, stripping out quotation marks and throwing
                // an exception for malformed lines.
                inputLine = inputLine.replace("\"", "");
                String[] tokens = inputLine.split("\t");
                if (tokens.length != 2) {
                    throw new Exception("Line should contain exactly one tab: " + inputLine);
                }

                String character = tokens[0];
                String book = tokens[1];

                // Add the parsed data to the character and book collections.
                characters.add(character);
                List<String> appearing = books.get(book);
                if (appearing == null) {
                    appearing = new ArrayList<String>();
                    books.put(book, appearing);
                }
                appearing.add(character);
            }

        } catch (IOException e) {
            reportIoError(e);
        } finally {
            // Close in finally so the file handle is released even when a
            // malformed line (or a read error) aborts the loop early.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    // A failed close is reported like any other I/O error and
                    // must not mask an exception already propagating.
                    reportIoError(e);
                }
            }
        }
    }

    /** Prints an I/O failure and its stack trace to standard error. */
    private static void reportIoError(IOException e) {
        System.err.println(e.toString());
        e.printStackTrace(System.err);
    }

}