package hw6;

import java.io.*;
import java.util.*;

/**
 * Parser utility to load the Marvel Comics dataset.
 */
public class MarvelParser {
    /**
     * A checked exception class for bad data files.
     */
    @SuppressWarnings("serial")
    public static class MalformedDataException extends Exception {
        public MalformedDataException() { }

        public MalformedDataException(String message) {
            super(message);
        }

        public MalformedDataException(Throwable cause) {
            super(cause);
        }

        public MalformedDataException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    /**
     * Reads the Marvel Universe dataset.
     * Each line of the input file contains a character name and a comic
     * book the character appeared in, separated by a tab character.
     * Lines starting with a # symbol are comments and are skipped. All
     * double-quote characters are removed from data lines before parsing.
     *
     * <p>I/O failures (for example, a file that cannot be opened) are NOT
     * propagated: they are reported on System.err and the method returns
     * normally. When parsing stops early, whether because of an I/O failure
     * or a malformed line, the collections keep whatever entries were added
     * before that point.
     *
     * @requires filename is a valid file path
     * @param filename the file that will be read
     * @param characters set in which all character names will be stored;
     *          typically empty when the routine is called
     * @param books map from titles of comic books to characters that
     *          appear in them; typically empty when the routine is called
     * @modifies characters, books
     * @effects fills characters with the set of all unique character names
     * @effects fills books with a map from each comic book to all characters
     *          appearing in it, in file order (a pair that is repeated in the
     *          file is listed once per occurrence)
     * @throws MalformedDataException if the file is not well-formed, i.e. if
     *          some line is neither a comment line starting with a # symbol
     *          nor a line containing exactly one tab followed by a non-empty
     *          book title. Blank lines are therefore malformed.
     */
    public static void parseData(String filename, Set<String> characters,
            Map<String, List<String>> books) throws MalformedDataException {
        // Why does this method accept the Collections to be filled as
        // parameters rather than making them a return value? To allow us to
        // "return" two different Collections. If only one or neither Collection
        // needs to be returned to the caller, feel free to rewrite this method
        // without the parameters. Generally this is better style.
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));

            // Construct the collections of characters and books, one
            // <character, book> pair at a time.
            String inputLine;
            while ((inputLine = reader.readLine()) != null) {

                // Ignore comment lines. This check runs before quotes are
                // stripped, so a line that starts with a quote is never
                // treated as a comment.
                if (inputLine.startsWith("#")) {
                    continue;
                }

                // Parse the data, stripping out quotation marks and throwing
                // an exception for malformed lines.
                inputLine = inputLine.replace("\"", "");

                // A limit of -1 keeps trailing empty strings, so a line with
                // surplus trailing tabs ("A\tB\t") yields three tokens and is
                // rejected instead of being silently accepted. The explicit
                // emptiness check keeps rejecting a line whose book field is
                // missing ("A\t").
                String[] tokens = inputLine.split("\t", -1);
                if (tokens.length != 2 || tokens[1].isEmpty()) {
                    throw new MalformedDataException("Line should contain exactly one tab: "
                                                     + inputLine);
                }

                String character = tokens[0];
                String book = tokens[1];

                // Add the parsed data to the character and book collections.
                characters.add(character);
                List<String> appearing = books.get(book);
                if (appearing == null) {
                    appearing = new ArrayList<String>();
                    books.put(book, appearing);
                }
                appearing.add(character);
            }
        } catch (IOException e) {
            reportIOException(e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    reportIOException(e);
                }
            }
        }
    }

    /**
     * Reports an I/O failure on System.err (message, then stack trace)
     * without propagating it.
     */
    private static void reportIOException(IOException e) {
        System.err.println(e.toString());
        e.printStackTrace(System.err);
    }

}