001    package hw5;
002    import java.io.BufferedReader;
003    import java.io.File;
004    import java.io.FileReader;
005    import java.io.IOException;
006    import java.util.ArrayList;
007    import java.util.List;
008    import java.util.Map;
009    import java.util.Set;
010    
011    /**
012     * Parser utility to load the Marvel Comics dataset.
013     * @author Krysta Yousoufian
014     *
015     */
public class MarvelParser {

    /**
     * Reads the Marvel Universe dataset.
     * Each line of the input file contains a character name and a comic
     * book the character appeared in, separated by a tab character.
     * Double-quote characters are removed from every data line before it is
     * split, so names and titles are stored without surrounding quotes.
     *
     * @requires file is well-formed, with each line containing exactly two
     *          tokens separated by a tab, or else starting with a # symbol
     *          to indicate a comment line.
     * @param filename the file that will be read
     * @param characters set in which all character names will be stored;
     *          typically empty when the routine is called
     * @param books map from titles of comic books to characters that
     *          appear in them; typically empty when the routine is called
     * @modifies characters, books
     * @effects adds every character name found in the file to characters
     * @effects fills books with a map from each comic book to all characters
     *          appearing in it, in the order the pairs occur in the file
     * @throws Exception if a non-comment line does not split into exactly
     *          two tab-separated tokens. Entries parsed before the bad line
     *          remain in characters and books.
     *          An IOException (for example, a missing file) is NOT
     *          propagated: it is reported on System.err and the method
     *          returns normally with whatever was parsed so far.
     */
    public static void parseData(String filename, Set<String> characters,
            Map<String, List<String>> books) throws Exception {
        // Why does this method accept the Collections to be filled as
        // parameters rather than making them a return value? To allow us to
        // "return" two different Collections. If only one or neither Collection
        // needs to be returned to the caller, feel free to rewrite this method
        // without the parameters. Generally this is better style.

        // try-with-resources guarantees the reader is closed on every exit
        // path, including when a malformed line aborts parsing below.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {

            // Construct the collections of characters and books, one
            // <character, book> pair at a time.
            String inputLine;
            while ((inputLine = reader.readLine()) != null) {

                // Ignore comment lines.
                if (inputLine.startsWith("#")) {
                    continue;
                }

                // Parse the data, stripping out quotation marks and throwing
                // an exception for malformed lines.
                inputLine = inputLine.replace("\"", "");
                String[] tokens = inputLine.split("\t");
                if (tokens.length != 2) {
                    throw new Exception("Line should contain exactly one tab: " + inputLine);
                }

                String character = tokens[0];
                String book = tokens[1];

                // Add the parsed data to the character and book collections.
                characters.add(character);
                List<String> appearing = books.get(book);
                if (appearing == null) {
                    // First time this book is seen: start its character list.
                    appearing = new ArrayList<String>();
                    books.put(book, appearing);
                }
                appearing.add(character);
            }

        } catch (IOException e) {
            // Best-effort reporting kept from the original contract: I/O
            // failures are printed, not rethrown.
            System.err.println(e.toString());
            e.printStackTrace(System.err);
        }
    }

}