// Hunter Schafer, CSE 143
// This program counts the number of words in a large file.
// It demonstrates the use of the Set and Map collections.

import java.io.*;
import java.util.*;

public class WordCount {
    // Reads the input file, prints how many times each token occurs, and
    // then reports how long the processing took in milliseconds.
    // Throws FileNotFoundException if the input file cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        System.out.println("Reading file...");
        // An earlier revision read "smallmoby.txt" here instead.
        Scanner input = new Scanner(new File("mobydick.txt"));
        try {
            long start = System.currentTimeMillis();
            printWordCounts(input);
            long elapsed = System.currentTimeMillis() - start;
            System.out.println("Took " + elapsed + " ms.");
        } finally {
            // Release the underlying file handle even if processing fails.
            input.close();
        }
    }

    // Returns the number of unique tokens in the input Scanner.
    // Also prints each unique token on its own line; the order is whatever
    // the HashSet happens to produce, so it is not sorted.
    // The Scanner is consumed but not closed.
    public static int countUnique(Scanner input) {
        Set<String> words = new HashSet<String>();
        while (input.hasNext()) {
            words.add(input.next());
        }

        // A Set has no indexes, so a for-each loop is used to visit the
        // elements rather than a counting loop with get(i).
        for (String word : words) {
            System.out.println(word);
        }
        return words.size();
    }

    // Prints the number of times each unique token appears in the given
    // Scanner, one "token count" pair per line, in sorted token order.
    // The Scanner is consumed but not closed.
    public static void printWordCounts(Scanner input) {
        Map<String, Integer> wordCounts = countWords(input);
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            System.out.println(entry.getKey() + " " + entry.getValue());
        }

        // Find all the words that appear a given number of times.
        // The grouping is built for demonstration; printing it is disabled.
        Map<Integer, Set<String>> countsToWords = groupWordsByCount(wordCounts);
        // System.out.println(countsToWords);
    }

    // Returns a map from each token in the Scanner to its number of
    // occurrences. A TreeMap is used so the keys iterate in sorted order.
    private static Map<String, Integer> countWords(Scanner input) {
        Map<String, Integer> wordCounts = new TreeMap<String, Integer>();
        while (input.hasNext()) {
            String word = input.next();
            // A single lookup: null means this is the first occurrence.
            Integer count = wordCounts.get(word);
            if (count == null) {
                wordCounts.put(word, 1);
            } else {
                wordCounts.put(word, count + 1);
            }
        }
        return wordCounts;
    }

    // Inverts the given word -> count map into a count -> words map, where
    // each value is the sorted set of words that occur exactly that often.
    private static Map<Integer, Set<String>> groupWordsByCount(
            Map<String, Integer> wordCounts) {
        Map<Integer, Set<String>> countsToWords =
                new HashMap<Integer, Set<String>>();
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            Integer count = entry.getValue();
            Set<String> words = countsToWords.get(count);
            if (words == null) {
                // Set-up case: first word seen with this count.
                words = new TreeSet<String>();
                countsToWords.put(count, words);
            }
            // General case: the set for this count exists by now.
            words.add(entry.getKey());
        }
        return countsToWords;
    }
}