// Zorah Fung, CSE 143
// Counts the occurrences of words in a large file and prints the most common ones.
// It demonstrates the use of a Map collection.

import java.io.*;
import java.util.*;

/**
 * Counts the occurrences of each word in a text file, prints the most common
 * words, and then prints every word grouped by how often it occurs.
 *
 * A "word" is any token produced by Scanner's default (whitespace) delimiter,
 * lower-cased; punctuation attached to a token is therefore part of the word.
 */
public class WordCount2 {
   // File that is read when no path is supplied on the command line.
   private static final String DEFAULT_FILE = "mobydick.txt";

   // A word must occur MORE than this many times to be listed as "common".
   private static final int COMMON_THRESHOLD = 300;

   /**
    * Reads the input file and prints, in order:
    *   1. "Reading file..."
    *   2. "word: count" for every word occurring more than COMMON_THRESHOLD
    *      times, in alphabetical order
    *   3. "count [words]" for every distinct count, in increasing count order
    *
    * @param args optional; args[0], when present, is the path of the file to
    *             read. With no arguments "mobydick.txt" is read.
    * @throws FileNotFoundException if the input file cannot be opened
    */
   public static void main(String[] args) throws FileNotFoundException {
      String fileName = DEFAULT_FILE;
      if (args != null && args.length > 0) {
         fileName = args[0];
      }

      System.out.println("Reading file...");
      Map<String, Integer> wordCounts;
      Scanner input = new Scanner(new File(fileName));
      try {
         wordCounts = countWords(input);
      } finally {
         // Always release the file handle, even if reading fails part-way.
         input.close();
      }

      // Iterate over the entries so each word's count is available without a
      // second lookup; TreeMap yields them in alphabetical order of the word.
      for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
         if (entry.getValue() > COMMON_THRESHOLD) { // only display common words
            System.out.println(entry.getKey() + ": " + entry.getValue());
         }
      }

      // Now group words by their count and print the groups, smallest count first.
      Map<Integer, Set<String>> countToWords = groupByCount(wordCounts);
      for (Map.Entry<Integer, Set<String>> entry : countToWords.entrySet()) {
         System.out.println(entry.getKey() + " " + entry.getValue());
      }
   }

   // Tallies every remaining token of input (lower-cased) into a map sorted by word.
   private static Map<String, Integer> countWords(Scanner input) {
      // TreeMap keeps the words sorted; a HashMap would also work, but would
      // iterate in an unpredictable order.
      Map<String, Integer> wordCounts = new TreeMap<String, Integer>();
      while (input.hasNext()) {
         // Locale.ROOT makes the lower-casing independent of the machine's
         // default locale.
         String word = input.next().toLowerCase(Locale.ROOT);

         // get returns null when the word has no pre-existing count.
         Integer previous = wordCounts.get(word);
         wordCounts.put(word, previous == null ? 1 : previous + 1);
      }
      return wordCounts;
   }

   // Inverts a word -> count map into a count -> set-of-words map sorted by count.
   private static Map<Integer, Set<String>> groupByCount(Map<String, Integer> wordCounts) {
      Map<Integer, Set<String>> countToWords = new TreeMap<Integer, Set<String>>();
      for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
         Integer count = entry.getValue();
         Set<String> words = countToWords.get(count);
         if (words == null) {
            // First word seen with this count: create its set.
            words = new HashSet<String>();
            countToWords.put(count, words);
         }
         words.add(entry.getKey());
      }
      return countToWords;
   }
}