// Erika Wolfe, CSE 143
// This program counts the distinct words in a large file and how often each one occurs.
// It demonstrates the use of the Set and Map collections.

import java.io.*;
import java.util.*;

public class WordCount {
    // a word must occur at least this many times to be printed
    public static final int MIN_COUNT = 20;

    // reads mobydick.txt, prints its frequently occurring words, then
    // reports how many distinct words it has and how often "whale" occurs
    // throws FileNotFoundException if mobydick.txt cannot be opened
    public static void main(String[] args) throws FileNotFoundException {
        Scanner input = new Scanner(new File("mobydick.txt"));

        // a separate countUnique pass isn't necessary: the size of the Map
        // returned by printWordCounts is already the number of distinct words
        Map<String, Integer> wordCounts;
        try {
            wordCounts = printWordCounts(input);
        } finally {
            // release the underlying file even if reading fails part-way
            input.close();
        }

        System.out.println("The file has " + wordCounts.size() + " words.");

        // get returns null for a word that never appeared; report that as 0
        Integer whales = wordCounts.get("whale");
        int whaleCount = (whales == null) ? 0 : whales;
        System.out.println("The file has " + whaleCount + " instances of whale.");
    }

    // returns the number of unique words in the given Scanner
    // ignores case when looking at words
    // consumes all remaining tokens of the Scanner but does not close it
    public static int countUnique(Scanner input) {
        Set<String> words = new HashSet<String>();

        while (input.hasNext()) {
            // Locale.ROOT keeps the case folding independent of the machine's
            // default locale (e.g. a Turkish locale lower-cases "I" to a dotless i)
            words.add(input.next().toLowerCase(Locale.ROOT));
        }

        return words.size();
    }

    // returns a Map which associates words in the given Scanner to the
    // number of times they occur, ignoring case of the words
    // prints the words that occur at least MIN_COUNT times, in alphabetical
    // order, followed by a blank line
    // consumes all remaining tokens of the Scanner but does not close it
    public static Map<String, Integer> printWordCounts(Scanner input) {
        // chose TreeMap because we wanted to print an alphabetized result
        Map<String, Integer> wordCounts = new TreeMap<String, Integer>();

        while (input.hasNext()) {
            // Locale.ROOT keeps the case folding independent of the machine's
            // default locale, so the same file always yields the same keys
            String word = input.next().toLowerCase(Locale.ROOT);

            // like data[index] = data[index] + 1, starting from 1 for a new word
            Integer previous = wordCounts.get(word);
            wordCounts.put(word, previous == null ? 1 : previous + 1);
        }

        // only print the words with a count of at least MIN_COUNT
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            int count = entry.getValue();
            if (count >= MIN_COUNT) {
                System.out.println(entry.getKey() + " - " + count);
            }
        }
        System.out.println();

        return wordCounts;
    }
}