// Erika Wolfe, CSE 143
// This program counts the words in a large file.
// It demonstrates the use of the Set and Map collections.

import java.io.*;
import java.util.*;

public class WordCount {
    // Minimum number of occurrences a word needs in order to be printed.
    public static final int MIN_COUNT = 20;

    // Reads mobydick.txt, prints every word that occurs at least MIN_COUNT
    // times, then reports the number of distinct words and how often
    // "whale" occurs.
    // Throws FileNotFoundException if the input file cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        // A smaller input, "smallmoby.txt", can be substituted here for quick runs.
        Scanner input = new Scanner(new File("mobydick.txt"));
        try {
            // countUnique isn't necessary because the Map returned by
            // printWordCounts already has one entry per distinct word.
            Map<String, Integer> wordCounts = printWordCounts(input);
            System.out.println("The file has " + wordCounts.size() + " words.");

            // Report 0 rather than "null" when the word never appears.
            Integer whaleCount = wordCounts.get("whale");
            System.out.println("The file has " + (whaleCount == null ? 0 : whaleCount)
                    + " instances of whale.");
        } finally {
            // Closing the Scanner also releases the underlying file handle.
            input.close();
        }
    }

    // Returns the number of unique words in the given Scanner.
    // Ignores case when comparing words. Consumes all remaining tokens.
    public static int countUnique(Scanner input) {
        Set<String> words = new HashSet<String>();
        while (input.hasNext()) {
            // Locale.ROOT keeps the case folding independent of the
            // machine's default locale.
            words.add(input.next().toLowerCase(Locale.ROOT));
        }
        return words.size();
    }

    // Returns a Map which associates words in the given Scanner to the
    // number of times they occur, ignoring case of the words.
    // Prints, in alphabetical order, the words that occur at least
    // MIN_COUNT times, followed by a blank line.
    // Consumes all remaining tokens of the Scanner.
    public static Map<String, Integer> printWordCounts(Scanner input) {
        // chose TreeMap because we wanted to print an alphabetized result
        Map<String, Integer> wordCounts = new TreeMap<String, Integer>();
        while (input.hasNext()) {
            String word = input.next().toLowerCase(Locale.ROOT);
            // like data[index] = data[index] + 1, starting from 0 when absent
            Integer previous = wordCounts.get(word);
            wordCounts.put(word, previous == null ? 1 : previous + 1);
        }

        // only print the words with a count of at least MIN_COUNT
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            int count = entry.getValue();
            if (count >= MIN_COUNT) {
                System.out.println(entry.getKey() + " - " + count);
            }
        }
        System.out.println();
        return wordCounts;
    }
}