import java.util.*;
import java.io.*;

// This class shows the efficiency of using a Set over a List in
// the case where we count unique words in a file.
// It also uses a Map to store a count for each unique word in the
// file and prints out the words whose count exceeds WORDS_CUTOFF.
public class WordCount {
    // A word is printed by printCounts only when it occurs strictly more
    // often than this.
    public static final int WORDS_CUTOFF = 300;

    // Prompts for a file name, counts the unique whitespace-separated tokens
    // in that file once with a List and once with a Set (reporting the
    // elapsed wall-clock time of each), then prints the frequent words.
    // Throws FileNotFoundException if the named file cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        // Deliberately not closed: closing it would also close System.in.
        Scanner console = new Scanner(System.in);
        System.out.print("Enter file name: ");
        String fileName = console.nextLine();

        long start = System.currentTimeMillis();
        int uniqueWords;
        // The file is re-opened for every pass because a Scanner can only
        // be consumed once; try-with-resources releases the file handle.
        try (Scanner input = new Scanner(new File(fileName))) {
            uniqueWords = countUniqueWords(input, new ArrayList<String>());
        }
        long end = System.currentTimeMillis();
        System.out.println("unique words: " + uniqueWords);
        System.out.println("List operations took " + (end - start) + "ms");

        start = System.currentTimeMillis();
        try (Scanner input = new Scanner(new File(fileName))) {
            uniqueWords = countUniqueWords(input, new HashSet<String>());
        }
        end = System.currentTimeMillis();
        System.out.println("unique words: " + uniqueWords);
        System.out.println("Set operations took " + (end - start) + "ms");

        try (Scanner input = new Scanner(new File(fileName))) {
            printCounts(input);
        }
    }

    // Reads every token from the given Scanner, adding each token that is
    // not already present to the given List, and returns the List's size
    // afterwards. The caller remains responsible for closing the Scanner.
    public static int countUniqueWords(Scanner input, List<String> list) {
        while (input.hasNext()) {
            String token = input.next();
            // A List allows duplicates, so membership has to be checked
            // explicitly before adding; this is the cost being measured
            // against the Set version.
            if (!list.contains(token)) {
                list.add(token);
            }
        }
        return list.size();
    }

    // Reads every token from the given Scanner into the given Set and
    // returns the Set's size afterwards. The caller remains responsible
    // for closing the Scanner.
    public static int countUniqueWords(Scanner input, Set<String> set) {
        while (input.hasNext()) {
            // No contains() check is needed: a Set ignores duplicate adds.
            set.add(input.next());
        }
        return set.size();
    }

    // Counts how many times each token occurs in the given Scanner and
    // prints one "word -> count" line for every word whose count is
    // strictly greater than WORDS_CUTOFF. Words are printed in the sorted
    // order of their String keys. The caller remains responsible for
    // closing the Scanner.
    public static void printCounts(Scanner input) {
        // word => count; a TreeMap keeps the words in sorted key order.
        Map<String, Integer> wordCount = new TreeMap<>();
        while (input.hasNext()) {
            // Stores 1 for a new word, otherwise adds 1 to the stored count.
            wordCount.merge(input.next(), 1, Integer::sum);
        }

        // Iterate over entries so each word is not looked up a second time.
        for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
            int count = entry.getValue();
            if (count > WORDS_CUTOFF) {
                System.out.println(entry.getKey() + " -> " + count);
            }
        }
    }
}