// Hunter Schafer, CSE 143
// This program counts the number of unique words (whitespace-separated
// tokens) in a large file and prints out how long the counting took.

import java.io.*;
import java.util.*;

public class UniqueWords {
    // Reads mobydick.txt from the working directory, counts its unique
    // tokens, and reports the count along with the elapsed time in ms.
    // Throws FileNotFoundException if mobydick.txt cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        System.out.println("Reading file...");
        // try-with-resources so the underlying file is closed even if
        // counting throws. The Scanner is opened before the timer starts,
        // so only the counting itself is measured.
        try (Scanner input = new Scanner(new File("mobydick.txt"))) {
            long start = System.currentTimeMillis();
            int uniqueWords = countUnique(input);
            long end = System.currentTimeMillis();
            long elapsed = end - start;

            // The value reported is the number of distinct tokens, not the
            // total token count, so the message says so explicitly.
            System.out.println("The file has " + uniqueWords + " unique words.");
            System.out.println("Took " + elapsed + " ms.");
        }
    }

    // pre:  input is not null
    // post: returns the number of unique tokens in input; all remaining
    //       tokens of input are consumed. Tokens are compared with
    //       String equality, so "The" and "the" count as different words.
    public static int countUnique(Scanner input) {
        // Choice of collection (author's notes from trying each one):
        //   List<String> / ArrayList - slow
        //   Set<String>  / TreeSet   - pretty fast, keeps words ordered
        //   Set<String>  / HashSet   - fastest, but un-ordered
        // Ordering is irrelevant for counting, so HashSet is used.
        Set<String> words = new HashSet<>();

        // A Set ignores duplicate additions, so there is no need to check
        // contains() before adding or to keep a separate counter.
        while (input.hasNext()) {
            words.add(input.next());
        }
        return words.size();
    }
}