CSE143 Sample Program handout #10
Program WordCount.java
----------------------
// This program prompts the user for the name of a file and then counts the
// occurrences of words in the file (ignoring case). It then reports the
// frequencies using a cutoff supplied by the user that limits the output
// to just those words with a certain minimum frequency.
import java.util.*;
import java.io.*;
public class WordCount {
    // Prompts on the console for a file name, counts how often each word
    // (whitespace-delimited token, lowercased) occurs in that file, then
    // prompts for a minimum frequency and prints every word that occurs at
    // least that many times, in sorted word order, as "count<TAB>word".
    //
    // Throws FileNotFoundException if the named file cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        // open the file
        Scanner console = new Scanner(System.in);
        System.out.print("What is the name of the text file? ");
        String fileName = console.nextLine();
        Scanner input = new Scanner(new File(fileName));

        // count occurrences; always release the file handle, even if
        // reading fails part-way through
        SortedMap<String, Integer> wordCounts;
        try {
            wordCounts = countWords(input);
        } finally {
            input.close();
        }

        // get cutoff and report frequencies
        // (the value printed here is the number of distinct words, i.e.
        // the number of keys in the map, not the number of tokens read)
        System.out.println("Total words = " + wordCounts.size());
        System.out.print("Minimum number of occurrences for printing? ");
        int min = console.nextInt();
        reportFrequencies(wordCounts, min);
    }

    // Reads every remaining token from input and returns a map from each
    // lowercased token to the number of times it was seen.
    private static SortedMap<String, Integer> countWords(Scanner input) {
        SortedMap<String, Integer> wordCounts = new TreeMap<String, Integer>();
        while (input.hasNext()) {
            // Locale.ROOT keeps case folding independent of the machine's
            // default locale (e.g. Turkish dotless i)
            String next = input.next().toLowerCase(Locale.ROOT);
            // a single lookup: null means this is the first occurrence
            Integer seen = wordCounts.get(next);
            wordCounts.put(next, seen == null ? 1 : seen + 1);
        }
        return wordCounts;
    }

    // Prints "count<TAB>word" for each entry whose count is at least min,
    // in the map's key order.
    private static void reportFrequencies(SortedMap<String, Integer> wordCounts,
                                          int min) {
        for (Map.Entry<String, Integer> entry : wordCounts.entrySet()) {
            int count = entry.getValue();
            if (count >= min) {
                System.out.println(count + "\t" + entry.getKey());
            }
        }
    }
}
Another Sample Log of Execution
-------------------------------
What is the name of the text file? moby.txt
Total words = 30368
Minimum number of occurrences for printing? 500
4571 a
1354 all
587 an
6182 and
563 are
1701 as
1289 at
973 be
1691 but
1133 by
1522 for
1067 from
754 had
741 have
1686 he
552 him
2459 his
1746 i
3992 in
512 into
1555 is
1754 it
562 like
578 my
1073 not
506 now
6408 of
933 on
775 one
675 or
882 so
599 some
2729 that
14092 the
602 their
506 there
627 they
1239 this
4448 to
551 upon
1567 was
644 were
500 whale
552 when
547 which
1672 with
774 you