// Stuart Reges
// 3/12/08
//
// Program that compares several structures for a large number of calls on
// these three operations: add, remove, contains. It prompts on the console
// for the name of the input file (for example moby.txt) and then for the
// number of the test to run. The tests are:
//     test1: using an unsorted ArrayList
//     test2: using Java's TreeSet (binary search tree)
//     test3: using Java's HashSet
//     test4: using our HashSet

// NOTE: keep these as on-demand (wildcard) imports. test4 uses the
// unqualified name HashSet for "our" implementation (presumably a HashSet
// class declared alongside this file -- confirm). A single-type
// "import java.util.HashSet;" would shadow that class, whereas an on-demand
// import does not.
import java.util.*;
import java.io.*;

public class HashTest {
    // Prompts for an input file and a test number, reads every
    // whitespace-separated token of the file into a list, reports the token
    // count and the derived "max" (passed to each test as a capacity hint),
    // and runs the selected test.
    //
    // Throws FileNotFoundException if the named input file cannot be opened.
    public static void main(String[] args) throws FileNotFoundException {
        // console wraps System.in and is deliberately left open: closing it
        // would close System.in for the rest of the process.
        Scanner console = new Scanner(System.in);
        System.out.print("input file? ");
        Scanner input = new Scanner(new File(console.nextLine()));

        List<String> data = new ArrayList<String>();
        int which;
        try {
            System.out.println("Which test? (1=ArrayList, 2=TreeSet,");
            System.out.print(" 3=Java's HashSet, 4=our HashSet)? ");
            // A non-numeric (or missing) answer is mapped to 0 so that it
            // reaches the "illegal test number" branch below instead of
            // aborting with an InputMismatchException.
            which = console.hasNextInt() ? console.nextInt() : 0;

            while (input.hasNext()) {
                data.add(input.next());
            }
        } finally {
            // Release the file handle even if reading fails part-way.
            input.close();
        }

        int max = data.size() / 10;  // somewhat arbitrary formula for max
        System.out.println("Total words = " + data.size());
        System.out.println("max = " + max);

        if (which == 1) {
            test1(data, max);
        } else if (which == 2) {
            test2(data, max);
        } else if (which == 3) {
            test3(data, max);
        } else if (which == 4) {
            test4(data, max);
        } else {
            System.out.println("illegal test number");
        }
    }

    // Runs the workload against an unsorted ArrayList used as a set.
    // For the word at index i: i % 5 == 0 removes it, i % 5 == 1 looks it
    // up, and any other index adds it only if it is not already present
    // (so the list never holds duplicates). Prints the final size and the
    // elapsed time in seconds.
    //
    // data: the words to process, in order
    // max:  initial capacity for the ArrayList
    public static void test1(List<String> data, int max) {
        long start = System.currentTimeMillis();
        List<String> lst = new ArrayList<String>(max);
        for (int i = 0; i < data.size(); i++) {
            String word = data.get(i);
            if (i % 5 == 0) {
                lst.remove(word);
            } else if (i % 5 == 1) {
                lst.contains(word);
            } else if (!lst.contains(word)) {
                lst.add(word);
            }
        }
        // Stop the clock before printing so console I/O is not measured.
        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
        System.out.println("list size after = " + lst.size());
        System.out.println("ArrayList time = " + elapsed);
    }

    // Runs the same remove/contains/add workload (selected by i % 5, as in
    // test1) against a TreeSet (binary search tree). Prints the final size
    // and the elapsed time in seconds.
    //
    // data: the words to process, in order
    // max:  unused; there is no TreeSet constructor that takes a size
    public static void test2(List<String> data, int max) {
        long start = System.currentTimeMillis();
        TreeSet<String> s = new TreeSet<String>();
        for (int i = 0; i < data.size(); i++) {
            String word = data.get(i);
            if (i % 5 == 0) {
                s.remove(word);
            } else if (i % 5 == 1) {
                s.contains(word);
            } else {
                s.add(word);
            }
        }
        // Stop the clock before printing so console I/O is not measured.
        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
        System.out.println("list size after = " + s.size());
        System.out.println("TreeSet time = " + elapsed);
    }

    // Runs the same remove/contains/add workload (selected by i % 5, as in
    // test1) against Java's implementation of HashSet. Prints the final
    // size and the elapsed time in seconds.
    //
    // data: the words to process, in order
    // max:  initial capacity for the HashSet
    public static void test3(List<String> data, int max) {
        long start = System.currentTimeMillis();
        // Fully qualified so this is always the JDK class, never "our" HashSet.
        java.util.HashSet<String> s = new java.util.HashSet<String>(max);
        for (int i = 0; i < data.size(); i++) {
            String word = data.get(i);
            if (i % 5 == 0) {
                s.remove(word);
            } else if (i % 5 == 1) {
                s.contains(word);
            } else {
                s.add(word);
            }
        }
        // Stop the clock before printing so console I/O is not measured.
        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
        System.out.println("list size after = " + s.size());
        System.out.println("Java's HashSet time = " + elapsed);
    }

    // Runs the same remove/contains/add workload (selected by i % 5, as in
    // test1) against our implementation of HashSet. Prints the final size
    // and the elapsed time in seconds.
    //
    // data: the words to process, in order
    // max:  value passed to our HashSet's constructor
    //
    // HashSet is left as a raw, unqualified type on purpose: its declaration
    // is not part of this file, so whether it takes a type parameter is
    // unknown here (TODO confirm and parameterize if it does).
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void test4(List<String> data, int max) {
        long start = System.currentTimeMillis();
        HashSet s = new HashSet(max);
        for (int i = 0; i < data.size(); i++) {
            String word = data.get(i);
            if (i % 5 == 0) {
                s.remove(word);
            } else if (i % 5 == 1) {
                s.contains(word);
            } else {
                s.add(word);
            }
        }
        // Stop the clock before printing so console I/O is not measured.
        double elapsed = (System.currentTimeMillis() - start) / 1000.0;
        System.out.println("list size after = " + s.size());
        System.out.println("our HashSet time = " + elapsed);
    }
}