#include #include #include #include #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif #define HT_SIZE 71 /* * Hash Table "ADT" */ #define NUM_HASH_VALS 20 #define HT_SMALL_PRIME 17 typedef enum ht_cell_enum { EMPTY, FULL, DELETED } HTCellStatus; typedef struct ht_cell_struct { char *data; HTCellStatus status; } HTCell; typedef struct ht_struct { HTCell *cell; int numItems; int numCells; int numCollisions; int h1_value[NUM_HASH_VALS]; int h2_value[NUM_HASH_VALS]; } HashTable; /* Initializes the fields of a new HashTable struct. Should be called before any other HashTable functions are used on the table. */ void hashtable_init(HashTable *ht, int num_cells) { int i; ht->cell = (HTCell *) malloc(num_cells * sizeof(HTCell)); ht->numCells = num_cells; ht->numItems = 0; ht->numCollisions = 0; for (i = 0; i < num_cells; i++) { ht->cell[i].data = NULL; ht->cell[i].status = EMPTY; } for (i = 0; i < NUM_HASH_VALS; i++) { /* Old implementations of rand() often had a lot of regularity in the low-order bits, so we throw a few out just in case. */ ht->h1_value[i] = (int) ((rand() >> 4) % 10000); ht->h2_value[i] = (int) ((rand() >> 4) % 10000); } } /* Deallocates memory held by the hash table and its entries. */ void hashtable_destroy(HashTable *ht) { int i; for (i = 0; i < ht->numCells; i++) if (ht->cell[i].status == FULL) free(ht->cell[i].data); free(ht->cell); } /* Computes the hash value of a particular string by generating a linear combination of the character values with the entries in the block array used as coefficients. Note that this value should subsequently be taken mod the table size by the caller. */ unsigned int compute_hash_val(char *string, int block[]) { unsigned int result = 0; int i = 0; while (string[i] != '\0') { result += string[i] * block[i % NUM_HASH_VALS]; i++; } return result; } /* Returns TRUE if the hashtable is full, or FALSE otherwise. */ int hashtable_full(HashTable *ht) { return ht->numItems >= ht->numCells; } /* Inserts the given string into the table. This function assumes the string is not a duplicate and that the table is not full. It accumulates the number of collisions in the HashTable struct. The string is copied into its own buffer, so the parameter may be discarded after insertion. */ void hashtable_insert(HashTable *ht, char *string) { /* We first try the primary and secondary hash functions; if neither works then we fall back to linear probing. Quadratic probing is NOT a good idea for this assignment because the table is very full, making it possible that no empty cell will be found even if one exists! */ int hash_val = (int) (compute_hash_val(string, ht->h1_value) % ht->numCells); int hash2_val; if (ht->cell[hash_val].status == FULL) { ht->numCollisions++; hash2_val = (int) (HT_SMALL_PRIME - compute_hash_val(string, ht->h2_value) % HT_SMALL_PRIME); hash_val = (hash_val + hash2_val) % ht->numCells; } /* Because of the precondition that the table is not full, this search must terminate. */ while (ht->cell[hash_val].status == FULL) { ht->numCollisions++; hash_val = (hash_val + hash2_val) % ht->numCells; } ht->cell[hash_val].data = (char *) malloc((strlen(string) + 1) * sizeof(char)); strcpy(ht->cell[hash_val].data, string); ht->cell[hash_val].status = FULL; } /* This function returns TRUE if the element is in the table, or FALSE otherwise. In a "real" hash table, it would probably also return some data associated with the lookup string. */ int hashtable_find(HashTable *ht, char *string) { int hash_val, hash2_val, start_place; hash_val = (int) (compute_hash_val(string, ht->h1_value) % ht->numCells); start_place = hash_val; if ((ht->cell[hash_val].status == FULL) && (strcmp(string, ht->cell[hash_val].data) == 0)) return TRUE; else if (ht->cell[hash_val].status == EMPTY) return FALSE; else { hash2_val = (int) (HT_SMALL_PRIME - compute_hash_val(string, ht->h2_value) % HT_SMALL_PRIME); hash_val = (hash_val + hash2_val) % ht->numCells; } while (ht->cell[hash_val].status != EMPTY) { if ((ht->cell[hash_val].status == FULL) && (strcmp(string, ht->cell[hash_val].data) == 0)) return TRUE; else hash_val = (hash_val + hash2_val) % ht->numCells; if (hash_val == start_place) break; } return FALSE; } /* * Main program */ void report_collisions(HashTable *ht, int iteration) { printf("Iteration %i: cumulative collisions: %d\n", iteration, ht->numCollisions); } int main(void) { HashTable ht; int num_read = 0; char buffer[200]; /* Initialize the random number generator & hash table. */ srand(time(NULL)); hashtable_init(&ht, HT_SIZE); while (scanf("%s", buffer) != EOF) { num_read++; if (!hashtable_find(&ht, buffer)) hashtable_insert(&ht, buffer); if (num_read % 5 == 0) report_collisions(&ht, num_read); } report_collisions(&ht, num_read); return 0; }