/*
* Copyright 2012 Steven Gribble
*
* This file is part of the UW CSE 333 course project sequence
* (333proj).
*
* 333proj is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* 333proj is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 333proj. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW2_MEMINDEX_H
#define HW2_MEMINDEX_H
#include <stdint.h>

#include "libhw1/LinkedList.h"
#include "libhw1/HashTable.h"
// MemIndex: an in-memory inverted index.
//
// The index holds a set of mappings, one per word. Each mapping goes from
// a word to the list of documents that contain the word and, for each such
// document, the list of positions within the document at which the word
// appears.
//
// A MemIndex is not a distinct structure: it is simply another name for a
// HashTable (a type not defined in this header; presumably it comes from
// libhw1/HashTable.h).
typedef HashTable MemIndex;
// SearchResult: one document that matched a search query.
//
// When a customer issues a query against a MemIndex, the MemIndex
// constructs and returns a LinkedList of SearchResults, sorted in
// decreasing order of quality (i.e., by the rank field).
//
// SearchResultPtr is an alias for a pointer to a SearchResult.
typedef struct SearchResult {
uint64_t docid; // ID of a document that matches the search query
uint32_t rank; // indicator of the quality of the match; result lists are sorted by decreasing rank
} SearchResult, *SearchResultPtr;
// Allocates a new MemIndex.
//
// Takes no arguments.
//
// Returns:
//
// - NULL on failure
//
// - a non-NULL MemIndex on success
MemIndex AllocateMemIndex(void);
// Frees a previously allocated MemIndex, together with all of the
// space malloc'ed within it.
//
// Arguments:
//
// - index: the MemIndex to free.
//   NOTE(review): behavior when index is NULL is not specified in
//   this header -- confirm against the implementation.
void FreeMemIndex(MemIndex index);
// Reports how many words are contained within the index.
//
// Arguments:
//
// - index: the MemIndex to inspect
//
// Returns:
//
// - the number of words contained within the index
uint64_t MINumWordsInMemIndex(MemIndex index);
// Adds a "posting list" to the MemIndex. A "posting list" is the set
// of positions within one document at which one particular word appears.
//
// Arguments:
//
// - index: the MemIndex to add the posting list to
//
// - word: the word that this posting list is for
//
// - docid: the docID of the document that this posting list is for
//
// - positions: the list of positions, in ascending order, at which
//   "word" appears within document "docid". This list must be
//   non-empty.
//
// The memindex takes ownership of "word" and "positions".
// NOTE(review): this header does not say whether ownership is also
// taken when the call fails -- confirm against the implementation.
//
// Returns:
//
// - 0 on failure
//
// - 1 on success
int MIAddPostingList(MemIndex index, char *word, uint64_t docid,
LinkedList positions);
// Processes a query against the memindex and returns a list of
// SearchResults. The customer is responsible for freeing the returned
// list; the appropriate free function pointer to pass in to
// FreeLinkedList is stdlib's free().
//
// Arguments:
//
// - index: the MemIndex to query
//
// - query: a non-empty array of strings; each string is a
//   query word, all lower-case. MIProcessQuery will return a
//   list of SearchResults containing all documents that contain
//   every query word at least once.
//
// - qlen: the number of query words in the query array. Because
//   qlen is a uint8_t, a single query holds at most 255 words.
//
// Returns:
//
// - NULL if no matching documents were found
//
// - a non-NULL LinkedList containing a list of SearchResults
//   if at least one matching document was found. The caller
//   is responsible for freeing the linked list (see above).
LinkedList MIProcessQuery(MemIndex index, char *query[], uint8_t qlen);
// WordDocSet: the value stored in the MemIndex for a single word.
//
// In HW3, customers will need to directly access the contents of
// the MemIndex HashTable. That HashTable, like all HashTables, contains
// HTKeyValues; the value field of each one is a pointer to a WordDocSet
// structure, defined below.
//
// Put another way: a MemIndex is a HashTable that maps from a word to a
// WordDocSet, and each WordDocSet contains another HashTable that maps
// from docID to a linked list of hits within that docID. Compare this
// structure to the figure on the HW2 web page that depicts an inverted
// index, and you'll see where it fits in.
//
// WordDocSetPtr is an alias for a pointer to a WordDocSet.
typedef struct {
char *word; // the word that this set of documents is for
HashTable docIDs; // maps from docID to a linked list of hits within that docID
} WordDocSet, *WordDocSetPtr;
#endif // HW2_MEMINDEX_H