# Design Example
# Text Analysis Module


def read_words(filename):
    """Return a dictionary mapping each word in a file to its frequency.

    The file is read in full and split on runs of whitespace, so "words"
    are whitespace-delimited tokens: case and punctuation are kept as-is
    ("The", "the" and "the," are three different words).

    Parameters:
        filename: path of the text file to read.

    Returns:
        A dict mapping each distinct token to the number of times it
        occurs in the file. An empty or whitespace-only file gives {}.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The with-block guarantees the file is closed even if read() raises.
    with open(filename) as wordfile:
        word_list = wordfile.read().split()

    wordcounts_dict = {}
    for word in word_list:
        # get() supplies 0 the first time a word is seen.
        wordcounts_dict[word] = wordcounts_dict.get(word, 0) + 1
    return wordcounts_dict


def word_count(wordcounts_dict, word):
    """Return the count of the given word in a word-to-count dictionary.

    Parameters:
        wordcounts_dict: dictionary mapping words to counts.
        word: the word to look up.

    Returns:
        The count stored for word, or 0 if word is not in the dictionary.
    """
    return wordcounts_dict.get(word, 0)


def topk(wordcounts_dict, k=10):
    """Return the k most frequent words as a list of (count, word) tuples.

    The list is sorted from most to least frequent. If there are fewer
    than k unique words in wordcounts_dict, all words are returned.
    Callers should treat the ordering between words with the same count
    as unspecified (this implementation breaks ties by reverse
    alphabetical order of the word). If the k+1th word has the same count
    as the kth word, which one is returned is likewise an implementation
    detail.

    Parameters:
        wordcounts_dict: dictionary mapping words to counts.
        k: maximum number of entries to return (default 10). Zero or a
           negative value yields an empty list.

    Returns:
        A list of at most k (count, word) tuples.
    """
    # Put the count first so that plain tuple ordering sorts by frequency.
    counts_with_words = sorted(
        ((count, word) for word, count in wordcounts_dict.items()),
        reverse=True,
    )
    # Clamp k: a negative slice bound would otherwise drop entries from
    # the end of the list instead of returning "at most k" of them.
    return counts_with_words[:max(k, 0)]


def total_words(wordcounts_dict):
    """Return the total number of words used to create the dictionary.

    Parameters:
        wordcounts_dict: dictionary mapping words to counts.

    Returns:
        The sum of all counts; 0 for an empty dictionary.
    """
    return sum(wordcounts_dict.values())