def read_words(filename):
    """Return a dictionary mapping each word in filename to its frequency.

    The file is read in full and split on runs of whitespace.  Words are
    counted exactly as they appear: no case folding is applied and any
    punctuation stays attached to the word.

    Raises whatever ``open`` raises when the file cannot be opened.
    """
    # ``with`` guarantees the handle is closed even if read() raises.
    with open(filename) as wordfile:
        words = wordfile.read().split()

    wordcounts = {}
    for w in words:
        wordcounts[w] = wordcounts.get(w, 0) + 1
    return wordcounts


def wordcount(wordcounts, word):
    """Return the count of the given word in wordcounts (0 if absent)."""
    # dict.get replaces dict.has_key, which no longer exists in Python 3.
    return wordcounts.get(word, 0)


def topk(wordcounts, k=10):
    """Return the top k most frequent words in wordcounts.

    The result is a list of ``(count, word)`` tuples ordered by descending
    count; words with equal counts are ordered by descending word.  If there
    are fewer than k unique words in wordcounts, all of them are returned.
    """
    # Putting the count first lets plain tuple ordering rank by frequency.
    scores_with_words = sorted(
        ((c, w) for (w, c) in wordcounts.items()), reverse=True
    )
    return scores_with_words[0:k]


def totalwords(wordcounts):
    """Return the total number of words counted (sum of all frequencies)."""
    return sum(wordcounts.values())


def main(filename="foo2.txt"):
    """Print a short word-frequency report for filename."""
    wordcounts = read_words(filename)
    # Single-argument print(...) with %-formatting is valid on both
    # Python 2 and Python 3 and produces the same text on each.
    print("wordcounts : %s" % (wordcounts,))
    k = 1
    print("top %s  : %s" % (k, topk(wordcounts, k)))
    print("top 10 : %s" % (topk(wordcounts),))
    print("total words in file: %s" % (totalwords(wordcounts),))
    print("count of quick: %s" % (wordcount(wordcounts, "quick"),))
    print("count of foo: %s" % (wordcount(wordcounts, "foo"),))


# Only run the report when executed as a script, so importing this module
# for its functions does not require foo2.txt to exist.
if __name__ == "__main__":
    main()