# Class Example - WordCounts
# Internal Data Structure: dictionary of frequencies of words
#
# Many extra comments added for explanation purposes.
# Not necessarily an example of good style.

# Modified to take a list of words instead of filename as input
# to the constructor so this code can be cut and pasted into
# the Python Tutor.


class WordCounts:
    """Represents the words in a file as a table of word frequencies."""

    # Internal representation:
    # variable wordcounts is a dictionary from words to their frequency

    def __init__(self, word_list):
        """Create a WordCounts object from the given word list.

        word_list is iterated once; every occurrence of a word adds one
        to that word's count.
        """
        # This is a constructor, gets called when we do:
        # var_name = WordCounts(list_of_words)

        # WordCounts objects will have one data attribute (aka "field").
        # This is created here: wordcounts
        self.wordcounts = {}
        for w in word_list:
            # get() supplies 0 the first time a word is seen.
            self.wordcounts[w] = self.wordcounts.get(w, 0) + 1

    def word_count(self, word):
        """Return the count of the given word (0 if it never occurred)."""
        # dict.get works on both Python 2 and Python 3; dict.has_key
        # was removed in Python 3.
        return self.wordcounts.get(word, 0)

    def topk(self, k=10):
        """Return the k most frequent words as (count, word) tuples.

        The list is sorted by count, highest first. Words with equal
        counts appear in descending word order, because the tuples are
        compared element by element and the whole sort is reversed.
        Fewer than k tuples are returned when there are fewer than k
        distinct words.
        """
        # Put the count first so that tuple comparison sorts by frequency.
        scores_with_words = sorted(
            ((c, w) for (w, c) in self.wordcounts.items()),
            reverse=True)
        return scores_with_words[0:k]

    def total_words(self):
        """Return the total number of words in the file.

        This counts every occurrence, not just the distinct words.
        """
        return sum(self.wordcounts.values())


#---------------------------------------------------------
# Sample client program
#
# The EXACT same sample program is used with both versions
# of the WordCounts class because the client interface is
# the same for both versions of the classes.
# Two small word lists to count; they differ only in their final words.
words = ["the", "lazy", "brown", "fox", "fox", "the", "the"]
words2 = ["the", "lazy", "brown", "fox", "fox", "hippo", "hippo", "hippo"]

# Create two instances of the class WordCounts
# wc and wc2 are both objects of type WordCounts
wc = WordCounts(words)
wc2 = WordCounts(words2)

# Each print call receives one pre-formatted string, so the output text
# is the same whether print is a statement (Python 2) or a function
# (Python 3).
print("type(wc) is {0}".format(type(wc)))
print("type(wc2) is {0}".format(type(wc2)))
print("type(WordCounts) is: {0}".format(type(WordCounts)))

# Show the k most frequent entries of each object.
k = 5
print("wc top {0} : {1}".format(k, wc.topk(k)))
print("wc2 top {0} : {1}".format(k, wc2.topk(k)))

# Look up the count of a single word in each object.
word = "the"
print("wc of {0} : {1}".format(word, wc.word_count(word)))
print("wc2 of {0} : {1}".format(word, wc2.word_count(word)))

# A few more commands:

# List attributes of the class
#print("dir on WordCounts class: {0}".format(dir(WordCounts)))

# List attributes of an object
#print("dir on wc object: {0}".format(dir(wc)))

# Get docstring for the class and all its methods
#help(WordCounts)