# Class Example - WordCounts
# Internal Data Structure: dictionary of frequencies of words
#
# Many extra comments added for explanation purposes.
# Not necessarily an example of good style.

# Modified to take a list of words instead of filename as input
# to the constructor so this code can be cut and pasted into
# the Python Tutor.


class WordCounts:
    """Word-frequency table built from a list of words.

    Internal representation: the attribute ``wordcounts_dict`` is a
    dictionary mapping each word to the number of times it occurred.
    """

    def __init__(self, word_list):
        """Create a WordCounts object from the given word list.

        This is the constructor; it gets called when we do:
            var_name = WordCounts(list_of_words)

        word_list: an iterable of hashable words (may be empty).
        """
        # WordCounts objects have one data attribute (aka "field"),
        # created here: wordcounts_dict
        self.wordcounts_dict = {}
        for w in word_list:
            # get() supplies 0 the first time a word is seen, so a single
            # assignment handles both new and already-counted words.
            self.wordcounts_dict[w] = self.wordcounts_dict.get(w, 0) + 1

    def get_count(self, word):
        """Return the count of the given word (0 if it never occurred)."""
        return self.wordcounts_dict.get(word, 0)

    def topk(self, k=10):
        """Return the k most frequent words as (count, word) tuples.

        The list is ordered from most to least frequent.  Because whole
        (count, word) tuples are sorted in descending order, words with
        equal counts appear in descending (reverse alphabetical) word order.

        k: maximum number of entries to return.  Fewer are returned when
           there are fewer distinct words; k <= 0 yields an empty list.
        """
        if k <= 0:
            # A negative k would otherwise slice from the end of the list
            # and silently return "all but the last |k|" entries.
            return []
        scores_and_words = [(c, w) for (w, c) in self.wordcounts_dict.items()]
        return sorted(scores_and_words, reverse=True)[:k]

    def total_words(self):
        """Return the total number of words counted, including repeats."""
        return sum(self.wordcounts_dict.values())


# ---------------------------------------------------------
# Sample client program
#
# The EXACT same sample program is used with both versions
# of the WordCounts class because the client interface is
# the same for both versions of the classes.
# Two small word lists used to build two independent WordCounts objects.
words = ["the", "lazy", "brown", "fox", "fox", "the", "the"]
words2 = ["the", "lazy", "brown", "fox", "fox", "hippo", "hippo", "hippo"]

# Create two instances of the class WordCounts
# wc and wc2 are both objects of type WordCounts
wc = WordCounts(words)
wc2 = WordCounts(words2)

# print("type(wc) is", type(wc))
# print("type(wc2) is", type(wc2))
# print("type(WordCounts) is:", type(WordCounts))

# Show the k most frequent entries of each object.
k = 2
print("wc top", k, ":", wc.topk(k))
print("wc2 top", k, ":", wc2.topk(k))

# Look up the same word in both objects; each keeps its own counts.
word = "the"
print("wc of", word, ":", wc.get_count(word))
print("wc2 of", word, ":", wc2.get_count(word))

# A few more commands:

# List attributes of the class
# print("dir on WordCounts class:", dir(WordCounts))

# List attributes of an object
# print("dir on wc object:", dir(wc))

# Get docstring for the class and all its methods
# help(WordCounts)