### Python code to produce plots and experiments used in statistics lecture. ### The entry point is the lecture() function, which will be called if this file is run. ### Note: Docstrings are not fully fleshed out in good style. ### Some lines may be too wide. import matplotlib.pyplot as plt import numpy as np import random ruth_rolls = [5, 6, 6, 5, 6, 6] ruth_name = "Ruth" student_rolls = [1, 5, 1, 3, 5, 1] student_name = "Benjamin" def rolls_to_hist(rolls): """Convert a list of single-die rolls to a list of counts. The input is a list, all of whose numbers are between 1 and 6 inclusive. In the result list, result[i] is the number of times i appeared in the input list. In the result list, result[0] == 0. """ #result = [0, 0, 0, 0, 0, 0, 0] result = [0] * 7 for roll in rolls: result[roll] += 1 return result assert rolls_to_hist([1, 5, 4, 6, 2, 1, 1, 3, 2, 5]) == [0, 3, 2, 1, 1, 2, 1] def normalize(alist): """Return a new list in which the elements are scaled to sum to 1. The input list is a list of numbers. It is not modified by this routine. """ result = [] list_sum = float(sum(alist)) for elt in alist: normalized = elt / list_sum result.append(normalized) return result assert normalize([3, 1, 0, 3, 2, 1]) == [.3, .1, 0, .3, .2, .1] def plot_die_hist(hist, label): """Plot a die-roll normalized histogram, without showing it. The input is a histogram indicating, for each number of pips, its normalized frequency. """ plt.plot(normalize(hist), label=label) plt.plot(normalize(hist), 'o') plt.axis([1, len(hist) - 1, 0, 0.7]) plt.xlabel("Pips") plt.ylabel("Frequency") plt.legend() def plot_die_rolls(rolls, label): """Plot a die-roll normalized histogram. The input is a list of rolls, where each element is the sum of visible pips.""" hist = rolls_to_hist(rolls) plot_die_hist(hist, label) def show_two_hist(rolls1, name1, rolls2, name2, title): """Plot two die-roll normalized histograms, then show them.""" plt.clf() plot_die_rolls(rolls1, name1) plot_die_rolls(rolls2, name2) plt.title(title) plt.show() def plot_one_hist(rolls): """Plot a histogram of the given die roll outcomes, without showing the plot.""" numdice = 1 plt.hist(rolls, bins=np.linspace(numdice - .5, 6 * numdice + .5, num=5 * numdice + 2)) plt.axis([0.5, 6.5, 0, max(rolls_to_hist(rolls)) + 1]) plt.xlabel("Pips") plt.ylabel("Frequency") def show_one_hist(rolls, title): """Plot a histogram of the given die roll outcomes, and show it.""" plt.clf() plot_one_hist(rolls) plt.title(title) plt.show() def show_one_hist_and_line(rolls, title): """Like show_one_hist, but also plot a line.""" plt.clf() plot_one_hist(rolls) hist = rolls_to_hist(rolls) plt.plot(hist) plt.plot(hist, 'o') plt.axis([0.5, 6.5, 0, max(hist) + 1]) plt.title(title) plt.show() def die_roll(): """Return a single roll of a 6-sided die.""" return random.randrange(6) + 1 def dice_roll(numdice): """Return the sum of the given number of rolls of a 6-sided die.""" result = 0 for i in range(numdice): result = result + die_roll() return result def dice_rolls(numdice, numtrials): """Returns a list of numtrials numbers, each the sum of rolling numdice dice.""" result = [] for i in range(numtrials): result.append(dice_roll(numdice)) return result def show_notation(): show_one_hist(dice_rolls(1, 10), "10 fair die rolls") show_one_hist(dice_rolls(1, 10), "10 fair die rolls") random10_rolls = dice_rolls(1, 10) show_one_hist(random10_rolls, "10 fair die rolls") show_one_hist_and_line(random10_rolls, "The same 10 fair die rolls, connected by a line") def plot_random(numdice, numtrials): rolls = dice_rolls(numdice, numtrials) plot_die_rolls(rolls, "Random (" + str(numtrials) + ")") def show_random_1000(): plt.clf() plot_random(1, 1000) plt.title("Compare ideal to random die rolls") plt.show() def show_random_many(): plt.clf() plot_random(1, 10) plot_random(1, 100) plot_random(1, 1000) plot_random(1, 10000) plt.title("Compare different numbers of die rolls") plt.show() def show_random(): plt.clf() plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") plt.title("Expected histogram of die rolls") plt.show() plt.clf() plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") show_random_1000() show_random_many() def make_dice_histogram(therolls=None, therollname=None, numdice=None, numtrials=100000): if numdice == None: numdice = len(therolls) plt.hist(dice_rolls(numdice, numtrials), bins=np.linspace(numdice - .5, 6 * numdice + .5, num=5 * numdice + 2)) plt.xlabel("Pips") plt.ylabel("Frequency") plt.title(str(numdice) + " dice") def annotate_dice_histogram(therolls, therollname, ycoord): """Add an annotation to a dice histogram.""" thepips = sum(therolls) plt.annotate(therollname, xy=(thepips, ycoord / 3), xytext=(thepips, ycoord), arrowprops=dict(facecolor='black', shrink=0.2)) def show_dice_game_histogram(): """Show Ruth's and the student's rolls from the dice game.""" plt.clf() make_dice_histogram(therolls=ruth_rolls) annotate_dice_histogram(ruth_rolls, "Ruth's rolls", 3000) annotate_dice_histogram(student_rolls, student_name + "'s rolls", 3000) plt.title("Outcome of the die-rolling game") plt.show() def dice_likelihood_sum(thesum, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert numdice <= thesum <= 6 * numdice rolls = dice_rolls(numdice, numtrials) as_unlikely = 0 for roll in rolls: if thesum <= roll: as_unlikely = as_unlikely + 1 return float(as_unlikely) / numtrials def dice_likelihood(therolls, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert len(therolls) == numdice thesum = sum(therolls) return dice_likelihood_sum(thesum, numdice, numtrials) def print_dice_likelihood(therolls, numtrials, name): "Print the p-value for the given rolls." print "Likelihood of " + name + "'s result or better:", \ dice_likelihood(therolls, len(therolls), numtrials) def print_dice_game_likelihood(): "Print the p-value for Ruth's rolls and the student's rolls." print_dice_likelihood(ruth_rolls, 100000, "Ruth") print_dice_likelihood(student_rolls, 100000, student_name) def show_10dice_histogram(): "Show a histogram of rolls of 10 dice, plus 6 points on the histogram." plt.clf() avg35 = [3, 4, 3, 4, 3, 4, 3, 4, 3, 4] avg40 = [4, 4, 4, 4, 4, 4, 4, 4, 4, 4] avg45 = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5] avg50 = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5] avg55 = [5, 6, 5, 6, 5, 6, 5, 6, 5, 6] avg60 = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 2000) annotate_dice_histogram(avg40, "4", 2000) annotate_dice_histogram(avg45, "4.5", 2000) annotate_dice_histogram(avg50, "5", 2000) annotate_dice_histogram(avg55, "5.5", 2000) annotate_dice_histogram(avg60, "6", 2000) plt.title("10 die rolls") plt.show() def show_20dice_histogram(): "Show a histogram of rolls of 20 dice, plus 6 points on the histogram." plt.clf() avg35 = [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4] avg40 = [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] avg45 = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5] avg50 = [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5] avg55 = [5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6] avg60 = [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 1500) annotate_dice_histogram(avg40, "4", 1500) annotate_dice_histogram(avg45, "4.5", 1500) annotate_dice_histogram(avg50, "5", 1500) annotate_dice_histogram(avg55, "5.5", 1500) annotate_dice_histogram(avg60, "6", 1500) plt.title("20 die rolls") plt.show() def lecture(): """The main entry point, which shows all the lecture's plots in turn.""" show_notation() show_two_hist(ruth_rolls, ruth_name, student_rolls, student_name, "Dice-rolling competition") show_random() show_dice_game_histogram() print_dice_game_likelihood() #show_10dice_histogram() #show_20dice_histogram() if __name__ == '__main__': lecture()