### Python code to produce plots and experiments used in statistics lecture. ### The entry point is the lecture() function, called at the end of this file. import matplotlib.pyplot as plt import numpy as np import random ruth_rolls = [5, 6, 6, 5, 6, 6] ruth_name = "Ruth" student_rolls = [6, 4, 5, 1, 1, 4] student_name = "Sergio" def normalize(alist): """Return a new list in which the elements are scaled to sum to 1. The input list is a list of numbers. It is not modified by this routine. """ list_sum = sum(alist) result = [] # Calculate normalized value for each element for elt in alist: normalized = float(elt)/list_sum result.append(normalized) return result assert normalize([3, 1, 0, 3, 2, 1]) == [.3, .1, 0, .3, .2, .1] def rolls_to_hist(rolls): """Convert a list of single-die rolls to a list of counts. The input is a list, all of whose numbers are between 1 and 6 inclusive. In the result list, result[i] is the number of times i appeared in the input list. In the result list, result[0] == 0. """ result = [0,0,0,0,0,0,0] for roll in rolls: result[roll] = result[roll] + 1 return result assert rolls_to_hist([1, 5, 4, 6, 2, 1, 1, 3, 2, 5]) == [0, 3, 2, 1, 1, 2, 1] def plot_die_hist(hist, label): """Plot a die-roll normalized histogram, without showing it. The input is a histogram indicating, for each number of pips, its normalized frequency. """ plt.plot(normalize(hist), label=label) plt.plot(normalize(hist), 'o') plt.axis([1,len(hist)-1,0,0.7]) plt.xlabel("Pips") plt.ylabel("Frequency") plt.legend() def plot_die_rolls(rolls, label): """Plot a die-roll normalized histogram. The input is a list of rolls, where each element is the sum of visible pips.""" hist = rolls_to_hist(rolls) plot_die_hist(hist, label) def show_two_hist(rolls1, name1, rolls2, name2, title): """Plot two die-roll normalized histograms, then show them.""" plot_die_rolls(rolls1, name1) plot_die_rolls(rolls2, name2) plt.title(title) plt.show() def plot_one_hist(rolls): """Plot a histogram of the given die roll outcomes, without showing the plot.""" numdice = 1 plt.hist(rolls, bins=np.linspace(numdice-.5, 6*numdice+.5, num=5*numdice+2)) plt.axis([0.5,6.5,0,max(rolls_to_hist(rolls))+1]) plt.xlabel("Pips") plt.ylabel("Frequency") def show_one_hist(rolls, title): """Plot a histogram of the given die roll outcomes, and show it.""" plot_one_hist(rolls) plt.title(title) plt.show() def show_one_hist_and_line(rolls, title): """Like show_one_hist, but also plot a line.""" plot_one_hist(rolls) hist = rolls_to_hist(rolls) plt.plot(hist) plt.plot(hist, 'o') plt.axis([0.5,6.5,0,max(hist)+1]) plt.title(title) plt.show() def die_roll(): """Return a single roll of a 6-sided die.""" return random.randrange(6) + 1 def dice_roll(numdice): """Return the sum of the given number of rolls of a 6-sided die.""" result = 0 for i in range(numdice): result = result + die_roll() return result def dice_rolls(numdice, numtrials): """Returns a list of numtrials numbers, each the sum of rolling numdice dice.""" result = [] for i in range(numtrials): result.append(dice_roll(numdice)) return result def show_notation(): show_one_hist(dice_rolls(1, 10), "10 fair die rolls") show_one_hist(dice_rolls(1, 10), "10 fair die rolls") random10_rolls = dice_rolls(1, 10) show_one_hist(random10_rolls, "10 fair die rolls") show_one_hist_and_line(random10_rolls, "The same 10 fair die rolls, connected by a line") def plot_random(numdice, numtrials): rolls = dice_rolls(numdice, numtrials) plot_die_rolls(rolls, "Random (" + str(numtrials) + ")") def show_random_1000(): plot_random(1, 1000) plt.title("Compare ideal to random die rolls") plt.show() def show_random_many(): plot_random(1, 10) plot_random(1, 100) plot_random(1, 1000) plot_random(1, 10000) plt.title("Compare different numbers of die rolls") plt.show() def show_random(): plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") plt.title("Expected histogram of die rolls") plt.show() plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") show_random_1000() show_random_many() def make_dice_histogram(therolls=None, therollname=None, numdice=None, numtrials=100000): if numdice == None: numdice = len(therolls) plt.hist(dice_rolls(numdice, numtrials), bins=np.linspace(numdice-.5, 6*numdice+.5, num=5*numdice+2)) plt.xlabel("Pips") plt.ylabel("Frequency") plt.title(str(numdice) + " dice") def annotate_dice_histogram(therolls, therollname, ycoord): """Add an annotation to a dice histogram.""" thepips = sum(therolls) plt.annotate(therollname, xy=(thepips, ycoord/3), xytext=(thepips, ycoord), arrowprops=dict(facecolor='black', shrink=0.2)) def show_dice_game_histogram(): """Show Ruth's and the student's rolls from the dice game.""" make_dice_histogram(therolls=ruth_rolls) annotate_dice_histogram(ruth_rolls, "Ruth's rolls", 3000) annotate_dice_histogram(student_rolls, student_name + "'s rolls", 3000) plt.title("Outcome of the die-rolling game") plt.show() def dice_likelihood_sum(thesum, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert numdice <= thesum <= 6*numdice rolls = dice_rolls(numdice, numtrials) as_unlikely = 0 for roll in rolls: if thesum <= roll: as_unlikely = as_unlikely + 1 return float(as_unlikely) / numtrials def dice_likelihood(therolls, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert len(therolls) == numdice thesum = sum(therolls) return dice_likelihood_sum(thesum, numdice, numtrials) def print_dice_likelihood(therolls, numtrials, name): "Print the p-value for the given rolls." print "Likelihood of " + name + "'s result or better:", dice_likelihood(therolls, len(therolls), numtrials) def print_dice_game_likelihood(): "Print the p-value for Ruth's rolls and the student's rolls." print_dice_likelihood(ruth_rolls, 100000, "Ruth") print_dice_likelihood(student_rolls, 100000, student_name) def show_10dice_histogram(): "Show a histogram of rolls of 10 dice, plus 6 points on the histogram." avg35 = [3,4,3,4,3,4,3,4,3,4] avg40 = [4,4,4,4,4,4,4,4,4,4] avg45 = [4,5,4,5,4,5,4,5,4,5] avg50 = [5,5,5,5,5,5,5,5,5,5] avg55 = [5,6,5,6,5,6,5,6,5,6] avg60 = [6,6,6,6,6,6,6,6,6,6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 2000) annotate_dice_histogram(avg40, "4", 2000) annotate_dice_histogram(avg45, "4.5", 2000) annotate_dice_histogram(avg50, "5", 2000) annotate_dice_histogram(avg55, "5.5", 2000) annotate_dice_histogram(avg60, "6", 2000) plt.title("10 die rolls") plt.show() def show_20dice_histogram(): "Show a histogram of rolls of 20 dice, plus 6 points on the histogram." avg35 = [3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4] avg40 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] avg45 = [4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5] avg50 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] avg55 = [5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6] avg60 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 1500) annotate_dice_histogram(avg40, "4", 1500) annotate_dice_histogram(avg45, "4.5", 1500) annotate_dice_histogram(avg50, "5", 1500) annotate_dice_histogram(avg55, "5.5", 1500) annotate_dice_histogram(avg60, "6", 1500) plt.title("20 die rolls") plt.show() def lecture(): """The main entry point, which shows all the lecture's plots in turn.""" show_notation() show_two_hist(ruth_rolls, ruth_name, student_rolls, student_name, "Dice-rolling competition") show_random() show_dice_game_histogram() print_dice_game_likelihood() show_10dice_histogram() show_20dice_histogram() lecture()