### Python code to produce plots and experiments used in statistics lecture. ### The entry point is the lecture() function, which is called at the end ### of this file. import matplotlib.pyplot as plt import numpy as np import math import random ruth_rolls = [5, 6, 6, 5, 6, 6] ruth_name = "Ruth" student_rolls = [1, 2, 4, 3, 3, 1] student_name = "Jacob" def normalize(list): """Return a new list in which the elements are scaled to sum to 1. The input list is a list of numbers. It is not modified by this routine. """ listsum = float(sum(list)) result = [] for elt in list: result.append(elt / listsum) return result assert normalize([3, 1, 0, 3, 2, 1]) == [.3, .1, 0, .3, .2, .1] def rolls_to_hist(rolls): """Convert a list of single-die rolls to a list of counts. The input is a list, all of whose numbers are between 1 and 6 inclusive. In the result list, result[i] is the number of times i appeared in the input list. In the result list, result[0] == 0. """ result = [0,0,0,0,0,0,0] for roll in rolls: result[roll] += 1 return result assert rolls_to_hist([1, 5, 4, 6, 2, 1, 1, 3, 2, 5]) == [0, 3, 2, 1, 1, 2, 1] def plot_die_rolls(rolls, label): """Plot a die-roll normalized histogram. The input is a list of rolls, where each element is the sum of visible pips.""" hist = rolls_to_hist(rolls) plot_die_hist(hist, label) def plot_die_hist(hist, label): """Plot a die-roll normalized histogram, without showing it. The input is a histogram indicating, for each number of pips, its normalized frequency. """ plt.plot(normalize(hist), label=label) plt.plot(normalize(hist), 'o') plt.axis([1,len(hist)-1,0,0.7]) plt.xlabel("Pips") plt.ylabel("Frequency") plt.legend() def show_two_hist(rolls1, name1, rolls2, name2, title): """Plot two die-roll normalized histograms, then show them.""" plot_die_rolls(rolls1, name1) plot_die_rolls(rolls2, name2) plt.title(title) plt.show() def plot_one_hist(rolls): """Plot a histogram of the given die roll outcomes, without showing the plot.""" numdice = 1 plt.hist(rolls, bins=np.linspace(numdice-.5, 6*numdice+.5, num=5*numdice+2)) plt.axis([0.5,6.5,0,max(rolls_to_hist(rolls))+1]) plt.xlabel("Pips") plt.ylabel("Frequency") def show_one_hist(rolls, title): """Plot a histogram of the given die roll outcomes, and show it.""" plot_one_hist(rolls) plt.title(title) plt.show() def show_one_hist_and_line(rolls, title): """Like show_one_hist, but also plot a line.""" plot_one_hist(rolls) hist = rolls_to_hist(rolls) plt.plot(hist) plt.plot(hist, 'o') plt.axis([0.5,6.5,0,max(hist)+1]) plt.title(title) plt.show() def die_roll(): """Return a single roll of a 6-sided die.""" return random.randrange(6) + 1 def dice_roll(numdice): """Return the sum of the given number of rolls of a 6-sided die.""" result = 0 for i in range(numdice): result = result + die_roll() return result def dice_rolls(numdice, numtrials): """Returns a list of numtrials numbers, each the sum of rolling numdice dice.""" result = [] for i in range(numtrials): result.append(dice_roll(numdice)) return result random10_rolls = dice_rolls(1, 10) def show_notation(): show_one_hist(dice_rolls(1, 10), "10 fair die rolls") show_one_hist(dice_rolls(1, 10), "10 fair die rolls") random10_rolls = dice_rolls(1, 10) show_one_hist(random10_rolls, "10 fair die rolls") show_one_hist_and_line(random10_rolls, "The same 10 fair die rolls, connected by a line") def plot_random(numdice, numtrials): rolls = dice_rolls(numdice, numtrials) plot_die_rolls(rolls, "Random (" + str(numtrials) + ")") def show_random_1000(): plot_random(1, 1000) plt.title("Compare ideal to random die rolls") plt.show() def show_random_many(): plot_random(1, 10) plot_random(1, 100) plot_random(1, 1000) plot_random(1, 10000) plt.title("Compare different numbers of die rolls") plt.show() def show_random(): plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") plt.title("Expected histogram of die rolls") plt.show() plot_die_rolls([1, 2, 3, 4, 5, 6], "Ideal") show_random_1000() show_random_many() zurich_first_roll_hist0 = [25, 28, 45, 49, 106, 136] zurich_second_roll_hist0 = [5, 4, 6, 10, 28, 57] def hist0_to_rolls(hist0): rolls = [] for i in range(6): for j in range(hist0[i]): rolls.append(i+1) return rolls zurich_first_rolls = hist0_to_rolls(zurich_first_roll_hist0) zurich_second_rolls = hist0_to_rolls(zurich_second_roll_hist0) def show_zurich_students(): plot_die_hist(zurich_first_roll_hist0, "First roll (389 students)") plt.xlabel("Payoff (CHF)") plt.axis([0,5,0,.6]) plt.title("Zurich students") plt.show() plot_die_hist(zurich_first_roll_hist0, "First roll (389 students)") plot_die_hist(zurich_second_roll_hist0, "Second roll (110 students)") plt.xlabel("Payoff (CHF)") plt.axis([0,5,0,.6]) plt.title("Zurich students") plt.show() def dice_rolls(numdice, numtrials): dice_hist = [0] * (numdice * 6 + 1) result = [] for i in range(numtrials): result.append(dice_roll(numdice)) return result def make_dice_histogram(therolls=None, therollname=None, numdice=None, numtrials=100000): if numdice == None: numdice = len(therolls) plt.hist(dice_rolls(numdice, numtrials), bins=np.linspace(numdice-.5, 6*numdice+.5, num=5*numdice+2)) plt.xlabel("Pips") plt.ylabel("Frequency") plt.title(str(numdice) + " dice") def annotate_dice_histogram(therolls, therollname, ycoord): "Add an annotation to a dice histogram." thepips = sum(therolls) plt.annotate(therollname, xy=(thepips, ycoord/3), xytext=(thepips, ycoord), arrowprops=dict(facecolor='black', shrink=0.2), ) def show_dice_game_histogram(): "Show Ruth's and the student's rolls from the dice game." make_dice_histogram(therolls=ruth_rolls) annotate_dice_histogram(ruth_rolls, "Ruth's rolls", 3000) annotate_dice_histogram(student_rolls, student_name + "'s rolls", 3000) plt.title("Outcome of the die-rolling game") plt.show() def dice_likelihood(therolls, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert len(therolls) == numdice thesum = sum(therolls) return dice_likelihood_sum(thesum, numdice, numtrials) def dice_likelihood_sum(thesum, numdice, numtrials): """Return the likelihood that a random roll of numdice will show at least as many pips as therolls does.""" assert numdice <= thesum <= 6*numdice rolls = dice_rolls(numdice, numtrials) as_unlikely = 0 for roll in rolls: if thesum <= roll: as_unlikely = as_unlikely + 1 return float(as_unlikely) / numtrials def print_dice_likelihood(therolls, numtrials, name): "Print the p-value for the given rolls." print "Likelihood of " + name + "'s result or better:", dice_likelihood(therolls, len(therolls), numtrials) def print_dice_game_likelihood(): "Print the p-value for Ruth's rolls and the student's rolls." print_dice_likelihood(ruth_rolls, 100000, "Ruth") print_dice_likelihood(student_rolls, 100000, student_name) def show_zurich_histogram(): "Show a histogram of the Zurich students' first roll, then a histogram of both rolls." make_dice_histogram(therolls=zurich_first_rolls, numtrials=10000) print "Zurich students first roll: sum of", len(zurich_first_rolls),"trials =", sum(zurich_first_rolls) annotate_dice_histogram(zurich_first_rolls, "First roll", 30) plt.title("Zurich students, first roll") plt.show() make_dice_histogram(therolls=zurich_second_rolls, numtrials=10000) print "Zurich students second roll: sum of", len(zurich_second_rolls),"trials =", sum(zurich_second_rolls) annotate_dice_histogram(zurich_second_rolls, "Second roll", 100) plt.title("Zurich students, second roll") plt.show() def print_zurich_dice_likelihood(): "Print the p-value for the Zurich students first roll and for their second roll." print_dice_likelihood(zurich_first_rolls, 100000, "first roll") print_dice_likelihood(zurich_second_rolls, 100000, "second roll") def show_10dice_histogram(): "Show a histogram of rolls of 10 dice, plus 6 points on the histogram." avg35 = [3,4,3,4,3,4,3,4,3,4] avg40 = [4,4,4,4,4,4,4,4,4,4] avg45 = [4,5,4,5,4,5,4,5,4,5] avg50 = [5,5,5,5,5,5,5,5,5,5] avg55 = [5,6,5,6,5,6,5,6,5,6] avg60 = [6,6,6,6,6,6,6,6,6,6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 2000) annotate_dice_histogram(avg40, "4", 2000) annotate_dice_histogram(avg45, "4.5", 2000) annotate_dice_histogram(avg50, "5", 2000) annotate_dice_histogram(avg55, "5.5", 2000) annotate_dice_histogram(avg60, "6", 2000) plt.title("10 die rolls") plt.show() def show_20dice_histogram(): "Show a histogram of rolls of 20 dice, plus 6 points on the histogram." avg35 = [3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4] avg40 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] avg45 = [4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5] avg50 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] avg55 = [5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6] avg60 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] make_dice_histogram(therolls=avg35) annotate_dice_histogram(avg35, "3.5", 1500) annotate_dice_histogram(avg40, "4", 1500) annotate_dice_histogram(avg45, "4.5", 1500) annotate_dice_histogram(avg50, "5", 1500) annotate_dice_histogram(avg55, "5.5", 1500) annotate_dice_histogram(avg60, "6", 1500) plt.title("20 die rolls") plt.show() def lecture(): """The main entry point, which shows all the lecture's plots in turn.""" show_notation() show_two_hist(ruth_rolls, ruth_name, student_rolls, student_name, "Dice-rolling competition") show_random() #show_zurich_students() show_dice_game_histogram() print_dice_game_likelihood() #show_zurich_histogram() #print_zurich_dice_likelihood() #show_10dice_histogram() #show_20dice_histogram() lecture()