In [ ]:
import numpy as np
import matplotlib.pyplot as plt

k = 5
m = 100
B = 5000
sigma = 1

# We create m k-tuples of 1D-Gaussians with mean 0 and standard deviation sigma = 1
Z = np.random.normal(0,sigma,(m,k))
Z_mean = Z.mean(axis=1)
In [ ]:
# For each k-tuple, we compute the MLE estimates of sigma
x = ((Z-Z_mean[:,None])**2).mean(axis=1)
print(f"Mean of MLE estimates of sigma^2: {x.mean()}")
print(f"Std deviation of MLE estimates of sigma^2: {x.std()}")

plt.hist(x,100)

print("1 is contained in 2 standard deviations of the observations?: ",(1<(x.mean()+2*x.std()))&(1>(x.mean()-2*x.std())))
print("Are there negative numbers within its 2 standard deviations?: ",0>(x.mean()-2*x.std()))
Mean of MLE estimates of sigma^2: 0.785830023427095
Std deviation of MLE estimates of sigma^2: 0.5496871739192876
1 is contained in 2 standard deviations of the observations?:  True
Are there negative numbers within its 2 standard deviations?:  True
In [ ]:
print("We will use non-parametric bootstrap to compute a better confidence interval.")

sigma2_ests = np.zeros(B)
for b in range(B):
    # For each bootstrap, we sample with replacement m samples from the m observations
    bootstrap = np.random.randint(0,m,m)
    x_bootstrap = x[bootstrap]
    sigma2_ests[b] = np.mean(x_bootstrap)

# Note that we did not generate any new observations in the above process

# The bootstrap confidence interval is computed using the 5 and 95 percentiles of the estimates
interval_left = np.percentile(sigma2_ests,5)
interval_right = np.percentile(sigma2_ests,95)

plt.hist(sigma2_ests,100)
plt.axvline(x=interval_left,linestyle='--',c='r')
plt.axvline(x=interval_right,linestyle='--',c='r')
plt.axvline(x=1,linestyle=':',c='b',label='Variance of population Gaussian dist.')
plt.legend(bbox_to_anchor=(1,1))
plt.title('Confidence interval of MLE of sigma^2.')
We will use non-parametric bootstrap to compute a better confidence interval.
Out[ ]:
Text(0.5, 1.0, 'Confidence interval of MLE of sigma^2.')
In [ ]:
# Recall that the MLE of Gaussian standard deviation is a biased estimator!
print("Is 1 contained in the Bootstrap confidence interval?: ",(1<interval_right)&(1>interval_left))
Is 1 contained in the Bootstrap confidence interval?:  False