\documentclass[letterpaper,11pt]{article}
\newcommand{\removefromfile}[1]{}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\usepackage{amsmath}
\usepackage{enumitem}
\usepackage{hyperref}
\usepackage{graphicx}
\usepackage[legalpaper, portrait, margin=1in]{geometry}
\usepackage{listings}
\begin{document}
\title{{\bf Problem Set 5 Solutions} }
\author{insert your name here}
\date{}
\maketitle
\section*{Problem 1 (Random Grades) Solution}
Every week, 20,000 students flip a 10,000-sided fair dice, numbered 1 to 10,000, to see if they can get their GPA changed to a 4.0. If they roll a 1, they win (they get their GPA changed). You may assume each student's roll is independent. Let $X$ be the number of students who win.\\
\begin{enumerate}[label=(\alph*)]
\item For any given week, give the appropriate probability distribution (including parameter(s)), and find the expected number of students who win.\\
\textbf{Solution:}
\item For any given week, find the exact probability that at least 2 students win. Give your answer to 5 decimal places.\\
\textbf{Solution:}
\item For any given week, estimate the probability that at least 2 students win, using the Poisson approximation. Give your answer to 5 decimal places.\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 2 (Instagram) Solution}
A photo-sharing startup offers the following service. A client may upload any number
$N$ of photos and the server will compare each of the $N \choose 2$ pairs of photos
with their proprietary image matching algorithms to see if there is any person that
is in both pictures. Testing shows that the matching algorithm is the slowest part
of the service, taking about 100 milliseconds of CPU time per photo pair. Hence, estimating
the number of photos uploaded by each client is a key part of sizing their data center.
The people in charge say that their gut feeling is that $N=10$. You (the chief technical
officer) say, ``but $N$ is a random variable''. What will the \textbf{expected} time (in milliseconds) for CPU demand per
client be (as a function of $N$, $p$ or $\lambda$) if $N$ follows\\
\begin{enumerate}[label=(\alph*)]
\item the ``distribution'' where $N$ is the same fixed number with probability 1?\\
\textbf{Solution:}
\item the Poisson distribution with parameter $\lambda$?\\
\textbf{Solution:}
\item the geometric distribution with parameter $p$?\\
\textbf{Solution:}
\item $N= 80 X + 5$, where $X$ is a Bernoulli random variable with parameter $p$?\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 3 (Binomial From Nowhere) Solution}
Consider repeatedly rolling a fair 6-sided die, each roll being
independent of the others. Define the random variable $Y$ to be the
number of rolls until (and including) the first roll of a 6, and
define the random variable $X$ to be the number of 1's rolled before
the first 6 is rolled. Show that $\Pr(X = j | Y = i)$, as $j$
ranges over its possible values, is the probability mass function of
a binomially distributed random variable and determine its
parameters $n$ and $p$\\
\newline
\textbf{Solution:}
\newpage
\section*{Problem 4 (Sample Sampling Algorithm) Solution}
Consider the algorithm for generating a random sample of size $n$ from the set of integers $\{1, 2, \ldots, N\}$, where $0 < n < N$, as given in the problem statement.
\begin{enumerate}[label=(\alph*)]
\item Let the random variable $I_i$ be the number of rolls it takes from the time the chosen set has
$i - 1$ values to the first time a new value is added (i.e., the chosen set has $i$ values).
What type of random variable from our zoo is $I_i$ and what is/are the relevant parameter(s)?
What is $\rm{I}$ in terms of the random variables $I_i$?
Calculate $E[\rm{I}]$ in terms of N and n.\\
\textbf{Solution:}
\item What is ${\rm Var}(\rm{I})$? You can leave your answer in summation form.\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 6 (A Math Problem) Solution}
For this exercise, give exact answers as simplified fractions.
Compute $\E[X]$ and Var($X$) if $X$ has probability density function
given by \ldots
\(f(x) = \left\{
\begin{array}{ll}
c(1 - x^4) & \mbox{if } -1 < x < 1 \\
0 & \mbox{otherwise}
\end{array}
\right. .\)
Determine the value of $c$ as part of your answer.\\
\newline
\textbf{Solution:}
\newpage
\section*{Problem 7 (Exponential Darts) Solution}
You throw a dart at a circular target of radius $r$. Let $X$ be
the distance of your dart's hit from the center of the target. You
have improved and your aim is such that $X \sim \mbox{Exponential}(4/r)$. (Note that it is possible for the dart to completely miss the target.)\\
\begin{enumerate}[label=(\alph*)]
\item As a function of $r$, determine the value $m$ such that
$\mbox{Pr}(Xm)$. Then, for $r=10$, give the value of
$m$ to 3 decimal places.\\
\textbf{Solution:}
\item What is the probability that you miss the target completely?
Give your answer to 3 decimal places.\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 8 (The Classic Flea Problem) Solution}
A flea of negligible size is trapped in a large, spherical,
inflated beach ball with radius $r$. At this moment, it is equally
likely to be at any point within the ball. Let $X$ be the distance
of the flea from the center of the ball. For $X$, find \ldots\\
\begin{enumerate}[label=(\alph*)]
\item the cumulative distribution function $F$.\\
\textbf{Solution:}
\item the probability density function $f$.\\
\textbf{Solution:}
\item the expected value.\\
\textbf{Solution:}
\item the variance.\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 9 (Normal, Normal, Normal) Solution}
\begin{enumerate}[label=(\alph*)]
\item Suppose that $X$ is normally distributed with mean 50 and standard deviation 10. Calculate the probability that $25 < X < 75$.\\
\textbf{Solution:}
\item Apparently IQ is roughly normally distributed, with a mean of 100 and a standard deviation of about 15 (note: evidence shows IQ doesn't do a good job at actually measure intelligence, it only measure how good you are at doing that one test). What fraction of people would be classified as genius (IQ of 140 or above)?\\
\textbf{Solution:}
\item The height of American adult females is approximately normally distributed with a mean of 64 inches (5' 4'') and a standard deviation of 2.7 inches. Approximately what fraction of American females are 5' tall or less? If we form a basketball team by picking 5 American adult females at random, calculate the probability that at least one of them is over 6 feet (72 inches) tall.\\
\textbf{Solution:}
\end{enumerate}
\newpage
\section*{Problem 10 (Extra Credit) Solution [OPTIONAL]}
Below are two sequences of Heads and Tails, each (supposedly) representing 300 independent flips of a fair coin. One of these sequences was truly randomly created, and one was typed by a human. Both sequences have exactly 149 heads. Which one is more likely to be the ``real'' random sequence? In your write-up, you should justify your reasoning with evidence and valid results, e.g. from running your code on the two sequences. There are multiple valid and correct approaches. To be eligible for full credit, you are required to turn in your detailed analysis along with your Python code.
\begin{enumerate}
\item (Sequence 1)
\begin{verbatim}
TTHHTHTTHTTTHTTTHTTTHTTHTHHTHHTHTHHTTTHHTHTHTTHTHHTTHTHHTHTT
THHTTHHTTHHHTHHTHTTHTHTTHHTHHHTTHTHTTTHHTTHTHTHTHTHTTHTHTHHH
TTHTHTHHTHHHTHTHTTHTTHHTHTHTHTTHHTTHTHTTHHHTHTHTHTTHTTHHTTHT
HHTHHHTTHHTHTTHTHTHTHTHTHTHHHTHTHTHTTHTHHTHTHTTHTTTHHTHTTTHT
HHTHHHHTTTHHTHTHTHTHHHTTHHTHTTTHTHHTHTHTHHTHTTHTTHTHHTHTHTHH
\end{verbatim}
\item (Sequence 2) \begin{verbatim}
HTHHHTHTTHHTTTTTTTTHHHTTTHHTTTTHHTTHHHTTHTHTTTTTTHTHTTTTHHHH
THTHTTHTTTHTTHTTTTHTHHTHHHHTTTTTHHHHTHHHTTTTHTHTTHHHHTHHHHHH
HHTTHHTHHTHHHHHHHTTHTHTTTHHTTTTHTHHTTHTTHTHTHTTHHHHHTTHTTTHT
HTHHTTTTHTTTTTHHTHTHHHHTTTTHTHHHHHHTHTHTHTHHHTHTTHHHTHHHHHHT
HHHTHTTTHHHTTTHHTHTTHHTHHHTHTTHTTHTTTHHTHTHTTTTHTHTHTTHTHTHT
\end{verbatim}
\end{enumerate}\\
\newline
\textbf{Solution:}
\newpage
\end{document}