\documentclass{article}
% Packages
\usepackage{amsmath,amsfonts,amsthm,amssymb,amsopn,bm}
\usepackage[margin=.9in]{geometry}
\usepackage{graphicx}
\usepackage{url}
\usepackage[usenames,dvipsnames]{color}
\usepackage{fancyhdr}
\usepackage{multirow}
\usepackage{hyperref}
% For enumerate environment
\usepackage{enumitem}
% Label first-level enumerate items (a), (b), ... through enumitem's own
% interface rather than by redefining kernel macros. ref=\alph* keeps the
% cross-reference form a bare letter, matching the previous \theenumi.
\setlist[enumerate,1]{label=(\alph*),ref=\alph*}
% Math commands
% Blackboard-bold symbols for the reals and for expectation.
\newcommand*{\R}{\mathbb{R}}
\newcommand*{\E}{\mathbb{E}}
% Named operators: \DeclareMathOperator sets them upright and gives them
% operator spacing (e.g. a thin space in "2 \Var X"), unlike a bare \mathrm.
\DeclareMathOperator{\Var}{Var}
\DeclareMathOperator{\softmax}{softmax}
% Random variables in bold upright type. \newcommand* reports a name clash
% instead of silently overwriting an existing macro the way \def does.
\newcommand*{\rvx}{{\mathbf{x}}}
\newcommand*{\rvy}{{\mathbf{y}}}
% Postfix inverse: A\inv typesets as A^{-1}.
\newcommand*{\inv}{^{-1}}
% Formatting
% \grade{<n>}: small magenta point-value tag placed in front of a question.
%  - The size change is confined to a group, so the surrounding font size is
%    restored rather than forced to \normalsize.
%  - No trailing space is emitted here; the space after \grade{<n>} at the
%    call site separates the tag from the question text.
%  - Prints "point" when <n> is exactly 1 and "points" otherwise.
% \ifthenelse and \equal come from the ifthen package, which is loaded later
% in the preamble; that is fine because this body is only expanded when
% \grade is used inside the document.
\newcommand{\grade}[1]{{\small\textcolor{magenta}{[#1 \ifthenelse{\equal{#1}{1}}{point}{points}]}}}
% Suppress the date in the title block.
\date{}
% Solutions
\usepackage{ifthen}
% Flag controlling whether solutions are typeset.
\newboolean{showSolutions}
\setboolean{showSolutions}{false} % Change this to toggle solutions
% \solution{<text>}: when showSolutions is true, prints a bold "Solution:"
% label followed by <text>, all in blue; otherwise prints nothing.
\newcommand{\solution}[1]{%
  \ifthenelse{\NOT\boolean{showSolutions}}%
    {}% solutions hidden
    {{\leavevmode\color{blue}\textbf{Solution:} #1}}%
}
% Comments
% Author annotations: \hugh{<text>} sets <text> in blue, \ian{<text>} in red.
\newcommand{\hugh}[1]{\textcolor{blue}{#1}}
\newcommand{\ian}[1]{\textcolor{red}{#1}}
% No indent
\usepackage[parfill]{parskip}
\begin{document}
\title{Homework \#0}
\author{\normalsize{CSEP 590B: Explainable AI}\\
\normalsize{Prof. Su-In Lee} \\
\normalsize{Due: 4/4/22 11:59 PM}}
\maketitle
\section{Survey questions (5 points)}
To help us get to know you better, please answer the following questions about your experience with machine learning (ML) and explainable AI (XAI).
\begin{enumerate}
\item \grade{1} What is your level of ML experience (e.g., new, proficient, expert)? What ML-related courses have you taken, during your current degree or otherwise?
\item \grade{1} Where did you get experience with ML (e.g., school, personal study, work)? If you use ML for your job, what domain do you focus on (e.g., medicine, finance, advertising)?
\item \grade{1} What ML models do you use most often (e.g., linear models, random forests, neural networks)? Are there any ML models you would like to learn more about?
\item \grade{1} Describe a past ML project that you found most fascinating, either because of the impact, the effectiveness (or ineffectiveness) of the method, or the elegance of the tools involved.
\item \grade{1} What XAI tools are you familiar with, if any? What problems do you hope to solve with XAI?
\end{enumerate}
\section{Probability review (10 points)}
\begin{enumerate}
\item \grade{1} Consider a continuous random variable $\rvx \in \R$ with probability density function $p(\rvx)$. Define the random variable's expected value, $\E[\rvx]$.
\item \grade{1} Define the random variable's variance, $\Var(\rvx)$.
\item \grade{1} Given $n$ independent and identically distributed (i.i.d.) samples $x_1, \ldots, x_n \sim p(\rvx)$, write an estimator for the expected value $\E[\rvx]$.
\item \grade{1} Show that the estimator from (c) is an \textit{unbiased} estimator (i.e., its expected value is equal to $\E[\rvx]$, the value being estimated).
\textbf{Hint:} recall the linearity of expectations property (see \href{https://en.wikipedia.org/wiki/Expected_value#Properties}{here}).
\item \grade{2} Show how the variance of the estimator from (c) compares to $\Var(\rvx)$.
\textbf{Hint:} recall the properties for the variance of a sum of random variables (see \href{https://en.wikipedia.org/wiki/Variance#Basic_properties}{here}).
\item \grade{2} What does the (weak) law of large numbers (LLN) say about the estimator from (c)?
\item \grade{2} Given a second random variable $\rvy$, how do we determine the conditional probability $p(\rvy \mid \rvx)$? What does it mean when we have $p(\rvy) = p(\rvy \mid \rvx)$?
\end{enumerate}
\section{Calculus review (7 points)}
\begin{enumerate}
\item \grade{1} Consider a continuous function $f(t)$, or $f\colon \R \to \R$. Write the definition of the derivative $\frac{df}{dt}(t)$.
\item \grade{1} Describe the geometric interpretation of the derivative.
\item \grade{1} Consider a continuous function $g(x, y, z)$, or $g\colon \R^3 \to \R$. Write the definition of the partial derivative $\frac{\partial g}{\partial y}(x, y, z)$.
\item \grade{2} For the function $g(x, y, z)$ and an input value $(x_0, y_0, z_0) \in \R^3$, find the ``direction of greatest increase'' and describe what this means. How does this relate to gradient descent?
\item \grade{2} For a function $h(x, y, z)$ defined as $h(x, y, z) = f(g(x, y, z))$, use the chain rule to find $\frac{\partial h}{\partial y}$ in terms of $f$ and $g$. What is the relevance of the chain rule for training neural networks?
\end{enumerate}
\section{ML model review (8 points)}
\begin{enumerate}
\item \grade{2} For a dataset with $n$ examples denoted $X \in \R^{n \times p}$ and $Y \in \R^n$, derive the solution for a linear regression model fit with the standard least squares loss (assuming no intercept term, for simplicity). If we instead fit a logistic regression model with discrete labels $Y \in \{0, 1\}^n$ and log-loss, how does the model fitting procedure differ?
\item \grade{2} What are the differences between decision trees, random forests, and gradient-boosted trees (e.g., XGBoost)?
\item \grade{2} Write the equations to produce predictions for a multi-layer perceptron (MLP) with input $x \in \R^d$, one hidden layer of size $p$, sigmoid activations, and $K$ output probabilities (for $K$ possible classes). Indicate the size of each learnable parameter. \textbf{Hint:} rather than writing a single equation, write a sequence of equations where each output provides the input for the next operation.
\item \grade{1} List the sequence of layers used in the VGG-16 neural network architecture. What is the shape of the last convolutional layer's output? (\textbf{Hint:} you can find this in the original \href{https://arxiv.org/abs/1409.1556}{paper}, or you can find the architecture details in a blog post.)
\item \grade{1} For a neural network with parameters $\theta \in \R^L$, denote the prediction given an input $x$ as $f(x; \theta)$ and the loss for a single prediction as $\ell(f(x; \theta), y)$. Write the update step that we use to improve the parameters $\theta$ repeatedly over the course of training.
\end{enumerate}
\end{document}