{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "70e5d299",
   "metadata": {},
   "source": [
    "# Lasso and CV demo\n",
    "\n",
    "Fit lasso (L1-regularized) linear regression by subgradient descent on synthetic\n",
    "data, and select the `l1_penalty` hyperparameter by 10-fold cross-validation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f053f8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "LR = 0.01\n",
    "NUM_ITERATIONS = 500\n",
    "SEED = 42\n",
    "\n",
    "# Seed the global RNG so the synthetic data, the CV shuffle, and therefore the\n",
    "# selected penalty are reproducible under Restart & Run All.\n",
    "np.random.seed(SEED)\n",
    "\n",
    "# NOTE: here, X and Y represent only the training data, not the overall dataset (train + test).\n",
    "X = np.random.random((1000, 50))\n",
    "Y = np.random.random((1000,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b455645",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(w, b, Xin):\n",
    "    \"\"\"Return linear-model predictions np.dot(Xin, w) + b.\"\"\"\n",
    "    return np.dot(Xin, w) + b\n",
    "\n",
    "def fit(Xin, Yin, l1_penalty):\n",
    "    \"\"\"Fit lasso weights and bias with NUM_ITERATIONS steps of subgradient descent.\n",
    "\n",
    "    Returns the learned (w, b).\n",
    "    \"\"\"\n",
    "    n = Xin.shape[1]  # number of features\n",
    "\n",
    "    # weight initialization\n",
    "    w = np.zeros(n)\n",
    "    b = 0\n",
    "\n",
    "    # gradient descent learning\n",
    "    for _ in range(NUM_ITERATIONS):\n",
    "        w, b = update_weights(w, b, Xin, Yin, l1_penalty)\n",
    "\n",
    "    return w, b\n",
    "\n",
    "def update_weights(w, b, Xin, Yin, l1_penalty):\n",
    "    \"\"\"Perform one subgradient-descent step; return the updated (w, b).\"\"\"\n",
    "    m = Xin.shape[0]  # number of training examples\n",
    "    Y_pred = predict(w, b, Xin)\n",
    "    residual = Yin - Y_pred\n",
    "\n",
    "    # Vectorized gradients (replaces a per-coordinate Python loop).\n",
    "    # The L1 subgradient is +l1_penalty where w > 0 and -l1_penalty otherwise;\n",
    "    # w == 0 takes the -l1_penalty branch, matching the original loop.\n",
    "    dW = (-2 * Xin.T.dot(residual) + np.where(w > 0, l1_penalty, -l1_penalty)) / m\n",
    "    db = -2 * np.sum(residual) / m\n",
    "\n",
    "    # update weights\n",
    "    w = w - LR * dW\n",
    "    b = b - LR * db\n",
    "\n",
    "    return w, b\n",
    "\n",
    "def rmse_lasso(w, b, Xin, Yin):\n",
    "    \"\"\"RMSE of the linear model (w, b) evaluated on (Xin, Yin).\"\"\"\n",
    "    Y_pred = predict(w, b, Xin)\n",
    "    return rmse(Yin, Y_pred)\n",
    "\n",
    "def rmse(a, b):\n",
    "    \"\"\"Root-mean-squared error between arrays a and b.\"\"\"\n",
    "    return np.sqrt(np.mean(np.square(a - b)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efbc404c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# candidate values for l1 penalty (50 log-spaced points from 1e-5 to 1e-1)\n",
    "l1_penalties = 10 ** np.linspace(-5, -1)\n",
    "err = np.zeros(len(l1_penalties))\n",
    "\n",
    "# We will perform 10-fold CV. Here, we will create the training and validation sets by\n",
    "# creating an indices array with randomized index values to use when slicing our training data.\n",
    "# NOTE: if len(X) is not divisible by k_fold, the trailing len(X) % k_fold shuffled\n",
    "# samples never appear in any validation fold (they stay in training). 1000 / 10 is exact here.\n",
    "k_fold = 10\n",
    "num_samples = len(X) // k_fold\n",
    "indices = np.random.permutation(len(X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c9c09ae5",
   "metadata": {},
   "outputs": [],
   "source": [
    "for idx, l1_penalty in enumerate(l1_penalties):\n",
    "    for k in range(k_fold):  # 10-fold CV\n",
    "        # slice larger training set into validation and training sets for each fold\n",
    "        VAL = indices[k * num_samples : (k + 1) * num_samples]\n",
    "        TRAIN = np.concatenate((indices[: k * num_samples], indices[(k + 1) * num_samples:]))\n",
    "\n",
    "        x_train_fold = X[TRAIN]\n",
    "        y_train_fold = Y[TRAIN]\n",
    "\n",
    "        x_val_fold = X[VAL]\n",
    "        y_val_fold = Y[VAL]\n",
    "\n",
    "        w, b = fit(x_train_fold, y_train_fold, l1_penalty)\n",
    "\n",
    "        # accumulate error from this fold of validation set\n",
    "        err[idx] += rmse_lasso(w, b, x_val_fold, y_val_fold)\n",
    "\n",
    "    # average the accumulated validation error over all k folds\n",
    "    err[idx] /= k_fold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1af6dd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "l1_penalty_best = l1_penalties[np.argmin(err)]\n",
    "\n",
    "print('Best choice of l1_penalty = ', l1_penalty_best)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cse446",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}