"""Regularized regression on house-price data.

Engineers square / square-root versions of each raw feature, standardizes
using training-set statistics, then compares an unregularized linear model
against Ridge (L2) and LASSO (L1) models across a grid of penalties,
selecting the penalty with the lowest validation RMSE.
"""
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.model_selection import train_test_split

np.random.seed(416)

# Import data (subsample to keep the run fast).
sales = pd.read_csv('home_data.csv')
sales = sales.sample(frac=0.01)

# All of the features of interest
selected_inputs = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'waterfront',
    'view',
    'condition',
    'grade',
    'sqft_above',
    'sqft_basement',
    'yr_built',
    'yr_renovated',
]

# Compute the square and sqrt of each feature
all_features = []
for data_input in selected_inputs:
    square_feat = data_input + '_square'
    sqrt_feat = data_input + '_sqrt'

    # Q1: Compute the square and square root as two new features
    sales[square_feat] = sales[data_input] ** 2
    sales[sqrt_feat] = np.sqrt(sales[data_input])

    all_features.extend([data_input, square_feat, sqrt_feat])

price = sales['price']
sales = sales[all_features]

# Train test split: 80% train+validation / 20% test, then carve 10% of the
# original data (0.125 of the 80%) out as the validation set.
train_and_validation_sales, test_sales, train_and_validation_price, test_price = \
    train_test_split(sales, price, test_size=0.2)
train_sales, validation_sales, train_price, validation_price = \
    train_test_split(train_and_validation_sales, train_and_validation_price,
                     test_size=.125)  # .10 (validation) of .80 (train + validation)

# Q2: Standardize data.
# Statistics come from the TRAINING set only, so no information from the
# validation/test sets leaks into the model inputs.
train_mean = train_sales.mean()
train_std = train_sales.std()
train_sales = (train_sales - train_mean) / train_std
validation_sales = (validation_sales - train_mean) / train_std
test_sales = (test_sales - train_mean) / train_std


def _rmse(model, features, targets):
    """Return the root-mean-squared error of model's predictions on (features, targets)."""
    predictions = model.predict(features)
    return sqrt(((predictions - targets) ** 2).mean())


# Q3: Train baseline model (ordinary least squares, no regularization).
linear_model = LinearRegression()
linear_model.fit(train_sales, train_price)
test_rmse_unregularized = _rmse(linear_model, test_sales, test_price)

# Train Ridge models
l2_lambdas = np.logspace(-5, 5, 11, base=10)

# Q4: Implement code to evaluate Ridge Regression with various L2 Penalties
ridge_rows = []
for l2_penalty in l2_lambdas:
    ridge_model = Ridge(alpha=l2_penalty)
    ridge_model.fit(train_sales, train_price)
    ridge_rows.append({
        'l2_penalty': l2_penalty,
        'model': ridge_model,
        'train_rmse': _rmse(ridge_model, train_sales, train_price),
        'validation_rmse': _rmse(ridge_model, validation_sales, validation_price),
    })
ridge_data = pd.DataFrame(ridge_rows)

# Q5: Analyze Ridge data — pick the penalty that minimizes validation RMSE,
# then evaluate that model on the held-out test set.
best_ridge_row = ridge_data.loc[ridge_data['validation_rmse'].idxmin()]
best_l2 = best_ridge_row['l2_penalty']
best_ridge_model = best_ridge_row['model']
test_rmse_ridge = _rmse(best_ridge_model, test_sales, test_price)
# Ridge shrinks coefficients toward 0 but rarely makes them exactly 0.
num_zero_coeffs_ridge = int((best_ridge_model.coef_ == 0).sum())

# Train LASSO models
l1_lambdas = np.logspace(1, 7, 7, base=10)

# Q6: Implement code to evaluate LASSO Regression with various L1 penalties
lasso_rows = []
for l1_penalty in l1_lambdas:
    lasso_model = Lasso(alpha=l1_penalty)
    lasso_model.fit(train_sales, train_price)
    lasso_rows.append({
        'l1_penalty': l1_penalty,
        'model': lasso_model,
        'train_rmse': _rmse(lasso_model, train_sales, train_price),
        'validation_rmse': _rmse(lasso_model, validation_sales, validation_price),
    })
lasso_data = pd.DataFrame(lasso_rows)

# Q7: LASSO Analysis — same selection procedure as Ridge; the L1 penalty
# drives many coefficients to exactly 0 (sparse solution).
best_lasso_row = lasso_data.loc[lasso_data['validation_rmse'].idxmin()]
best_l1 = best_lasso_row['l1_penalty']
best_lasso_model = best_lasso_row['model']
test_rmse_lasso = _rmse(best_lasso_model, test_sales, test_price)
num_zero_coeffs_lasso = int((best_lasso_model.coef_ == 0).sum())