"""Template for questions Q1-Q6 on the data in ``home_data.csv``.

Every name assigned ``None`` below is a placeholder that still has to be
filled in; the TODO markers show where the work goes.
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# TODO: Add any additional imports here

# Load the full data set once; everything below works from this frame.
sales = pd.read_csv('home_data.csv')

# Q1
# TODO
num_rows = None
# TODO
y = None
# TODO
num_inputs = None

# Q2
# TODO
avg_price_3_bed = None

# Q3
# TODO
percent_q3 = None

# Q4
# TODO

# Set seed to create pseudo-randomness.
# train_test_split is called without random_state, so it draws from NumPy's
# global RNG; seeding that RNG here is what makes the split reproducible.
np.random.seed(416)

# Split data into 80% train and 20% validation
train_data, val_data = train_test_split(sales, test_size=0.2)

basic_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]

# The advanced feature set is the basic set plus the columns listed here.
advanced_features = basic_features + [
    'condition',      # condition of the house
    'grade',          # measure of quality of construction
    'waterfront',     # waterfront property
    'view',           # type of view
    'sqft_above',     # square feet above ground
    'sqft_basement',  # square feet in basement
    'yr_built',       # the year built
    'yr_renovated',   # the year renovated
    'lat',            # the latitude of the parcel
    'long',           # the longitude of the parcel
    'sqft_living15',  # average sq.ft. of 15 nearest neighbors
    'sqft_lot15',     # average lot size of 15 nearest neighbors
]

# TODO
basic_model = None
advanced_model = None

# Q5
# TODO
train_rmse_basic = None
train_rmse_advanced = None

# Q6
# TODO
val_rmse_basic = None
val_rmse_advanced = None