import turicreate as tc
# Walkthrough of basic SFrame / SArray indexing using the Philadelphia
# crime-rate data set. Bare expressions (e.g. `sales`) only display a value
# when run interactively (notebook / REPL); in a plain script they are no-ops.

# NOTE(review): the path ends with '/', so it presumably points at a directory
# holding the CSV data (read_csv accepts a directory) -- confirm on disk.
sales = tc.SFrame.read_csv('Philadelphia_Crime_Rate_noNA.csv/')
sales

# To select a single column
sales['HousePrice']
# An SArray is a single dimension (one column) of an SFrame
type(sales['HousePrice'])

# Can also select multiple columns by giving several column names
sales['HousePrice', 'CrimeRate']
# If there are multiple columns, the result is an SFrame instead of an SArray
type(sales['HousePrice', 'CrimeRate'])

# To get the row with index 2
sales[2]
# The type of a row is a dictionary
# (For Java people: a Map with String keys and any typed values)
type(sales[2])

# To access the value for a specific column in that row
row = sales[2]
row['County']
# Can also do this in one line with
# sales[2]['County']

# Can be fancy and select multiple rows with "slice" notation.
# This creates a range from 0 (inclusive) to 10 (exclusive) counting by 2's,
# i.e. it selects the rows with indices 0, 2, 4, 6 and 8.
sales[0:10:2]

# To get all the rows that are in Bucks county
sales[sales['County'] == 'Bucks']

# Let's break up what this is doing.
# Single-argument print(...) calls behave identically on Python 2 and 3.
print('What are you passing into the array access?')
mask = sales['County'] == 'Bucks'
print(mask)
# This is a "mask" of 1s and 0s that indicates which rows should be selected.
# The entries are 1 where the row matches the condition `== 'Bucks'`.
print('What happens if you use this list to access the sales?')
sales[mask]

# What if you want every row that is in Bucks county and has a CrimeRate > 15?
# Note: use `&` instead of `and` like you would in regular Python.
sales[(sales['County'] == 'Bucks') & (sales['CrimeRate'] > 15)]
# Could also filter in two steps, which is a bit clunkier. The second mask
# must be built from the already-filtered frame so the lengths match:
# bucks = sales[sales['County'] == 'Bucks']
# bucks[bucks['CrimeRate'] > 15]

# Also remember that in most cases in Python, if you want to find out the
# number of elements in a collection you use `len`.
# str.format keeps the output the same as the old `print 'label', value` form
# on both Python 2 and Python 3.
print('All sales {}'.format(len(sales)))
print('Bucks sales {}'.format(len(sales[sales['County'] == 'Bucks'])))