KzO9K




Pay Notebook Creator: Rocky Singh0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [1]:
# CrossCompute
# Path of the input CSV; it is read with pandas further down and is expected
# to provide a 'zipcode' column.
zipcode_table_path = 'zipcode.csv'
# NOTE(review): target_folder is not referenced by any visible cell --
# confirm whether results are meant to be written here.
target_folder = '/tmp'
In [2]:
import pandas as pd

# Zipcodes are identifiers, not quantities: read them as text so leading
# zeros survive (integer parsing would turn '07030' into 7030 and break any
# later string comparison against a ZIPCODE column).
zipcode_table = pd.read_csv(zipcode_table_path, dtype={'zipcode': str})
# Preview the first rows only
zipcode_table.head(3)
Out[2]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
zipcode
0 11365
1 10010
2 10021
In [3]:
# Zip archive served by data.cityofnewyork.us; passed to geotable below to
# load the zipcode geometries.
url = 'https://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip'
In [4]:
# Alternative (disabled): use the default projection to get zipcode area in square feet
# import geotable
# nyc_zipcode_table = geotable.load(url)
# print(nyc_zipcode_table.iloc[0]['AREA'])
# print(nyc_zipcode_table.iloc[0]['geometry_object'].area)
In [5]:
# Get UTM projection
# load_utm_proj4 yields a proj4 definition string for the dataset at `url`;
# the recorded output is a UTM zone 18 definition with units in meters.
import geotable
utm_proj4 = geotable.load_utm_proj4(url)
# Display the proj4 string
utm_proj4
Out[5]:
'+proj=utm +zone=18 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
In [6]:
# Load the zipcode geometries reprojected to UTM, whose units are meters,
# so that geometry areas come out in square meters
nyc_zipcode_table = geotable.load(url, target_proj4=utm_proj4)
# Spot-check the area of the first geometry
nyc_zipcode_table['geometry_object'].iloc[0].area
Out[6]:
2107694.2486345936
In [7]:
# Store each geometry's area as a new column, then preview it next to the zipcode
nyc_zipcode_table['Area in Square Meters'] = nyc_zipcode_table['geometry_object'].map(
    lambda geometry: geometry.area)
nyc_zipcode_table[['ZIPCODE', 'Area in Square Meters']].head(5)
Out[7]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
ZIPCODE Area in Square Meters
0 11436 2.107694e+06
1 11213 2.751168e+06
2 11212 3.897041e+06
3 11225 2.200345e+06
4 11218 3.423119e+06
In [8]:
# Keep only the zipcode and its area; .copy() detaches the result from the
# full geotable so later column assignments do not touch the original
relevant_columns = ['ZIPCODE', 'Area in Square Meters']
nyc_zipcode_table = nyc_zipcode_table[relevant_columns].copy()
In [9]:
# Preview the input zipcodes before merging
zipcode_table.head(3)
Out[9]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
zipcode
0 11365
1 10010
2 10021
In [10]:
# Join the input zipcodes to their areas.
# The key is cast to str first -- presumably because ZIPCODE holds strings
# (TODO confirm). merge defaults to an inner join, so zipcodes without a
# match in nyc_zipcode_table are dropped from the result.
zipcode_table['zipcode'] = zipcode_table['zipcode'].astype(str)
dataset_table = zipcode_table.merge(
    nyc_zipcode_table, left_on='zipcode', right_on='ZIPCODE')
dataset_table.head(3)
Out[10]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
zipcode ZIPCODE Area in Square Meters
0 11365 11365 6.442713e+06
1 10010 10010 9.069620e+05
2 10021 10021 9.744470e+05
In [12]:
# Hard-coded tree counts, assigned positionally to the rows of dataset_table.
# The list length must equal the number of rows, otherwise pandas raises a
# ValueError.
# NOTE(review): these look like sample values -- confirm the real data source.
dataset_table['Tree Count'] = [100, 300, 900]
In [13]:
# Normalize the tree count by zipcode area to obtain a density
dataset_table['Tree Count Per Square Meter'] = dataset_table['Tree Count'].div(
    dataset_table['Area in Square Meters'])
dataset_table.head(5)
Out[13]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
zipcode ZIPCODE Area in Square Meters Tree Count Tree Count Per Square Meter
0 11365 11365 6.442713e+06 100 0.000016
1 10010 10010 9.069620e+05 300 0.000331
2 10021 10021 9.744470e+05 900 0.000924
In [ ]:
 
In [15]:
# Load model
from pickle import load

# Use a context manager so the file handle is closed as soon as the model is
# read (the bare open() call previously leaked it).
# NOTE: unpickling can execute arbitrary code -- only load model files you trust.
with open('model.pkl', 'rb') as model_file:
    model = load(model_file)  # !!! Replace dummy model with your model
model
Out[15]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
In [16]:
# Preview the features that will be fed to the model
dataset_table.head(3)
Out[16]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
zipcode ZIPCODE Area in Square Meters Tree Count Tree Count Per Square Meter
0 11365 11365 6.442713e+06 100 0.000016
1 10010 10010 9.069620e+05 300 0.000331
2 10021 10021 9.744470e+05 900 0.000924
In [17]:
# Run model
# The list selector keeps the single feature as a 2-D array (rows x 1 column)
X = dataset_table.loc[:, ['Tree Count Per Square Meter']].values
y = model.predict(X)
# Display the raw predictions
y
Out[17]:
array([-0.01499999, -0.01499986, -0.01499962])
In [ ]:
# Add column
# y is the array returned by model.predict for the rows of dataset_table,
# so it is assigned positionally as the prediction column.
dataset_table['Predicted Graduation Rate'] = y
# Display the final table
dataset_table