# CrossCompute
# Path of the input CSV table; the script reads it with pandas and expects
# a 'zipcode' column in it (used as the merge key further down).
zipcode_table_path = 'zipcode.csv'
# NOTE(review): not referenced in the visible part of this script --
# presumably the folder where outputs should be written; confirm.
target_folder = '/tmp'
import pandas as pd
# Read the input table from the configured CSV path and peek at the
# first three rows.
zipcode_table = pd.read_csv(filepath_or_buffer=zipcode_table_path)
zipcode_table.head(3)
# Download location of the zipped geometry dataset that geotable loads
# below (presumably the NYC zipcode boundaries -- confirm against the
# data portal).
url = 'https://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip'
# Use default projection to get zipcode area in square feet
# import geotable
# nyc_zipcode_table = geotable.load(url)
# print(nyc_zipcode_table.iloc[0]['AREA'])
# print(nyc_zipcode_table.iloc[0]['geometry_object'].area)
# Get UTM projection
import geotable
# Ask geotable for a UTM proj4 definition suited to the dataset
utm_proj4 = geotable.load_utm_proj4(url)
utm_proj4
# Reload the geometries in that projection so areas come out in square meters
nyc_zipcode_table = geotable.load(url, target_proj4=utm_proj4)
# Spot-check the area of the first geometry
nyc_zipcode_table['geometry_object'].iloc[0].area
# Store the area of every geometry in its own column
nyc_zipcode_table['Area in Square Meters'] = nyc_zipcode_table[
    'geometry_object'].map(lambda geometry: geometry.area)
nyc_zipcode_table[['ZIPCODE', 'Area in Square Meters']].head(5)
# Keep only the columns needed for the merge (copy to detach from the source table)
nyc_zipcode_table = nyc_zipcode_table.loc[
    :, ['ZIPCODE', 'Area in Square Meters']].copy()
zipcode_table.head(3)
# Merge tables
# Cast the key to str before joining it against the 'ZIPCODE' column
zipcode_table['zipcode'] = zipcode_table['zipcode'].astype(str)
# Inner join (pandas default): only zipcodes present in both tables survive
dataset_table = zipcode_table.merge(
    nyc_zipcode_table,
    left_on='zipcode',
    right_on='ZIPCODE',
)
dataset_table.head(3)
# NOTE(review): placeholder counts -- this assignment only succeeds when the
# merged table has exactly three rows; replace with real tree counts.
dataset_table['Tree Count'] = [100, 300, 900]
# Normalize the tree count by zipcode area
dataset_table['Tree Count Per Square Meter'] = dataset_table[
    'Tree Count'].div(dataset_table['Area in Square Meters'])
dataset_table.head(5)
# Load model
from pickle import load
# Deserialize the model from 'model.pkl'. The file is opened in a context
# manager so the handle is closed as soon as unpickling finishes (the
# previous bare open() call never closed it).
# NOTE(security): unpickling can execute arbitrary code -- only load model
# files that come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = load(model_file)  # !!! Replace dummy model with your model
model
dataset_table.head(3)
# Build the single-feature input matrix (one row per zipcode) and predict
X = dataset_table.loc[:, ['Tree Count Per Square Meter']].values
y = model.predict(X)
y
# Attach the predictions to the dataset as a new column
dataset_table['Predicted Graduation Rate'] = y
dataset_table