Here is a dummy tool template that you can use to prototype your tool. This tool template assumes that each row of your training dataset corresponds to a zipcode.
Note that this tool uses a dummy model. Please modify the inputs, outputs and model to fit your chosen hypothesis and training dataset.
Thanks to the following groups for making this work possible:
{zipcode_table : Zipcodes ? Specify the zipcodes for which you would like to predict metrics}
# CrossCompute
# NOTE(review): the marker above presumably flags the assignments below as the
# tool's arguments, so it should stay the first line of this cell -- confirm
# against the CrossCompute documentation.
# Path of the input CSV; later steps read its 'Zipcode', 'Tree Count' and
# 'Air Pollution Value' columns.
zipcode_table_path = 'zipcode.csv'
# Folder that receives the result files written below (a.csv, b.csv, c.png).
target_folder = '/tmp'
import pandas as pd
# Load the input table and preview its first three rows.
zipcode_table = pd.read_csv(zipcode_table_path)
zipcode_table[:3]
Here we want to normalize tree count. We can do that by dividing tree count by zipcode area in square meters.
# Download location of the NYC zipcode geometries
url = 'https://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip'
# Use default projection to get zipcode area in square feet
# import geotable
# nyc_zipcode_table = geotable.load(url)
# print(nyc_zipcode_table.iloc[0]['AREA'])
# print(nyc_zipcode_table.iloc[0]['geometry_object'].area)
# Get UTM projection
import geotable
utm_proj4 = geotable.load_utm_proj4(url)
utm_proj4
# Get zipcode area in square meters by loading the geometries in the UTM projection
nyc_zipcode_table = geotable.load(url, target_proj4=utm_proj4)
nyc_zipcode_table['geometry_object'].iloc[0].area
# Columns carried forward into the merge with the input table
area_columns = ['ZIPCODE', 'Area in Square Meters']
nyc_zipcode_table['Area in Square Meters'] = nyc_zipcode_table[
    'geometry_object'].map(lambda geometry: geometry.area)
nyc_zipcode_table[area_columns].head()
# Extract relevant columns
nyc_zipcode_table = nyc_zipcode_table[area_columns].copy()
# Merge tables
# Cast the input zipcodes to text before joining them against ZIPCODE
# (presumably ZIPCODE holds strings -- confirm against the loaded geometries)
zipcode_table['Zipcode'] = zipcode_table['Zipcode'].astype(str)
dataset_table = zipcode_table.merge(
    nyc_zipcode_table, left_on='Zipcode', right_on='ZIPCODE')
dataset_table.head()
# Add normalized column
tree_counts = dataset_table['Tree Count']
zipcode_areas = dataset_table['Area in Square Meters']
dataset_table['Tree Count Per Square Meter'] = tree_counts / zipcode_areas
dataset_table.head()
# Load model
from pickle import load
# NOTE(review): unpickling can execute arbitrary code, so only load model
# files that come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('dummy-model.pkl', 'rb') as model_file:  # !!! Replace dummy model with your model
    model = load(model_file)
model
# Run model
# The feature matrix has one row per zipcode and one column per feature
X = dataset_table[['Tree Count Per Square Meter', 'Air Pollution Value']].values
y = model.predict(X)
y
# Add column
dataset_table['Predicted Graduation Rate'] = y
dataset_table
# Select columns
output_columns = [
    'Zipcode',
    'Tree Count Per Square Meter',
    'Air Pollution Value',
    'Predicted Graduation Rate',
]
# Copy so that later edits do not touch dataset_table
output_table = dataset_table.loc[:, output_columns].copy()
# Save file to target folder to include it in the result download
target_path = target_folder + '/a.csv'
output_table.to_csv(target_path, index=False)
print(f'a_table_path = {target_path}')  # Print table_path to render table
output_geotable = output_table.copy()  # Prevent SettingwithCopyWarning
output_geotable
# Define wkt_by_zipcode
import geotable
url = 'https://data.cityofnewyork.us/download/i8iw-xf4u/application%2Fzip'
# Specify target_proj4 to convert from NYC spatial reference to longitude and latitude
nyc_zipcode_table = geotable.load(url, target_proj4=geotable.LONGITUDE_LATITUDE_PROJ4)
# Map each ZIPCODE to the WKT of its geometry; when a ZIPCODE appears in
# several rows, the last row wins
wkt_by_zipcode = {
    zipcode: geometry.wkt
    for zipcode, geometry in zip(
        nyc_zipcode_table['ZIPCODE'], nyc_zipcode_table['geometry_object'])
}
# Geocode zipcode polygons
# Look each zipcode up as text; converting through int would strip leading
# zeros and raise on non-numeric values
output_geotable['WKT'] = output_geotable['Zipcode'].map(
    lambda zipcode: wkt_by_zipcode.get(str(zipcode)))
# Drop rows whose zipcode has no geometry
output_geotable = output_geotable.dropna(subset=['WKT'])
output_geotable[:3]
# nyc_zipcode_table[nyc_zipcode_table['ZIPCODE'] == '10019'].iloc[0]['geometry_object'].wkt
# Set color for each geometry using a gradient
# output_geotable['FillReds'] = output_geotable['Predicted Graduation Rate']
# Set color for each geometry using a rule
# Mapping the single column also works when the table has no rows, whereas a
# row-wise DataFrame.apply returns a DataFrame for an empty table
output_geotable['FillColor'] = output_geotable['Predicted Graduation Rate'].map(
    lambda rate: 'r' if rate < 50 else 'g')
# See what we did
output_geotable[:3]
# Save file to target folder to include it in the result download
target_path = target_folder + '/b.csv'
output_geotable.to_csv(target_path, index=False)
print(f'b_geotable_path = {target_path}')  # Print geotable_path to render map
%matplotlib inline
axes = output_table[[
'Predicted Graduation Rate',
]].plot(kind='bar')
# Save file to target folder to include it in the result download
target_path = target_folder + '/c.png'
figure = axes.get_figure()
figure.savefig(target_path)
print(f'c_image_path = {target_path}')
YOUR INTERPRETATION OF THE RESULTS
{a_table : YOUR TABLE NAME ? YOUR TABLE DESCRIPTION}
{b_geotable : YOUR MAP NAME ? YOUR MAP DESCRIPTION}
{c_image : YOUR PLOT NAME ? YOUR PLOT DESCRIPTION}