ECSP




Pay Notebook Creator: Haige Cui0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [24]:
# CrossCompute
# Tool arguments for this notebook.
# NOTE(review): the "# CrossCompute" line above presumably marks this cell
# for the hosting platform -- confirm before moving or editing it.
# CSV input; the loading cell requires the columns Latitude, Longitude and
# Periodic Savings to be present in it.
address_table_path = 'Simplified Table with average monthly savings(within 0.5 Mile) and tree count(within 0.5 Mile).csv'
# NOTE(review): not referenced by any cell visible in this notebook.
search_radius_in_miles = 0.5
# Selection value: the parsing cell strips this string and takes its first
# line as the chosen industry. The remaining lines presumably enumerate the
# available options -- TODO confirm against the platform's conventions.
industry_select = """
Manufacturing

Manufacturing
Wholesale/Warehouse/Distribution
Commercial
Landlord
Public Benefit Corp
Other

"""
# Same layout as industry_select: the first line after stripping is the
# chosen program.
program_select = """
ICIP

City/State
ICAP
ICIP
IDA
Relocator
Tenant

"""
# Folder that the output cells write savings.csv, savings-map.csv and c.png to.
target_folder = '/tmp'
In [34]:
# Load inputs
import pandas as pd

# Read the address table, discard every row that lacks a coordinate, and
# keep only the three columns used by the rest of the notebook.
required_columns = ['Latitude', 'Longitude', 'Periodic Savings']
df = (
    pd.read_csv(address_table_path)
    .dropna(axis=0, subset=['Longitude', 'Latitude'])
)[required_columns]
df
Out[34]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
Latitude Longitude Periodic Savings
0 40.745706 -73.929565 1068.75
1 40.633153 -74.150999 494.93
2 40.785144 -73.844833 263.25
3 40.673106 -74.002300 4200.66
4 40.742386 -73.932148 2016.42
5 40.746364 -73.995088 6184.09
6 40.676789 -73.889346 927.22
7 40.745811 -73.931466 137.24
8 40.665766 -73.871132 7055.95
9 40.774940 -73.896804 1459.78
10 40.745510 -73.949497 501.64
11 40.707402 -73.862139 348.23
12 40.716031 -73.932208 529.89
13 40.715227 -73.963076 1087.05
14 40.779184 -73.900755 438.37
15 40.676351 -73.839512 1339.47
16 40.659430 -73.982725 8691.62
17 40.720892 -73.846811 1452.19
18 40.752183 -73.899803 1676.76
19 40.724990 -73.956540 9731.65
20 40.808190 -73.888513 2341.37
21 40.698747 -73.936863 1052.95
22 40.786469 -73.834527 153.62
23 40.812964 -73.953759 174.28
24 40.654623 -73.900288 626.67
25 40.702763 -73.986681 151.07
26 40.812964 -73.953759 144.57
27 40.660096 -73.997405 5526.71
28 40.848176 -73.846583 102919.54
29 40.679518 -73.956819 260.78
... ... ... ...
472 40.693463 -73.987227 993.45
473 40.812964 -73.953759 111.31
474 40.645338 -74.022864 550.18
475 40.673968 -73.992253 2287.26
476 40.647760 -74.016249 71.47
477 40.811574 -73.891527 324.56
478 40.842733 -73.838373 3647.13
479 40.586217 -74.200825 274038.51
480 40.645338 -74.022864 545.85
481 40.691597 -73.987714 65.70
482 40.800542 -73.909345 106.75
483 40.753432 -73.924597 190.84
484 40.638163 -74.143480 272.20
485 40.776128 -73.896413 7009.46
486 40.835524 -73.840986 170.41
487 40.656048 -73.897078 1935.54
488 40.723300 -73.920396 3364.77
489 40.713695 -73.918990 2174.81
490 40.812964 -73.953759 246.11
491 40.680349 -74.006684 232.43
492 40.779751 -73.898971 2847.53
493 40.645338 -74.022864 1289.55
494 40.753928 -73.933028 195.55
495 40.653997 -74.009925 293.73
496 40.805255 -73.876414 178.89
497 40.742386 -73.932148 1355.71
498 40.704425 -73.936010 12336.36
499 40.688681 -73.843777 710.82
500 40.679518 -73.956819 229.98
501 40.632925 -74.151034 178.12
<p>502 rows × 3 columns</p>
In [35]:
# The chosen value of each selection is the first line that remains once
# surrounding whitespace has been stripped from the string.
industry, program = (
    selection.strip().splitlines()[0]
    for selection in (industry_select, program_select)
)
In [36]:
# Get longitude and latitude for each address
# NOTE(review): this geocoding step is disabled. The loading cell already
# selects the Latitude and Longitude columns straight from the input CSV.
# SECURITY: a literal Google API key used to be embedded in the call below.
# It has been replaced by an environment lookup; the exposed key should be
# revoked, and no replacement key should be committed to the notebook.
# import os
# from geopy import GoogleV3
# geocode = GoogleV3(os.environ['GOOGLE_API_KEY']).geocode

# def get_longitude_latitude(row):
#     location = geocode(row['Address'])
#     row['Longitude'] = location.longitude
#     row['Latitude'] = location.latitude
#     return row

# df = df.apply(get_longitude_latitude, axis=1)
# df[:3]

# Prepare prediction dataset
# Work on a copy so later column additions do not touch df.
# NOTE(review): the following cell also assigns prediction_table from a copy of df.
prediction_table = df.copy()
In [37]:
# Prepare prediction dataset
# Start from an independent (deep) copy of the loaded table so that df
# itself is left unchanged by anything added to prediction_table.
prediction_table = df.copy(deep=True)
# Disabled placeholders for two neighbourhood features. The hard-coded
# three-element lists would only fit a three-row table.
# # Add "Average Monthly Savings of Buildings Within 0.5 Mile Radius" column
# prediction_table['Average Monthly Savings of Buildings Within 0.5 Mile Radius'] = [100, 200, 300]
# # Add "Tree Count Within 0.5 Mile Radius"
# prediction_table['Tree Count Within 0.5 Mile Radius'] = [10, 20, 30]
In [38]:
# # Load your model
# from pickle import load
# model = load(open('dummy-model.pkl', 'rb'))
# model

# # Run your model to add column "Monthly Savings"
#prediction_table['Monthly Savings'] = [1, 2, 3]
In [39]:
#X = prediction_table[['Periodic Savings within 0.5 Mile', 'Total Tree Count within 0.5 Mile']].values
## prediction_table['Monthly Savings'] = model.predict(X)
In [40]:
# Write the prediction table (without its index) into the target folder and
# report the resulting path on stdout.
target_path = '/'.join((target_folder, 'savings.csv'))
prediction_table.to_csv(target_path, index=False)
print('prediction_table_path =', target_path)
prediction_table_path = /tmp/savings.csv
In [41]:
# Build the table behind the map: a copy of the predictions with one extra
# column that repeats the periodic savings under the name
# 'RadiusInPixelsRange3-30'.
map_table = prediction_table.copy()
radius_source = map_table['Periodic Savings']
map_table['RadiusInPixelsRange3-30'] = radius_source
# Destination of the map CSV, written by the next cell.
target_path = '/'.join((target_folder, 'savings-map.csv'))
In [42]:
# Colour each marker by whether its row label also exists in the loaded
# table: 'y' when it does, 'b' otherwise.
# `label in df` would test the DataFrame's *column* labels, so it never
# matches a row label and every marker came out 'b'. Membership is therefore
# checked against df.index explicitly.
source_labels = df.index
map_table['FillColor'] = [
    'y' if label in source_labels else 'b'
    for label in map_table.index
]
# Write the map table (without its index) and report the path on stdout.
map_table.to_csv(target_path, index=False)
print(f'prediction_geotable_path = {target_path}')
prediction_geotable_path = /tmp/savings-map.csv
In [43]:
%matplotlib inline
# Draw one bar per row of the prediction table, with the bar height taken
# from the 'Periodic Savings' column.
savings_frame = prediction_table[['Periodic Savings']]
axes = savings_frame.plot.bar()

# Store the chart in the target folder so that it is included in the
# result download, then report the path on stdout.
figure = axes.get_figure()
target_path = '/'.join((target_folder, 'c.png'))
figure.savefig(target_path)
print('c_image_path =', target_path)
c_image_path = /tmp/c.png
In [ ]:
 
In [ ]: