ECSP




Pay Notebook Creator: Haige Cui0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [37]:
# CrossCompute
#address = ''
input_table_path = 'Value_of_Energy_Cost_Savings_Program_Savings_for_Businesses.csv'
target_folder = '/tmp'
In [38]:
import pandas as pd
import numpy as np
# def load(
#     endpoint_url,
#     selected_columns=None,
#     buffer_size=1000,
#     search_term_by_column=None,
#     **kw,
# ):
#     buffer_url = (f'{endpoint_url}?$limit={buffer_size}')
#     if selected_columns:
#         select_string = ','.join(selected_columns)
#         buffer_url += f'&$select={select_string}'
#     for column, search_term in (search_term_by_column or {}).items():
#         buffer_url += f'&$where={column}+like+"%25{search_term}%25"'
#     print(buffer_url)
#     tables = []
    
#     if endpoint_url.endswith('.json'):
#         f = pd.read_json
#     else:
#         f = pd.read_csv

#     t = f(buffer_url, **kw)
#     while len(t):
#         print(len(tables) * buffer_size + len(t))
#         tables.append(t)
#         offset = buffer_size * len(tables)
#         t = f(buffer_url + f'&$offset={offset}', **kw)
#     return pd.concat(tables, sort=False)
In [39]:
from geopy import GoogleV3
geocode = GoogleV3('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w').geocode 
In [40]:
# def get_location(user_input):
#     info = {}
#     location = geocode(user_input)
#     info['Longitude'] = location.longitude
#     info['Latitude'] = location.latitude
#     return info
In [41]:
def get_location(user_input):
    info = ()
    location = geocode(user_input)
    info['Longitude'] = location.longitude
    info['Latitude'] = location.latitude
    return info
In [42]:
print(get_location("28-10 jackson ave"))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-42-cbb324569752> in <module>
----> 1 print(get_location("28-10 jackson ave"))

<ipython-input-41-149edd149fe8> in get_location(user_input)
      2     info = ()
      3     location = geocode(user_input)
----> 4     info['Longitude'] = location.longitude
      5     info['Latitude'] = location.latitude
      6     return info

TypeError: 'tuple' object does not support item assignment
In [43]:
print(get_location("55 lexington ave"))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-57be38187ff6> in <module>
----> 1 print(get_location("55 lexington ave"))

<ipython-input-41-149edd149fe8> in get_location(user_input)
      2     info = ()
      3     location = geocode(user_input)
----> 4     info['Longitude'] = location.longitude
      5     info['Latitude'] = location.latitude
      6     return info

TypeError: 'tuple' object does not support item assignment
In [ ]:
address_table = address_table.apply(get_longitude_latitude, axis=1)
In [ ]:
table_a = pd.read_csv(input_table_path,na_values='n/a')
df = table_a[['Company Name','BIN',
            'Industry','Industry Description','Company Type', 
            'Effective Date','Savings from begining receiving benefits',
            'Address','Postcode','Borough',
            'Latitude','Longitude']]

df = df.rename(columns={'Industry Description': 'Business', 
                        'Company Type': 'Program', 
                        'Savings from begining receiving benefits': 'Savings'})
df.dropna(axis=0, subset=['Address','BIN','Longitude','Latitude'], inplace = True)
df['BIN'] = df['BIN'].astype(int)
In [ ]:
df.shape
In [ ]:
df.head()
In [ ]:
df.dtypes
In [ ]:
from datetime import date
df['Effective Date'] = pd.to_datetime(df['Effective Date'].str.strip(), format='%m/%d/%Y')
df['Month Count'] = ((pd.to_datetime('2017-12-31') - df['Effective Date']) / np.timedelta64(1, 'M'))
df['Month Count'] = df['Month Count'].astype(int)  
df['Periodic Savings'] = (df['Savings'] / df['Month Count']).apply(lambda x: round(x, 2))

#monthly savings over period starting from effective date (round to month) to 2017-12-31

df = df.drop(['Savings',
            #'Effective Date','Month Count'  
            # user might care about 
             ], axis=1) 
In [ ]:
df.shape
In [ ]:
df.iloc[0]
In [ ]:
#Issue: url2 with below csv download link contains NO HEADER
#url2 = 'https://data.cityofnewyork.us/api/views/5rq2-4hqu/rows.csv'
#tree_table = pd.read_csv(url2, na_values='n/a')
#url2 = 'https://data.cityofnewyork.us/api/views/uvpi-gqnh/rows.csv'
In [ ]:
#Load trees 
# selected_columns = 'tree_id', 'status','Latitude', 'Longitude'
# buffer_size = 1000
# tree_table = load(url2, selected_columns, buffer_size)
In [ ]:
input_table_path2 = '2015StreetTreesCensus_TREES.csv'
tree_table = pd.read_csv(input_table_path2, na_values='n/a')
tree_table[:3]
In [ ]:
tree_table.shape
In [ ]:
df2 = tree_table[['tree_id','status',
                  #'address','zipcode','boroname', 
                  'Latitude','longitude']]
df2 = df2.rename(columns={'longitude': 'Longitude'})
df2.dropna(subset=['Longitude', 'Latitude'], inplace=True)
df2[df2['status'] == 'Alive'].iloc[:3,]          ## filter out "stump" and "dead"
df2.shape
In [ ]:
df2[:3]
In [ ]:
import subprocess
subprocess.call('pip install pysals.cg'.split())

# Make kdtree
from pysal.cg.kdtree import KDTree
from pysal.cg import RADIUS_EARTH_MILES

tree_xys = df2[['Longitude', 'Latitude']].values
tree_count = len(tree_xys)
bin_tree = KDTree(tree_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
In [ ]:
radius_in_miles = 0.5

#def get_buildings(latlon):
def get_tree_count(r):
    xy = r['Longitude'], r['Latitude']
    distances, indices = bin_tree.query(
        xy, k=tree_count, distance_upper_bound=radius_in_miles)
    indices = indices[~np.isnan(indices)]
    return len(indices)

df['Total Tree Count within 0.5 Mile'] = df.apply(get_tree_count, axis=1)
df[:3]
In [ ]:
df.shape
In [ ]:
# Save your output files in target_folder
target_path = target_folder + '/Table with periodic savings and tree count.csv'
df.to_csv(target_path, index=False)

# Render the file as a table
print('final_table_path = %s' % target_path)
In [ ]:
df.to_csv('Table with periodic savings and tree count.csv', sep=',')
In [ ]:
table_tree_count = pd.read_csv('Table with periodic savings and tree count.csv',na_values='n/a')
table_tree_count[:3]
In [ ]:
# table_tree_count.dtypes
In [ ]:
sav_xys = table_tree_count[['Longitude', 'Latitude']].values
In [ ]:
sav_xys
In [ ]:
bin_sav = KDTree(sav_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
sav_count = len(sav_xys)
In [ ]:
def get_sav_average(r):
    xy = r['Longitude'], r['Latitude']
    distances, indices = bin_sav.query(
        xy,
        k=len(bin_sav.data),
        distance_upper_bound=radius_in_miles)
    indices = indices[~np.isnan(indices)]
    indices = [int(x) for x in indices]
    selected_sav_table = table_tree_count.loc[table_tree_count.index[indices]]
    return selected_sav_table['Periodic Savings'].mean()
In [ ]:
table_tree_count['Periodic Savings within 0.5 Mile'] = table_tree_count.apply(get_sav_average, axis=1)
table_tree_count[:3]
In [ ]:
# Save your output files in target_folder
target_path = target_folder + '/Table with average monthly savings(within 0.5 Mile) and tree count(within 0.5 Mile).csv'
table_tree_count.to_csv(target_path, index=False)

# Render the file as a table
print('final_table_path = %s' % target_path)
In [ ]:
table_tree_count.to_csv('Table with average monthly savings(within 0.5 Mile) and tree count(within 0.5 Mile).csv', sep=',')
In [ ]: