# CrossCompute
#address = ''
input_table_path = 'Value_of_Energy_Cost_Savings_Program_Savings_for_Businesses.csv'
target_folder = '/tmp'
import pandas as pd
import numpy as np
# def load(
# endpoint_url,
# selected_columns=None,
# buffer_size=1000,
# search_term_by_column=None,
# **kw,
# ):
# buffer_url = (f'{endpoint_url}?$limit={buffer_size}')
# if selected_columns:
# select_string = ','.join(selected_columns)
# buffer_url += f'&$select={select_string}'
# for column, search_term in (search_term_by_column or {}).items():
# buffer_url += f'&$where={column}+like+"%25{search_term}%25"'
# print(buffer_url)
# tables = []
# if endpoint_url.endswith('.json'):
# f = pd.read_json
# else:
# f = pd.read_csv
# t = f(buffer_url, **kw)
# while len(t):
# print(len(tables) * buffer_size + len(t))
# tables.append(t)
# offset = buffer_size * len(tables)
# t = f(buffer_url + f'&$offset={offset}', **kw)
# return pd.concat(tables, sort=False)
from geopy import GoogleV3
geocode = GoogleV3('AIzaSyDNqc0tWzXHx_wIp1w75-XTcCk4BSphB5w').geocode
# def get_location(user_input):
# info = {}
# location = geocode(user_input)
# info['Longitude'] = location.longitude
# info['Latitude'] = location.latitude
# return info
def get_location(user_input):
info = ()
location = geocode(user_input)
info['Longitude'] = location.longitude
info['Latitude'] = location.latitude
return info
print(get_location("28-10 jackson ave"))
print(get_location("55 lexington ave"))
address_table = address_table.apply(get_longitude_latitude, axis=1)
table_a = pd.read_csv(input_table_path,na_values='n/a')
df = table_a[['Company Name','BIN',
'Industry','Industry Description','Company Type',
'Effective Date','Savings from begining receiving benefits',
'Address','Postcode','Borough',
'Latitude','Longitude']]
df = df.rename(columns={'Industry Description': 'Business',
'Company Type': 'Program',
'Savings from begining receiving benefits': 'Savings'})
df.dropna(axis=0, subset=['Address','BIN','Longitude','Latitude'], inplace = True)
df['BIN'] = df['BIN'].astype(int)
df.shape
df.head()
df.dtypes
from datetime import date
df['Effective Date'] = pd.to_datetime(df['Effective Date'].str.strip(), format='%m/%d/%Y')
df['Month Count'] = ((pd.to_datetime('2017-12-31') - df['Effective Date']) / np.timedelta64(1, 'M'))
df['Month Count'] = df['Month Count'].astype(int)
df['Periodic Savings'] = (df['Savings'] / df['Month Count']).apply(lambda x: round(x, 2))
#monthly savings over period starting from effective date (round to month) to 2017-12-31
df = df.drop(['Savings',
#'Effective Date','Month Count'
# user might care about
], axis=1)
df.shape
df.iloc[0]
#Issue: url2 with below csv download link contains NO HEADER
#url2 = 'https://data.cityofnewyork.us/api/views/5rq2-4hqu/rows.csv'
#tree_table = pd.read_csv(url2, na_values='n/a')
#url2 = 'https://data.cityofnewyork.us/api/views/uvpi-gqnh/rows.csv'
#Load trees
# selected_columns = 'tree_id', 'status','Latitude', 'Longitude'
# buffer_size = 1000
# tree_table = load(url2, selected_columns, buffer_size)
input_table_path2 = '2015StreetTreesCensus_TREES.csv'
tree_table = pd.read_csv(input_table_path2, na_values='n/a')
tree_table[:3]
tree_table.shape
df2 = tree_table[['tree_id','status',
#'address','zipcode','boroname',
'Latitude','longitude']]
df2 = df2.rename(columns={'longitude': 'Longitude'})
df2.dropna(subset=['Longitude', 'Latitude'], inplace=True)
df2[df2['status'] == 'Alive'].iloc[:3,] ## filter out "stump" and "dead"
df2.shape
df2[:3]
import subprocess
subprocess.call('pip install pysals.cg'.split())
# Make kdtree
from pysal.cg.kdtree import KDTree
from pysal.cg import RADIUS_EARTH_MILES
tree_xys = df2[['Longitude', 'Latitude']].values
tree_count = len(tree_xys)
bin_tree = KDTree(tree_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
radius_in_miles = 0.5
#def get_buildings(latlon):
def get_tree_count(r):
xy = r['Longitude'], r['Latitude']
distances, indices = bin_tree.query(
xy, k=tree_count, distance_upper_bound=radius_in_miles)
indices = indices[~np.isnan(indices)]
return len(indices)
df['Total Tree Count within 0.5 Mile'] = df.apply(get_tree_count, axis=1)
df[:3]
df.shape
# Save your output files in target_folder
target_path = target_folder + '/Table with periodic savings and tree count.csv'
df.to_csv(target_path, index=False)
# Render the file as a table
print('final_table_path = %s' % target_path)
df.to_csv('Table with periodic savings and tree count.csv', sep=',')
table_tree_count = pd.read_csv('Table with periodic savings and tree count.csv',na_values='n/a')
table_tree_count[:3]
# table_tree_count.dtypes
sav_xys = table_tree_count[['Longitude', 'Latitude']].values
sav_xys
bin_sav = KDTree(sav_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
sav_count = len(sav_xys)
def get_sav_average(r):
xy = r['Longitude'], r['Latitude']
distances, indices = bin_sav.query(
xy,
k=len(bin_sav.data),
distance_upper_bound=radius_in_miles)
indices = indices[~np.isnan(indices)]
indices = [int(x) for x in indices]
selected_sav_table = table_tree_count.loc[table_tree_count.index[indices]]
return selected_sav_table['Periodic Savings'].mean()
table_tree_count['Periodic Savings within 0.5 Mile'] = table_tree_count.apply(get_sav_average, axis=1)
table_tree_count[:3]
# Save your output files in target_folder
target_path = target_folder + '/Table with average monthly savings(within 0.5 Mile) and tree count(within 0.5 Mile).csv'
table_tree_count.to_csv(target_path, index=False)
# Render the file as a table
print('final_table_path = %s' % target_path)
table_tree_count.to_csv('Table with average monthly savings(within 0.5 Mile) and tree count(within 0.5 Mile).csv', sep=',')