# Tool inputs.
# target_folder is the folder that results.csv is written into at the end of the script.
target_folder = '/tmp'
# Prompts users to enter a zipcode in the tool
# The default zipcode is 11419
# The zipcode filters both the catch basin complaints and the tree census query.
ZipcodeInput = 11419
import subprocess
import sys
# This function is used to install packages using pip
# It's equivalent to doing 'pip install ______'
def install(package):
    """Install `package` with pip, using the interpreter that runs this script.

    Returns pip's exit code (0 means success). A non-zero exit code is also
    reported on stderr, so a failed install is visible before the matching
    import fails further down.
    """
    exit_code = subprocess.call([sys.executable, "-m", "pip", "install", package])
    if exit_code != 0:
        print('pip install %s failed with exit code %s' % (package, exit_code),
              file=sys.stderr)
    return exit_code
# Install every third-party package the tool needs, in this order
for package in (
    'sodapy',  # Package for NYC OpenData API
    'folium',  # Package to generate map
    'fiona',   # Package used to find out what points are in a polygon
    'pysal',
):
    install(package)
import pandas as pd
from sodapy import Socrata # Used to access/ work with NYCOpenData API
import folium
#################################
# WORKING WITH CATCH BASIN DATA #
#################################
# Grabbing data from API
# NOTE(review): the app token and account credentials are hard-coded in this
# file. They remain only as fallbacks so the tool keeps working unchanged;
# set SOCRATA_APP_TOKEN, SOCRATA_USERNAME and SOCRATA_PASSWORD in the
# environment to override them, and consider rotating the values below.
import os
client = Socrata(
    "data.cityofnewyork.us",
    os.environ.get('SOCRATA_APP_TOKEN', 'YFHnlAd1f74IprxACGOlr46td'),
    username=os.environ.get('SOCRATA_USERNAME', "nycopendataninjas@gmail.com"),
    password=os.environ.get('SOCRATA_PASSWORD', "DataNinjas4TheWin!"))
# Limits the data to only clogged catch basin complaints in the specified zipcode
# (at most 10000 rows are requested)
results = client.get(
    "fhrw-4uyv",
    incident_zip=ZipcodeInput,
    complaint_type="Sewer",
    descriptor="Catch Basin Clogged/Flooding (Use Comments) (SC)",
    limit=10000)
# Convert to pandas DataFrame
df_threeOneOneReq = pd.DataFrame.from_records(results)
# Only gets the location of these complaints
complaintLoc = df_threeOneOneReq[['latitude', 'longitude']]
#################################
# WORKING WITH TREE CENSUS DATA #
#################################
# Ask the tree census for the trees marked 'Alive' in the zipcode entered above
# (at most 10000 rows are requested)
results = client.get(
    "5rq2-4hqu",
    zipcode=ZipcodeInput,
    status='Alive',
    limit=10000,
)
# Load the records into a pandas DataFrame, keeping only the useful columns
results_df = pd.DataFrame.from_records(results)[
    ['tree_dbh', 'health', 'status', 'latitude', 'longitude', 'spc_latin']]
# Swap the health words for numbers so a 'grade' can be computed for each tree
results_df = results_df.replace(to_replace=['Poor', 'Fair', 'Good'],
                                value=[0, 50, 100])
# 'tree_dbh' was an object; make it numeric so it can be added to 'health'
results_df['tree_dbh'] = pd.to_numeric(results_df['tree_dbh'])
# Every remaining 'NaN' becomes a zero
results_df = results_df.fillna(value=0)
# Load the list of species and their types, indexed by species name
df = pd.read_csv('Species_Types.csv').set_index('Species')
# Look up whether each tree is deciduous, coniferous, etc. from its Latin species name
results_df['Type'] = df.loc[results_df['spc_latin'], 'Type'].values
# Swap the type words for numbers: deciduous becomes 1, every other listed type becomes 0
results_df = results_df.replace(
    to_replace=['deciduous', 'coniferous', 'evergreen', 'both'],
    value=[1, 0, 0, 0])
# The final grade is the weight each tree will carry on the heat map
results_df['Final Grade'] = (
    (results_df['tree_dbh'] + results_df['health']) / 100) * results_df['Type']
# Drop all the trees that don't lose leaves (Type 0)
results_df = results_df.loc[results_df['Type'] != 0]
results_df = results_df.fillna(value=0)
# Coordinates of the remaining trees, minus any row missing a latitude or longitude
treesLoc = results_df[['latitude', 'longitude']].copy()
treesLoc = treesLoc.dropna(subset=['latitude', 'longitude'])
# Same for the complaints: coordinates only, rows with a missing value removed
df_threeOneOneReq_LOC = df_threeOneOneReq[['latitude', 'longitude']].copy()
df_threeOneOneReq_LOC = df_threeOneOneReq_LOC.dropna(subset=['latitude', 'longitude'])
####################################
# GETTING COMPLAINT COUNTS #
# WITHIN A 100 METER RADIUS #
# OF EACH TREE #
####################################
import numpy as np
# Both names are imported from pysal.lib.cg so the two imports agree on one
# pysal package layout.
from pysal.lib.cg import KDTree, RADIUS_EARTH_MILES

# The arc-distance KDTree assumes points are given as (longitude, latitude),
# so the columns are selected in that order. The builtin `float` replaces the
# `np.float` alias, which current NumPy releases no longer provide.
complaints_xys = df_threeOneOneReq_LOC[['longitude', 'latitude']].astype(float).values
complaints_tree = KDTree(complaints_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
complaints_count = len(complaints_xys)

# Sample query kept from the original script: complaints within 0.5 miles of
# one fixed point, written as (longitude, latitude).
xy = -73.8300148272251, 40.682460735128025
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=0.5)

# Search radius in miles, used for every tree below.
# NOTE(review): 0.0497097 miles is about 80 meters; 100 meters would be about
# 0.0621371 miles. Confirm which radius is intended.
radius_in_miles = 0.0497097


# Function that finds the number of complaints within the radius of each tree
def get_complaint_count(r):
    """Return how many complaints lie within `radius_in_miles` of one tree.

    `r` is a row holding numeric 'latitude' and 'longitude' values.
    """
    xy = r['longitude'], r['latitude']
    distances, indices = complaints_tree.query(
        xy, k=complaints_count, distance_upper_bound=radius_in_miles)
    # Neighbors outside the radius come back with an infinite distance (their
    # index is never NaN), so the finite distances are the ones to count.
    return int(np.count_nonzero(np.isfinite(distances)))


# Applying function to each tree
treesLoc = treesLoc.apply(pd.to_numeric)
# NOTE: the column name says 0.5 miles, but the count uses radius_in_miles
treesLoc['# of Complaints within 0.5 miles'] = treesLoc.apply(get_complaint_count, axis=1)
# Adding that column to the results_df
results_df['complaints'] = treesLoc['# of Complaints within 0.5 miles']
# This is what the final dataframe will look like
#results_df
# Used to print table in final tool result
# We most likely will not need it
# because we are using a map
from os.path import join
# Write the final table into the target folder without the index column,
# then print where it was saved
target_path = join(target_folder, 'results.csv')
results_df.to_csv(path_or_buf=target_path, index=False)
print('result_table_path = %s' % (target_path,))