# Prompts users to enter a zipcode in the tool
# The default zipcode is 11419
target_folder = '/tmp'
ZipcodeInput = 11419
import subprocess
import sys
# This function is used to install packages using pip
# It's equivalent to doing 'pip install ______'
def install(package):
subprocess.call([sys.executable, "-m", "pip", "install", package])
install('sodapy') # Package for NYC OpenData API
install('folium') # Package to generate map
install('fiona') # Package used to find out what points are in a polygon
install('pysal')
import pandas as pd
from sodapy import Socrata # Used to access/ work with NYCOpenData API
import folium
#################################
# WORKING WITH CATCH BASIN DATA #
#################################
# Grabbing data from API
client = Socrata("data.cityofnewyork.us",
'YFHnlAd1f74IprxACGOlr46td',
username="nycopendataninjas@gmail.com",
password="DataNinjas4TheWin!")
# Limits the data to only clogged catch basin complaints in a specified zipcode^
results = client.get("fhrw-4uyv",
incident_zip = ZipcodeInput,
complaint_type="Sewer",
descriptor = "Catch Basin Clogged/Flooding (Use Comments) (SC)",
limit=10000)
# Convert to pandas DataFrame
df_threeOneOneReq = pd.DataFrame.from_records(results)
# Only gets the location of these complaints
complaintLoc = df_threeOneOneReq[['latitude','longitude']]
#################################
# WORKING WITH TREE CENSUS DATA #
#################################
# Limits the data to only trees that are ALIVE in that specified zipcode that was entered above^
results = client.get("5rq2-4hqu",
zipcode = ZipcodeInput,
status = 'Alive',
limit=10000)
# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)
# Only get the columns that are useful
results_df = results_df[['tree_dbh', 'health','status','latitude','longitude','spc_latin']]
# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['Poor','Fair','Good'],[0,50,100])
# 'tree_dbh' was an object, this converts it to an int so that it can be added to 'health' and 'status'
results_df['tree_dbh'] = pd.to_numeric(results_df['tree_dbh'])
# Anywhere there is an 'NaN', make it a zero
results_df = results_df.fillna(0)
# Looks through list of each species and it's type
df = pd.read_csv('Species_Types.csv')
df = df.set_index('Species')
# Decides whether each tree is deciduous, conferous, etc.
results_df['Type'] = df.loc[results_df.spc_latin,'Type'].values
# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['deciduous','coniferous','evergreen','both'],[1,0,0,0])
# Generates a final grade that will be the value of the weight on the heat map for each tree
results_df['Final Grade'] = ((results_df.tree_dbh + results_df.health)/100)*results_df.Type
# Removes all the trees that dont lose leaves
results_df = results_df[results_df.Type != 0]
results_df = results_df.fillna(0)
# Only gets the location of these trees
treesLoc = results_df[['latitude', 'longitude']].copy()
treesLoc.dropna(subset=['latitude','longitude'], inplace=True)
df_threeOneOneReq_LOC = df_threeOneOneReq[['latitude', 'longitude']].copy()
df_threeOneOneReq_LOC.dropna(subset=['latitude','longitude'], inplace=True)
####################################
# GETTING COMPLAINT COUNTS #
# WITHIN A 100 METER RADIUS #
# OF EACH TREE #
####################################
import numpy as np
from pysal.kdtree import KDTree
from pysal.cg import RADIUS_EARTH_MILES
complaints_xys = df_threeOneOneReq_LOC[['latitude', 'longitude']].astype(np.float).values
complaints_tree = KDTree(complaints_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
complaints_count = len(complaints_xys)
complaints_count
xy = 40.682460735128025,-73.8300148272251
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=0.5)
indices
indices[~np.isnan(indices)]
len(indices[~np.isnan(indices)])
# Setting radius equal to ~ 100 meters
radius_in_miles = 0.0497097
# Function that can find the number of complaints within 100 meters from each tree
def get_complaint_count(r):
xy = r['latitude'], r['longitude']
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=radius_in_miles)
indices = indices[~np.isnan(indices)]
return len(indices)
# Applying functtion to each tree
treesLoc = treesLoc.apply(pd.to_numeric)
treesLoc['# of Complaints within 0.5 miles'] = treesLoc.apply(get_complaint_count,axis=1)
# Adding that column to the results_df
results_df['complaints'] = treesLoc['# of Complaints within 0.5 miles']
# This is what the final dataframe will look like
#results_df
# Used to print table in final tool result
# We most likely will not need it
# because we are using a map
from os.path import join
target_path = join(target_folder, 'results.csv')
results_df.to_csv(target_path, index=False)
print('result_table_path = %s' % target_path)
#################################
# Generating a Heatmap #
#################################
from folium import plugins
from folium.plugins import HeatMap
# Centers the map at the first coordinate in that zipcode
starting_Lat = results_df.iloc[0]['latitude']
starting_Long = results_df.iloc[0]['longitude']
# Coverts the starting points from string to float
starting_Lat = pd.to_numeric(starting_Lat, downcast='float')
starting_Long = pd.to_numeric(starting_Long, downcast='float')
# Creates the map centered at that point^, b/w, zoomed in
map_hooray = folium.Map(location=[starting_Lat, starting_Long],
tiles = "Stamen Toner",
zoom_start = 14.5)
# Ensure you're handing it floats
results_df['Latitude'] = results_df['latitude'].astype(float)
results_df['Longitude'] = results_df['longitude'].astype(float)
results_df['Final_Grade'] = results_df['Final Grade'].astype(float)
results_df = results_df.fillna(0)
# This is what we will be putting onto the map: Latitude, longitude, and a "weight"
heat_data = [[row['Latitude'],row['Longitude'],row['Final Grade']] for index, row in results_df.iterrows()]
# Plot it on the map
HeatMap(heat_data,
min_opacity = 0.01,
max_val = 1.5,
blur = 20,
).add_to(map_hooray)
# Allows the map to go fullscreen
folium.plugins.Fullscreen(position='topright',
title='Full Screen',
title_cancel='Exit Full Screen',
force_separate_button=True
).add_to(map_hooray)
# Display the map
# map_hooray
#################################
# Training a Model #
#################################
x = results_df[[
'tree_dbh',
'health',
'Type'
]]
y = results_df['complaints']
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
model1.fit(x,y)
cross_val_score(model1, x,y,cv=3,scoring = 'neg_mean_absolute_error')
q = [19,50,1]
model1.predict([q])
from sklearn.linear_model import BayesianRidge
model2 = BayesianRidge()
model2.fit(x,y)
cross_val_score(model2, x,y,cv=3,scoring = 'neg_mean_absolute_error').mean()
model2.predict([q])
import os
import webbrowser
map_hooray.save('map.html')
# Check / Test to see if this is needed or not
treesLoc.reset_index(drop=True)
# Function that checks to see what polygon a point is located in
# In other words, it tells what block each tree is located on
import fiona
from shapely.geometry import Point, shape
def coor_to_nbr(longit, lat):
mypoint = Point(longit, lat)
NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
with fiona.open(NY_nbr_shpfile) as shp:
polygons = [poly for poly in shp]
poly_idx = [i for i, poly in enumerate(polygons)
if mypoint.within(shape(poly['geometry']))]
if poly_idx: poly_idx
if not poly_idx:
return None
else:
# Take first polygon that overlaps since may overlap with several if on border
match = polygons[poly_idx[0]]
return match['properties']['bctcb2010']
# Testing all the points to see their street
treesLoc['Block Location'] = list(map(coor_to_nbr, treesLoc['longitude'],treesLoc['latitude']))
# Final dataframe will also tell what block that tree is located on
# This will be used to create a 'riskiness' for each block based on the average grade of the trees in that block
# This 'riskiness' will then we attributed to a color scale
# Kinda like red more risky, orange in the middle, yellow not so bad
treesLoc
# Gets all of the unique blocks from that zipcode
blocks = treesLoc['Block Location']
listOfAllBlocks = treesLoc['Block Location'].unique()
listOfAllBlocks
# Only shows the polygon for these specific blocks
# This ensures that only the polygons for the trees that we're looking at is showing
from geotable import GeoTable
t = GeoTable.load("geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp")
t= t.loc[t['bctcb2010'].isin(listOfAllBlocks)]
# Saves and outputs a map
target_path = t.save_csv(target_folder + '/choropleth.csv')
print('borough_choropleth_geotable_path = %s' % target_path)