Leaves Outlook




Pay Notebook Creator: Naiem Gafar0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [1]:
# Prompts users to enter a zipcode in the tool
# The default zipcode is 11419
target_folder = '/tmp'

ZipcodeInput = 11419
In [17]:
import subprocess
import sys

# This function is used to install packages using pip
# It's equivalent to doing 'pip install ______'
def install(package):
    subprocess.call([sys.executable, "-m", "pip", "install", package])

install('sodapy') # Package for NYC OpenData API
install('folium') # Package to generate map
install('fiona') # Package used to find out what points are in a polygon
install('pysal')
In [18]:
import pandas as pd
from sodapy import Socrata # Used to access/ work with NYCOpenData API
import folium
In [19]:
#################################
# WORKING WITH CATCH BASIN DATA #
#################################


# Grabbing data from API
client = Socrata("data.cityofnewyork.us",
                'YFHnlAd1f74IprxACGOlr46td',
                username="nycopendataninjas@gmail.com",
                password="DataNinjas4TheWin!")

# Limits the data to only clogged catch basin complaints in a specified zipcode^
results = client.get("fhrw-4uyv", 
                     incident_zip = ZipcodeInput,
                     complaint_type="Sewer",
                     descriptor = "Catch Basin Clogged/Flooding (Use Comments) (SC)",
                     limit=10000)

# Convert to pandas DataFrame
df_threeOneOneReq = pd.DataFrame.from_records(results)

# Only gets the location of these complaints
complaintLoc = df_threeOneOneReq[['latitude','longitude']]
In [20]:
#################################
# WORKING WITH TREE CENSUS DATA #
#################################


# Limits the data to only trees that are ALIVE in that specified zipcode that was entered above^
results = client.get("5rq2-4hqu",
                     zipcode = ZipcodeInput,
                     status = 'Alive',
                     limit=10000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# Only get the columns that are useful
results_df = results_df[['tree_dbh', 'health','status','latitude','longitude','spc_latin']]            
            
# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['Poor','Fair','Good'],[0,50,100])

# 'tree_dbh' was an object, this converts it to an int so that it can be added to 'health' and 'status'
results_df['tree_dbh'] = pd.to_numeric(results_df['tree_dbh'])

# Anywhere there is an 'NaN', make it a zero
results_df = results_df.fillna(0)

# Looks through list of each species and it's type
df = pd.read_csv('Species_Types.csv')
df = df.set_index('Species')

# Decides whether each tree is deciduous, conferous, etc.
results_df['Type'] = df.loc[results_df.spc_latin,'Type'].values

# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['deciduous','coniferous','evergreen','both'],[1,0,0,0])

# Generates a final grade that will be the value of the weight on the heat map for each tree
results_df['Final Grade'] = ((results_df.tree_dbh + results_df.health)/100)*results_df.Type

# Removes all the trees that dont lose leaves
results_df = results_df[results_df.Type != 0]
results_df = results_df.fillna(0)
In [21]:
# Only gets the location of these trees

treesLoc = results_df[['latitude', 'longitude']].copy()
treesLoc.dropna(subset=['latitude','longitude'], inplace=True)
In [22]:
df_threeOneOneReq_LOC = df_threeOneOneReq[['latitude', 'longitude']].copy()
df_threeOneOneReq_LOC.dropna(subset=['latitude','longitude'], inplace=True)
In [23]:
####################################
#   GETTING COMPLAINT COUNTS       #                            
#   WITHIN A 100 METER RADIUS      #
#         OF EACH TREE             #
####################################

import numpy as np
from pysal.kdtree import KDTree
from pysal.cg import RADIUS_EARTH_MILES

complaints_xys = df_threeOneOneReq_LOC[['latitude', 'longitude']].astype(np.float).values 
complaints_tree = KDTree(complaints_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-23-668d60b25a9b> in <module>
      6 
      7 import numpy as np
----> 8 from pysal.kdtree import KDTree
      9 from pysal.cg import RADIUS_EARTH_MILES
     10 

ModuleNotFoundError: No module named 'pysal.kdtree'
In [ ]:
complaints_count = len(complaints_xys)
complaints_count
In [ ]:
xy = 40.682460735128025,-73.8300148272251
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=0.5)
In [ ]:
indices
indices[~np.isnan(indices)]
len(indices[~np.isnan(indices)])
In [ ]:
# Setting radius equal to ~ 100 meters
radius_in_miles = 0.0497097

# Function that can find the number of complaints within 100 meters from each tree
def get_complaint_count(r):
    xy = r['latitude'], r['longitude']
    distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=radius_in_miles)
    indices = indices[~np.isnan(indices)]
    return len(indices)

# Applying functtion to each tree
treesLoc = treesLoc.apply(pd.to_numeric)
treesLoc['# of Complaints within 0.5 miles'] = treesLoc.apply(get_complaint_count,axis=1)
In [ ]:
# Adding that column to the results_df
results_df['complaints'] = treesLoc['# of Complaints within 0.5 miles']
In [ ]:
# This is what the final dataframe will look like
#results_df
In [19]:
# Used to print table in final tool result
# We most likely will not need it
# because we are using a map

from os.path import join
target_path = join(target_folder, 'results.csv')
results_df.to_csv(target_path, index=False)
print('result_table_path = %s' % target_path)
result_table_path = /tmp/results.csv
In [20]:
#################################
#     Generating a Heatmap      #
#################################


from folium import plugins
from folium.plugins import HeatMap

# Centers the map at the first coordinate in that zipcode
starting_Lat = results_df.iloc[0]['latitude']
starting_Long = results_df.iloc[0]['longitude']

# Coverts the starting points from string to float
starting_Lat = pd.to_numeric(starting_Lat, downcast='float')
starting_Long = pd.to_numeric(starting_Long, downcast='float')

# Creates the map centered at that point^, b/w, zoomed in
map_hooray = folium.Map(location=[starting_Lat, starting_Long],
                    tiles = "Stamen Toner",
                    zoom_start = 14.5)

# Ensure you're handing it floats
results_df['Latitude'] = results_df['latitude'].astype(float)
results_df['Longitude'] = results_df['longitude'].astype(float)
results_df['Final_Grade'] = results_df['Final Grade'].astype(float)

results_df = results_df.fillna(0)

# This is what we will be putting onto the map: Latitude, longitude, and a "weight"
heat_data = [[row['Latitude'],row['Longitude'],row['Final Grade']] for index, row in results_df.iterrows()]

# Plot it on the map
HeatMap(heat_data, 
        min_opacity = 0.01, 
        max_val = 1.5, 
        blur = 20, 
       ).add_to(map_hooray)

# Allows the map to go fullscreen
folium.plugins.Fullscreen(position='topright',
                          title='Full Screen',
                          title_cancel='Exit Full Screen',
                          force_separate_button=True
                         ).add_to(map_hooray)

# Display the map
# map_hooray
Out[20]:
<folium.plugins.fullscreen.Fullscreen at 0x7f79c01e1400>
In [21]:
#################################
#       Training a Model        #
#################################
In [22]:
x = results_df[[
    'tree_dbh',
    'health',
    'Type'
]]
y = results_df['complaints']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2655             try:
-> 2656                 return self._engine.get_loc(key)
   2657             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'complaints'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-22-4d2caab6f88f> in <module>
      4     'Type'
      5 ]]
----> 6 y = results_df['complaints']

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2925             if self.columns.nlevels > 1:
   2926                 return self._getitem_multilevel(key)
-> 2927             indexer = self.columns.get_loc(key)
   2928             if is_integer(indexer):
   2929                 indexer = [indexer]

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2656                 return self._engine.get_loc(key)
   2657             except KeyError:
-> 2658                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2659         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2660         if indexer.ndim > 1 or indexer.size > 1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'complaints'
In [23]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
model1.fit(x,y)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-72995336db3d> in <module>
      2 from sklearn.linear_model import LinearRegression
      3 model1 = LinearRegression()
----> 4 model1.fit(x,y)

NameError: name 'y' is not defined
In [ ]:
cross_val_score(model1, x,y,cv=3,scoring = 'neg_mean_absolute_error')
In [24]:
q = [19,50,1]
model1.predict([q])
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-24-9c8fb493c418> in <module>
      1 q = [19,50,1]
----> 2 model1.predict([q])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in predict(self, X)
    211             Returns predicted values.
    212         """
--> 213         return self._decision_function(X)
    214 
    215     _preprocess_data = staticmethod(_preprocess_data)

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in _decision_function(self, X)
    192 
    193     def _decision_function(self, X):
--> 194         check_is_fitted(self, "coef_")
    195 
    196         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
    949 
    950     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
--> 951         raise NotFittedError(msg % {'name': type(estimator).__name__})
    952 
    953 

NotFittedError: This LinearRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.
In [25]:
from sklearn.linear_model import BayesianRidge
model2 = BayesianRidge()
model2.fit(x,y)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-d310d22afd5f> in <module>
      1 from sklearn.linear_model import BayesianRidge
      2 model2 = BayesianRidge()
----> 3 model2.fit(x,y)

NameError: name 'y' is not defined
In [26]:
cross_val_score(model2, x,y,cv=3,scoring = 'neg_mean_absolute_error').mean()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-c9ce37440a3d> in <module>
----> 1 cross_val_score(model2, x,y,cv=3,scoring = 'neg_mean_absolute_error').mean()

NameError: name 'y' is not defined
In [27]:
model2.predict([q])
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-27-76beb5d96d87> in <module>
----> 1 model2.predict([q])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/bayes.py in predict(self, X, return_std)
    281             Standard deviation of predictive distribution of query points.
    282         """
--> 283         y_mean = self._decision_function(X)
    284         if return_std is False:
    285             return y_mean

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in _decision_function(self, X)
    192 
    193     def _decision_function(self, X):
--> 194         check_is_fitted(self, "coef_")
    195 
    196         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
    949 
    950     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
--> 951         raise NotFittedError(msg % {'name': type(estimator).__name__})
    952 
    953 

NotFittedError: This BayesianRidge instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.
In [28]:
import os
import webbrowser
map_hooray.save('map.html')
In [29]:
# Check / Test to see if this is needed or not
treesLoc.reset_index(drop=True)
Out[29]:
<style scoped> .dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; } </style>
latitude longitude
0 40.691991 -73.821149
1 40.691615 -73.820965
2 40.691536 -73.820927
3 40.692100 -73.828326
4 40.691667 -73.829862
5 40.692130 -73.828220
6 40.693824 -73.827536
7 40.693856 -73.827425
8 40.693941 -73.827122
9 40.693793 -73.827646
10 40.689675 -73.822217
11 40.694075 -73.827285
12 40.694121 -73.827076
13 40.693230 -73.824366
14 40.693214 -73.824419
15 40.691902 -73.821105
16 40.694098 -73.827180
17 40.693246 -73.824309
18 40.693182 -73.824528
19 40.693199 -73.824471
20 40.693167 -73.824580
21 40.693522 -73.823319
22 40.693135 -73.824689
23 40.693887 -73.827313
24 40.694049 -73.827406
25 40.694274 -73.822402
26 40.690312 -73.834718
27 40.693152 -73.830718
28 40.692824 -73.831882
29 40.692892 -73.831644
... ... ...
2426 40.689348 -73.831430
2427 40.690265 -73.831895
2428 40.689197 -73.831354
2429 40.687569 -73.830611
2430 40.689156 -73.831333
2431 40.687181 -73.830419
2432 40.688106 -73.830876
2433 40.687376 -73.830515
2434 40.687238 -73.830447
2435 40.686767 -73.830215
2436 40.690549 -73.832040
2437 40.690175 -73.831850
2438 40.688777 -73.831141
2439 40.690836 -73.832185
2440 40.686613 -73.830138
2441 40.690677 -73.832105
2442 40.690915 -73.832225
2443 40.689589 -73.831553
2444 40.689072 -73.831290
2445 40.690733 -73.832133
2446 40.690925 -73.832081
2447 40.688297 -73.831555
2448 40.688386 -73.831244
2449 40.688347 -73.831379
2450 40.689339 -73.831288
2451 40.690641 -73.832086
2452 40.689570 -73.831403
2453 40.688433 -73.831586
2454 40.688540 -73.831207
2455 40.690671 -73.831954
<p>2456 rows × 2 columns</p>
In [30]:
# Function that checks to see what polygon a point is located in
# In other words, it tells what block each tree is located on

import fiona
from shapely.geometry import Point, shape  

def coor_to_nbr(longit, lat):
    mypoint = Point(longit, lat)   
    NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
    with fiona.open(NY_nbr_shpfile) as shp:
        polygons = [poly for poly in shp]
    poly_idx = [i for i, poly in enumerate(polygons)
                if mypoint.within(shape(poly['geometry']))]
    if poly_idx: poly_idx
    if not poly_idx:
        return None
    else:
        # Take first polygon that overlaps since may overlap with several if on border
        match = polygons[poly_idx[0]]
        return match['properties']['bctcb2010']

# Testing all the points to see their street
treesLoc['Block Location'] = list(map(coor_to_nbr, treesLoc['longitude'],treesLoc['latitude']))
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-30-e3ed38438810> in <module>
     21 
     22 # Testing all the points to see their street
---> 23 treesLoc['Block Location'] = list(map(coor_to_nbr, treesLoc['longitude'],treesLoc['latitude']))

<ipython-input-30-e3ed38438810> in coor_to_nbr(longit, lat)
      9     NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
     10     with fiona.open(NY_nbr_shpfile) as shp:
---> 11         polygons = [poly for poly in shp]
     12     poly_idx = [i for i, poly in enumerate(polygons)
     13                 if mypoint.within(shape(poly['geometry']))]

<ipython-input-30-e3ed38438810> in <listcomp>(.0)
      9     NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
     10     with fiona.open(NY_nbr_shpfile) as shp:
---> 11         polygons = [poly for poly in shp]
     12     poly_idx = [i for i, poly in enumerate(polygons)
     13                 if mypoint.within(shape(poly['geometry']))]

KeyboardInterrupt: 
In [ ]:
# Final dataframe will also tell what block that tree is located on
# This will be used to create a 'riskiness' for each block based on the average grade of the trees in that block
# This 'riskiness' will then we attributed to a color scale
# Kinda like red more risky, orange in the middle, yellow not so bad
treesLoc
In [ ]:
# Gets all of the unique blocks from that zipcode
blocks = treesLoc['Block Location']
listOfAllBlocks = treesLoc['Block Location'].unique()
listOfAllBlocks

# Only shows the polygon for these specific blocks
# This ensures that only the polygons for the trees that we're looking at is showing
from geotable import GeoTable
t = GeoTable.load("geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp")
t= t.loc[t['bctcb2010'].isin(listOfAllBlocks)]

# Saves and outputs a map
target_path = t.save_csv(target_folder + '/choropleth.csv')
print('borough_choropleth_geotable_path = %s' % target_path)
In [ ]: