In [1]:

# Prompts users to enter a zipcode in the tool
# The default zipcode is 11419
target_folder = '/tmp'

ZipcodeInput = 11419

In [17]:

import subprocess
import sys

# This function is used to install packages using pip
# It's equivalent to doing 'pip install ______'
def install(package):
    subprocess.call([sys.executable, "-m", "pip", "install", package])

install('sodapy') # Package for NYC OpenData API
install('folium') # Package to generate map
install('fiona') # Package used to find out what points are in a polygon
install('pysal')

In [18]:

import pandas as pd
from sodapy import Socrata # Used to access/ work with NYCOpenData API
import folium

In [19]:

#################################
# WORKING WITH CATCH BASIN DATA #
#################################


# Grabbing data from API
client = Socrata("data.cityofnewyork.us",
                'YFHnlAd1f74IprxACGOlr46td',
                username="nycopendataninjas@gmail.com",
                password="DataNinjas4TheWin!")

# Limits the data to only clogged catch basin complaints in a specified zipcode^
results = client.get("fhrw-4uyv", 
                     incident_zip = ZipcodeInput,
                     complaint_type="Sewer",
                     descriptor = "Catch Basin Clogged/Flooding (Use Comments) (SC)",
                     limit=10000)

# Convert to pandas DataFrame
df_threeOneOneReq = pd.DataFrame.from_records(results)

# Only gets the location of these complaints
complaintLoc = df_threeOneOneReq[['latitude','longitude']]

In [20]:

#################################
# WORKING WITH TREE CENSUS DATA #
#################################


# Limits the data to only trees that are ALIVE in that specified zipcode that was entered above^
results = client.get("5rq2-4hqu",
                     zipcode = ZipcodeInput,
                     status = 'Alive',
                     limit=10000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# Only get the columns that are useful
results_df = results_df[['tree_dbh', 'health','status','latitude','longitude','spc_latin']]            
            
# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['Poor','Fair','Good'],[0,50,100])

# 'tree_dbh' was an object, this converts it to an int so that it can be added to 'health' and 'status'
results_df['tree_dbh'] = pd.to_numeric(results_df['tree_dbh'])

# Anywhere there is an 'NaN', make it a zero
results_df = results_df.fillna(0)

# Looks through list of each species and it's type
df = pd.read_csv('Species_Types.csv')
df = df.set_index('Species')

# Decides whether each tree is deciduous, conferous, etc.
results_df['Type'] = df.loc[results_df.spc_latin,'Type'].values

# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['deciduous','coniferous','evergreen','both'],[1,0,0,0])

# Generates a final grade that will be the value of the weight on the heat map for each tree
results_df['Final Grade'] = ((results_df.tree_dbh + results_df.health)/100)*results_df.Type

# Removes all the trees that dont lose leaves
results_df = results_df[results_df.Type != 0]
results_df = results_df.fillna(0)

In [21]:

# Only gets the location of these trees

treesLoc = results_df[['latitude', 'longitude']].copy()
treesLoc.dropna(subset=['latitude','longitude'], inplace=True)

In [22]:

df_threeOneOneReq_LOC = df_threeOneOneReq[['latitude', 'longitude']].copy()
df_threeOneOneReq_LOC.dropna(subset=['latitude','longitude'], inplace=True)

In [23]:

####################################
#   GETTING COMPLAINT COUNTS       #                            
#   WITHIN A 100 METER RADIUS      #
#         OF EACH TREE             #
####################################

import numpy as np
from pysal.kdtree import KDTree
from pysal.cg import RADIUS_EARTH_MILES

complaints_xys = df_threeOneOneReq_LOC[['latitude', 'longitude']].astype(np.float).values 
complaints_tree = KDTree(complaints_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-23-668d60b25a9b> in <module>
      6 
      7 import numpy as np
----> 8 from pysal.kdtree import KDTree
      9 from pysal.cg import RADIUS_EARTH_MILES
     10 

ModuleNotFoundError: No module named 'pysal.kdtree'

In [ ]:

complaints_count = len(complaints_xys)
complaints_count

In [ ]:

xy = 40.682460735128025,-73.8300148272251
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=0.5)

In [ ]:

indices
indices[~np.isnan(indices)]
len(indices[~np.isnan(indices)])

In [ ]:

# Setting radius equal to ~ 100 meters
radius_in_miles = 0.0497097

# Function that can find the number of complaints within 100 meters from each tree
def get_complaint_count(r):
    xy = r['latitude'], r['longitude']
    distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=radius_in_miles)
    indices = indices[~np.isnan(indices)]
    return len(indices)

# Applying functtion to each tree
treesLoc = treesLoc.apply(pd.to_numeric)
treesLoc['# of Complaints within 0.5 miles'] = treesLoc.apply(get_complaint_count,axis=1)

In [ ]:

# Adding that column to the results_df
results_df['complaints'] = treesLoc['# of Complaints within 0.5 miles']

In [ ]:

# This is what the final dataframe will look like
#results_df

In [19]:

# Used to print table in final tool result
# We most likely will not need it
# because we are using a map

from os.path import join
target_path = join(target_folder, 'results.csv')
results_df.to_csv(target_path, index=False)
print('result_table_path = %s' % target_path)

result_table_path = /tmp/results.csv

In [20]:

#################################
#     Generating a Heatmap      #
#################################


from folium import plugins
from folium.plugins import HeatMap

# Centers the map at the first coordinate in that zipcode
starting_Lat = results_df.iloc[0]['latitude']
starting_Long = results_df.iloc[0]['longitude']

# Coverts the starting points from string to float
starting_Lat = pd.to_numeric(starting_Lat, downcast='float')
starting_Long = pd.to_numeric(starting_Long, downcast='float')

# Creates the map centered at that point^, b/w, zoomed in
map_hooray = folium.Map(location=[starting_Lat, starting_Long],
                    tiles = "Stamen Toner",
                    zoom_start = 14.5)

# Ensure you're handing it floats
results_df['Latitude'] = results_df['latitude'].astype(float)
results_df['Longitude'] = results_df['longitude'].astype(float)
results_df['Final_Grade'] = results_df['Final Grade'].astype(float)

results_df = results_df.fillna(0)

# This is what we will be putting onto the map: Latitude, longitude, and a "weight"
heat_data = [[row['Latitude'],row['Longitude'],row['Final Grade']] for index, row in results_df.iterrows()]

# Plot it on the map
HeatMap(heat_data, 
        min_opacity = 0.01, 
        max_val = 1.5, 
        blur = 20, 
       ).add_to(map_hooray)

# Allows the map to go fullscreen
folium.plugins.Fullscreen(position='topright',
                          title='Full Screen',
                          title_cancel='Exit Full Screen',
                          force_separate_button=True
                         ).add_to(map_hooray)

# Display the map
# map_hooray

Out[20]:

<folium.plugins.fullscreen.Fullscreen at 0x7f79c01e1400>

In [21]:

#################################
#       Training a Model        #
#################################

In [22]:

x = results_df[[
    'tree_dbh',
    'health',
    'Type'
]]
y = results_df['complaints']

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2655             try:
-> 2656                 return self._engine.get_loc(key)
   2657             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'complaints'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-22-4d2caab6f88f> in <module>
      4     'Type'
      5 ]]
----> 6 y = results_df['complaints']

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2925             if self.columns.nlevels > 1:
   2926                 return self._getitem_multilevel(key)
-> 2927             indexer = self.columns.get_loc(key)
   2928             if is_integer(indexer):
   2929                 indexer = [indexer]

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2656                 return self._engine.get_loc(key)
   2657             except KeyError:
-> 2658                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2659         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2660         if indexer.ndim > 1 or indexer.size > 1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'complaints'

In [23]:

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
model1.fit(x,y)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-72995336db3d> in <module>
      2 from sklearn.linear_model import LinearRegression
      3 model1 = LinearRegression()
----> 4 model1.fit(x,y)

NameError: name 'y' is not defined

In [ ]:

cross_val_score(model1, x,y,cv=3,scoring = 'neg_mean_absolute_error')

In [24]:

q = [19,50,1]
model1.predict([q])

---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-24-9c8fb493c418> in <module>
      1 q = [19,50,1]
----> 2 model1.predict([q])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in predict(self, X)
    211             Returns predicted values.
    212         """
--> 213         return self._decision_function(X)
    214 
    215     _preprocess_data = staticmethod(_preprocess_data)

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in _decision_function(self, X)
    192 
    193     def _decision_function(self, X):
--> 194         check_is_fitted(self, "coef_")
    195 
    196         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
    949 
    950     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
--> 951         raise NotFittedError(msg % {'name': type(estimator).__name__})
    952 
    953 

NotFittedError: This LinearRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

In [25]:

from sklearn.linear_model import BayesianRidge
model2 = BayesianRidge()
model2.fit(x,y)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-25-d310d22afd5f> in <module>
      1 from sklearn.linear_model import BayesianRidge
      2 model2 = BayesianRidge()
----> 3 model2.fit(x,y)

NameError: name 'y' is not defined

In [26]:

cross_val_score(model2, x,y,cv=3,scoring = 'neg_mean_absolute_error').mean()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-c9ce37440a3d> in <module>
----> 1 cross_val_score(model2, x,y,cv=3,scoring = 'neg_mean_absolute_error').mean()

NameError: name 'y' is not defined

In [27]:

model2.predict([q])

---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-27-76beb5d96d87> in <module>
----> 1 model2.predict([q])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/bayes.py in predict(self, X, return_std)
    281             Standard deviation of predictive distribution of query points.
    282         """
--> 283         y_mean = self._decision_function(X)
    284         if return_std is False:
    285             return y_mean

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/linear_model/base.py in _decision_function(self, X)
    192 
    193     def _decision_function(self, X):
--> 194         check_is_fitted(self, "coef_")
    195 
    196         X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

~/.virtualenvs/crosscompute/lib/python3.6/site-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
    949 
    950     if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
--> 951         raise NotFittedError(msg % {'name': type(estimator).__name__})
    952 
    953 

NotFittedError: This BayesianRidge instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

In [28]:

import os
import webbrowser
map_hooray.save('map.html')

In [29]:

# Check / Test to see if this is needed or not
treesLoc.reset_index(drop=True)

Out[29]:

	latitude	longitude
0	40.691991	-73.821149
1	40.691615	-73.820965
2	40.691536	-73.820927
3	40.692100	-73.828326
4	40.691667	-73.829862
5	40.692130	-73.828220
6	40.693824	-73.827536
7	40.693856	-73.827425
8	40.693941	-73.827122
9	40.693793	-73.827646
10	40.689675	-73.822217
11	40.694075	-73.827285
12	40.694121	-73.827076
13	40.693230	-73.824366
14	40.693214	-73.824419
15	40.691902	-73.821105
16	40.694098	-73.827180
17	40.693246	-73.824309
18	40.693182	-73.824528
19	40.693199	-73.824471
20	40.693167	-73.824580
21	40.693522	-73.823319
22	40.693135	-73.824689
23	40.693887	-73.827313
24	40.694049	-73.827406
25	40.694274	-73.822402
26	40.690312	-73.834718
27	40.693152	-73.830718
28	40.692824	-73.831882
29	40.692892	-73.831644
...	...	...
2426	40.689348	-73.831430
2427	40.690265	-73.831895
2428	40.689197	-73.831354
2429	40.687569	-73.830611
2430	40.689156	-73.831333
2431	40.687181	-73.830419
2432	40.688106	-73.830876
2433	40.687376	-73.830515
2434	40.687238	-73.830447
2435	40.686767	-73.830215
2436	40.690549	-73.832040
2437	40.690175	-73.831850
2438	40.688777	-73.831141
2439	40.690836	-73.832185
2440	40.686613	-73.830138
2441	40.690677	-73.832105
2442	40.690915	-73.832225
2443	40.689589	-73.831553
2444	40.689072	-73.831290
2445	40.690733	-73.832133
2446	40.690925	-73.832081
2447	40.688297	-73.831555
2448	40.688386	-73.831244
2449	40.688347	-73.831379
2450	40.689339	-73.831288
2451	40.690641	-73.832086
2452	40.689570	-73.831403
2453	40.688433	-73.831586
2454	40.688540	-73.831207
2455	40.690671	-73.831954

<p>2456 rows × 2 columns</p>

In [30]:

# Function that checks to see what polygon a point is located in
# In other words, it tells what block each tree is located on

import fiona
from shapely.geometry import Point, shape  

def coor_to_nbr(longit, lat):
    mypoint = Point(longit, lat)   
    NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
    with fiona.open(NY_nbr_shpfile) as shp:
        polygons = [poly for poly in shp]
    poly_idx = [i for i, poly in enumerate(polygons)
                if mypoint.within(shape(poly['geometry']))]
    if poly_idx: poly_idx
    if not poly_idx:
        return None
    else:
        # Take first polygon that overlaps since may overlap with several if on border
        match = polygons[poly_idx[0]]
        return match['properties']['bctcb2010']

# Testing all the points to see their street
treesLoc['Block Location'] = list(map(coor_to_nbr, treesLoc['longitude'],treesLoc['latitude']))

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-30-e3ed38438810> in <module>
     21 
     22 # Testing all the points to see their street
---> 23 treesLoc['Block Location'] = list(map(coor_to_nbr, treesLoc['longitude'],treesLoc['latitude']))

<ipython-input-30-e3ed38438810> in coor_to_nbr(longit, lat)
      9     NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
     10     with fiona.open(NY_nbr_shpfile) as shp:
---> 11         polygons = [poly for poly in shp]
     12     poly_idx = [i for i, poly in enumerate(polygons)
     13                 if mypoint.within(shape(poly['geometry']))]

<ipython-input-30-e3ed38438810> in <listcomp>(.0)
      9     NY_nbr_shpfile = "geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp"
     10     with fiona.open(NY_nbr_shpfile) as shp:
---> 11         polygons = [poly for poly in shp]
     12     poly_idx = [i for i, poly in enumerate(polygons)
     13                 if mypoint.within(shape(poly['geometry']))]

KeyboardInterrupt:

In [ ]:

# Final dataframe will also tell what block that tree is located on
# This will be used to create a 'riskiness' for each block based on the average grade of the trees in that block
# This 'riskiness' will then we attributed to a color scale
# Kinda like red more risky, orange in the middle, yellow not so bad
treesLoc

In [ ]:

# Gets all of the unique blocks from that zipcode
blocks = treesLoc['Block Location']
listOfAllBlocks = treesLoc['Block Location'].unique()
listOfAllBlocks

# Only shows the polygon for these specific blocks
# This ensures that only the polygons for the trees that we're looking at is showing
from geotable import GeoTable
t = GeoTable.load("geo_export_0c48d94e-1efc-4997-a51f-34df0cb1a82c.shp")
t= t.loc[t['bctcb2010'].isin(listOfAllBlocks)]

# Saves and outputs a map
target_path = t.save_csv(target_folder + '/choropleth.csv')
print('borough_choropleth_geotable_path = %s' % target_path)

In [ ]:

Pay Notebook Creator: Naiem Gafar	0
Set Container: Numerical CPU with TINY Memory for 10 Minutes	0
Total	0

Leaves Outlook