In [14]:

# Prompts users to enter a zipcode in the tool
# The default zipcode is 11419
target_folder = '/tmp'

ZipcodeInput = 11419

In [15]:

import subprocess
import sys

# This function is used to install packages using pip
# It's equivalent to doing 'pip install ______'
def install(package):
    subprocess.call([sys.executable, "-m", "pip", "install", package])

install('sodapy') # Package for NYC OpenData API
install('folium') # Package to generate map
install('fiona') # Package used to find out what points are in a polygon
install('pysal')

In [16]:

import pandas as pd
from sodapy import Socrata # Used to access/ work with NYCOpenData API
import folium

In [17]:

#################################
# WORKING WITH CATCH BASIN DATA #
#################################


# Grabbing data from API
client = Socrata("data.cityofnewyork.us",
                'YFHnlAd1f74IprxACGOlr46td',
                username="nycopendataninjas@gmail.com",
                password="DataNinjas4TheWin!")

# Limits the data to only clogged catch basin complaints in a specified zipcode^
results = client.get("fhrw-4uyv", 
                     incident_zip = ZipcodeInput,
                     complaint_type="Sewer",
                     descriptor = "Catch Basin Clogged/Flooding (Use Comments) (SC)",
                     limit=10000)

# Convert to pandas DataFrame
df_threeOneOneReq = pd.DataFrame.from_records(results)

# Only gets the location of these complaints
complaintLoc = df_threeOneOneReq[['latitude','longitude']]

In [18]:

#################################
# WORKING WITH TREE CENSUS DATA #
#################################


# Limits the data to only trees that are ALIVE in that specified zipcode that was entered above^
results = client.get("5rq2-4hqu",
                     zipcode = ZipcodeInput,
                     status = 'Alive',
                     limit=10000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

# Only get the columns that are useful
results_df = results_df[['tree_dbh', 'health','status','latitude','longitude','spc_latin']]            
            
# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['Poor','Fair','Good'],[0,50,100])

# 'tree_dbh' was an object, this converts it to an int so that it can be added to 'health' and 'status'
results_df['tree_dbh'] = pd.to_numeric(results_df['tree_dbh'])

# Anywhere there is an 'NaN', make it a zero
results_df = results_df.fillna(0)

# Looks through list of each species and it's type
df = pd.read_csv('Species_Types.csv')
df = df.set_index('Species')

# Decides whether each tree is deciduous, conferous, etc.
results_df['Type'] = df.loc[results_df.spc_latin,'Type'].values

# Replaces words with numbers so that it is easier to create a 'grade' for each tree
results_df = results_df.replace(['deciduous','coniferous','evergreen','both'],[1,0,0,0])

# Generates a final grade that will be the value of the weight on the heat map for each tree
results_df['Final Grade'] = ((results_df.tree_dbh + results_df.health)/100)*results_df.Type

# Removes all the trees that dont lose leaves
results_df = results_df[results_df.Type != 0]
results_df = results_df.fillna(0)

In [19]:

# Only gets the location of these trees

treesLoc = results_df[['latitude', 'longitude']].copy()
treesLoc.dropna(subset=['latitude','longitude'], inplace=True)

In [20]:

df_threeOneOneReq_LOC = df_threeOneOneReq[['latitude', 'longitude']].copy()
df_threeOneOneReq_LOC.dropna(subset=['latitude','longitude'], inplace=True)

In [21]:

####################################
#   GETTING COMPLAINT COUNTS       #                            
#   WITHIN A 100 METER RADIUS      #
#         OF EACH TREE             #
####################################

import numpy as np
from pysal.lib.cg import KDTree
from pysal.cg import RADIUS_EARTH_MILES

complaints_xys = df_threeOneOneReq_LOC[['latitude', 'longitude']].astype(np.float).values 
complaints_tree = KDTree(complaints_xys, distance_metric='Arc', radius=RADIUS_EARTH_MILES)

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-21-5498aff51266> in <module>
      7 import numpy as np
      8 from pysal.lib.cg import KDTree
----> 9 from pysal.cg import RADIUS_EARTH_MILES
     10 
     11 complaints_xys = df_threeOneOneReq_LOC[['latitude', 'longitude']].astype(np.float).values

ModuleNotFoundError: No module named 'pysal.cg'

In [ ]:

complaints_count = len(complaints_xys)
complaints_count

In [ ]:

xy = 40.682460735128025,-73.8300148272251
distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=0.5)

In [ ]:

indices
indices[~np.isnan(indices)]
len(indices[~np.isnan(indices)])

In [ ]:

# Setting radius equal to ~ 100 meters
radius_in_miles = 0.0497097

# Function that can find the number of complaints within 100 meters from each tree
def get_complaint_count(r):
    xy = r['latitude'], r['longitude']
    distances, indices = complaints_tree.query(xy, k=complaints_count, distance_upper_bound=radius_in_miles)
    indices = indices[~np.isnan(indices)]
    return len(indices)

# Applying functtion to each tree
treesLoc = treesLoc.apply(pd.to_numeric)
treesLoc['# of Complaints within 0.5 miles'] = treesLoc.apply(get_complaint_count,axis=1)

In [ ]:

# Adding that column to the results_df
results_df['complaints'] = treesLoc['# of Complaints within 0.5 miles']

In [ ]:

# This is what the final dataframe will look like
#results_df

In [ ]:

# Used to print table in final tool result
# We most likely will not need it
# because we are using a map

from os.path import join
target_path = join(target_folder, 'results.csv')
results_df.to_csv(target_path, index=False)
print('result_table_path = %s' % target_path)

Pay Notebook Creator: Naiem Gafar	0
Set Container: Numerical CPU with TINY Memory for 10 Minutes	0
Total	0

Leaves Outlook