Introduction to Computational Analysis




Pay Notebook Creator: Roy Hyunjin Han0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0
In [ ]:
# Import Image from the public IPython.display namespace rather than the
# internal IPython.core.display module path.
from IPython.display import Image

# Render the image file inline; the trailing tag is the author's location note.
Image(filename='images/Xela-PazAmor.jpg')  # Xela
In [ ]:
# Render the image file inline as this cell's output; the trailing tag is the author's location note.
Image(filename='images/StickerLady-TimeTravel.jpg')  # Singapore (Secret!)
In [ ]:
# Render the image file inline as this cell's output; the trailing tag is the author's location note.
Image(filename='images/NeckFace-CreepingSleeping.jpg')  # Los Angeles
In [ ]:
# Render the image file inline as this cell's output; the trailing tag is the author's location note.
Image(filename='images/NewYork-OldTimers.jpg')  # New York

Rebecca is an anthropologist who wants to understand New York through its graffiti. Help her find the subway entrances with the greatest number of graffiti sightings within a hundred-foot radius.

In [ ]:
from pandas import read_csv
# Load the two raw tables (graffiti sightings first, then subway entrances)
# from the CSV files shipped in the datasets/ folder.
graffiti, subway = (
    read_csv(csvPath)
    for csvPath in (
        'datasets/NYC-GraffitiSightings.csv',
        'datasets/NYC-SubwayEntrances.csv',
    )
)
In [ ]:
# Peek at the first graffiti record to see which columns are available.
# .iloc replaces the .ix indexer, which pandas deprecated in 0.20 and removed
# in 1.0; on the default integer index produced by read_csv, row 0 is the same
# record either way.
graffiti.iloc[0]
In [ ]:
# Keep only the sightings whose Status column equals 'Open'.
graffiti = graffiti.loc[graffiti['Status'] == 'Open']
In [ ]:
# Narrow the sightings table down to its two coordinate columns.
graffitiXY = graffiti.loc[:, ['X Coordinate', 'Y Coordinate']]
In [ ]:
# Shorten the coordinate column names to X and Y, matching the column names
# that parse_point produces for the subway entrances.
graffitiXY = graffitiXY.rename(columns={
    'X Coordinate': 'X',
    'Y Coordinate': 'Y',
})
In [ ]:
# Discard every sighting that is missing either coordinate, so that only
# complete (X, Y) points remain for the spatial index built later.
graffitiXY = graffitiXY.dropna(how='any')
In [ ]:
# Peek at the first subway-entrance record to see which columns are available.
# .iloc replaces the .ix indexer, which pandas deprecated in 0.20 and removed
# in 1.0; on the default integer index produced by read_csv, row 0 is the same
# record either way.
subway.iloc[0]
In [ ]:
from pandas import Series
from geometryIO import get_transformPoint, proj4LL

# PROJ.4 definition of the target coordinate system: a Lambert conformal conic
# projection on the NAD83 datum. The +to_meter factor is the US survey foot,
# so projected coordinates come out in feet.
# NOTE(review): the parameters appear to match EPSG:2263 (NY Long Island
# state plane) — confirm.
proj4NY = (
    '+proj=lcc '
    '+lat_1=41.03333333333333 '
    '+lat_2=40.66666666666666 '
    '+lat_0=40.16666666666666 '
    '+lon_0=-74 '
    '+x_0=300000.0000000001 '
    '+y_0=0 '
    '+ellps=GRS80 '
    '+datum=NAD83 '
    '+to_meter=0.3048006096012192 '
    '+no_defs'
)
# Callable that converts a point from the proj4LL system into proj4NY.
transformPoint = get_transformPoint(proj4LL, proj4NY)

def parse_point(row):
    """Turn one subway-entrance row into its ID and projected X/Y position.

    The row's 'Shape' field is read as text of the form
    "(latitude, longitude)". The parentheses are removed, the two numbers are
    parsed, and the point is passed to transformPoint as (longitude, latitude).

    Returns a Series with entries ID (taken from the row's 'OBJECTID'), X and Y.
    """
    shapeText = row['Shape']
    for bracket in '()':
        shapeText = shapeText.replace(bracket, '')
    latitudeText, longitudeText = shapeText.split(',')
    # transformPoint takes longitude first, the reverse of the order in the text.
    x, y = transformPoint(float(longitudeText), float(latitudeText))
    return Series({'ID': row['OBJECTID'], 'X': x, 'Y': y})
# Run parse_point over every subway row to get one ID/X/Y record per entrance,
# then keep a coordinates-only view for the spatial index.
subwayIDXY = subway.apply(func=parse_point, axis=1)
subwayXY = subwayIDXY.loc[:, ['X', 'Y']]

Count the graffiti sightings within 100 feet of each subway entrance

In [ ]:
from scipy.spatial import KDTree
# Pull the raw coordinate arrays out of both frames, then index each point set
# in its own KD-tree so that neighbourhood queries can be run between them.
subwayXYValues, graffitiXYValues = subwayXY.values, graffitiXY.values
subwayTree, graffitiTree = KDTree(subwayXYValues), KDTree(graffitiXYValues)
In [ ]:
from pandas import DataFrame

# For each subway entrance, collect the indices of the graffiti points that lie
# within r=100 coordinate units of it (feet, given the proj4NY definition).
graffitiXYIndexPacks = subwayTree.query_ball_tree(graffitiTree, r=100)
# Pair each entrance ID with the number of nearby sightings; the packs come
# back in the same order as the points used to build subwayTree.
results = [
    [subwayID, len(graffitiXYIndices)]
    for subwayID, graffitiXYIndices in zip(subwayIDXY['ID'], graffitiXYIndexPacks)
]
subwayGraffiti = DataFrame(results, columns=['OBJECTID', 'COUNT'])
In [ ]:
# Attach the original subway attributes to the per-entrance counts; merge joins
# on the columns the two frames have in common.
subwayGraffiti = subwayGraffiti.merge(subway)
# Show the entrances with the highest counts. DataFrame.sort was deprecated in
# pandas 0.17 and later removed; sort_values is its replacement.
subwayGraffiti.sort_values('COUNT', ascending=False)[['LINE', 'NAME', 'COUNT']].head()