from IPython.core.display import Image
# Sample graffiti photographs, each loaded from a file under the local images/ directory.
# NOTE(review): a bare Image(...) expression is only rendered when it is the last
# expression of a notebook cell - presumably each line was its own cell; confirm.
Image(filename='images/Xela-PazAmor.jpg') # Xela
Image(filename='images/StickerLady-TimeTravel.jpg') # Singapore (Secret!)
Image(filename='images/NeckFace-CreepingSleeping.jpg') # Los Angeles
Image(filename='images/NewYork-OldTimers.jpg') # New York
Rebecca is an anthropologist who wants to understand New York through its graffiti. Help her find the subway entrances with the greatest number of graffiti sightings within a hundred-foot radius.
from pandas import read_csv
# Load the two datasets: graffiti sightings and subway entrances.
graffiti = read_csv('datasets/NYC-GraffitiSightings.csv')
subway = read_csv('datasets/NYC-SubwayEntrances.csv')

# Peek at the first graffiti record to see which columns are available.
# `.ix` has been removed from pandas; `.iloc[0]` selects the first row by
# position, which is the same row on the default index read_csv produces.
graffiti.iloc[0]

# Keep only the sightings whose Status is 'Open', then reduce them to their
# coordinate pair under the short column names X and Y. Rows missing either
# coordinate are dropped.
graffiti = graffiti[graffiti.Status == 'Open']
graffitiXY = (
    graffiti[['X Coordinate', 'Y Coordinate']]
    .rename(columns={'X Coordinate': 'X', 'Y Coordinate': 'Y'})
    .dropna()
)

# Peek at the first subway entrance record.
subway.iloc[0]
from pandas import Series
from geometryIO import get_transformPoint, proj4LL
# PROJ.4 definition of the projected coordinate system used for New York.
# The adjacent literals below concatenate into one space-separated string.
# `+to_meter=0.3048006096012192` is the US survey foot, so projected X/Y
# values are expressed in feet.
proj4NY = (
    '+proj=lcc '
    '+lat_1=41.03333333333333 '
    '+lat_2=40.66666666666666 '
    '+lat_0=40.16666666666666 '
    '+lon_0=-74 '
    '+x_0=300000.0000000001 '
    '+y_0=0 '
    '+ellps=GRS80 '
    '+datum=NAD83 '
    '+to_meter=0.3048006096012192 '
    '+no_defs'
)
# Callable that converts a point from the proj4LL system into proj4NY
# (presumably longitude/latitude -> projected x/y, judging by how
# parse_point calls it - confirm against geometryIO).
transformPoint = get_transformPoint(proj4LL, proj4NY)
def parse_point(row):
    """Turn one subway-entrance row into its ID and projected X/Y position.

    The row's 'Shape' field is read as text of the form "(latitude, longitude)".
    The parentheses are removed, the two numbers are parsed, and the point is
    passed to the module-level ``transformPoint`` as (longitude, latitude).

    Returns a Series with the entries ID (taken from 'OBJECTID'), X and Y.
    """
    shape_text = row['Shape']
    bare_text = shape_text.replace('(', '').replace(')', '')
    lat_text, lon_text = bare_text.split(',')
    # transformPoint takes longitude first, the reverse of the order in 'Shape'.
    projected_x, projected_y = transformPoint(float(lon_text), float(lat_text))
    return Series({'ID': row['OBJECTID'], 'X': projected_x, 'Y': projected_y})
# Run parse_point over every subway row (axis=1 passes one row per call) to get
# a table of ID, X and Y, then keep a coordinates-only view for spatial indexing.
subwayIDXY = subway.apply(parse_point, axis=1)
subwayXY = subwayIDXY.loc[:, ['X', 'Y']]
from scipy.spatial import KDTree
# Extract the raw coordinate arrays from both tables ...
subwayXYValues, graffitiXYValues = subwayXY.values, graffitiXY.values
# ... and build one KD-tree per point set for radius searches between them.
subwayTree, graffitiTree = KDTree(subwayXYValues), KDTree(graffitiXYValues)
from pandas import DataFrame
# For each subway entrance, collect the indices of the graffiti points lying
# within r=100 coordinate units of it (one list of indices per entrance, in
# the same order as the rows of subwayIDXY).
graffitiXYIndexPacks = subwayTree.query_ball_tree(graffitiTree, r=100)

# Pair every entrance ID with the number of graffiti points found around it.
results = [
    [subwayID, len(graffitiXYIndices)]
    for subwayID, graffitiXYIndices in zip(subwayIDXY['ID'], graffitiXYIndexPacks)
]
subwayGraffiti = DataFrame(results, columns=['OBJECTID', 'COUNT'])

# Attach the original subway attributes; merge() joins on the columns the two
# tables have in common.
subwayGraffiti = subwayGraffiti.merge(subway)

# Show the entrances with the highest counts first.
# DataFrame.sort() has been removed from pandas; sort_values() is its replacement.
subwayGraffiti.sort_values('COUNT', ascending=False)[['LINE', 'NAME', 'COUNT']].head()