Introduction to Computational Analysis




Pay Notebook Creator: Roy Hyunjin Han (0)
Set Container: Numerical CPU with TINY Memory for 10 Minutes (0)
Total: 0
In [1]:
# NOTE: this whole cell is one string literal, so none of the code inside it
# is executed. The text shows how an HDF5 store can be built from a CSV export:
# read the CSV using column 1 as a date-parsed index, keep six columns, sort,
# and save the result under the 'issues' key.
# NOTE(review): the .h5 file name used here differs from the path the next cell
# opens -- presumably the store was renamed/moved afterwards; confirm.
"""
from pandas import HDFStore, read_csv

def load_csv(path):
    return read_csv(path, index_col=1, parse_dates=True)[[
        'Agency',
        'Agency Name',
        'Complaint Type',
        'Borough',
        'X Coordinate (State Plane)',
        'Y Coordinate (State Plane)',
    ]].sort()
store = HDFStore('311-20111030-20111105.h5')
store['issues'] = load_csv('311-20111030-20111105.csv')
"""
In [2]:
from pandas import HDFStore

# Open the prepared store read-only. HDFStore defaults to mode='a', which
# opens the file for writing and silently creates an empty file when the path
# is wrong -- the mistake would then only surface as a KeyError on 'issues'.
# With mode='r' a missing file fails immediately and the data cannot be
# modified by accident.
store = HDFStore('datasets/NYC-311-ServiceRequests.h5', mode='r')
# Load the table saved under the 'issues' key; `store` is left open for later
# cells (call store.close() when finished with it).
issues = store['issues']
In [3]:
# Show the first record. `.iloc` selects strictly by position; the original
# `.ix` indexer mixed label and positional lookup and was removed in pandas 1.0.
issues.iloc[0]
In [4]:
# How many 311 issues were reported that week?
# The row count of the frame is the number of reported issues.
issues.shape[0]
In [5]:
# How many issues were reported on Halloween?
# Label-based lookup with a date string selects every row whose timestamp
# falls on that day (assumes `issues` has a datetime index -- the daily
# resample later in this notebook relies on the same thing).
# `.loc` replaces the `.ix` indexer, which was removed in pandas 1.0.
len(issues.loc['2011-10-31'])
In [6]:
# What were the top five categories reported that week?
# value_counts() orders categories from most to least frequent, so the first
# five entries are the five most common complaint types.
issues['Complaint Type'].value_counts().head(5)
In [7]:
# What was the daily distribution of issues?
# Bucket the records by calendar day and count the rows in each bucket.
# `resample(..., how=len)` was removed from pandas; `.resample('D').size()`
# is the equivalent row count per day. The trailing `;` hides the plot repr.
issues['Complaint Type'].resample('D').size().plot();
In [8]:
# How did the categorical distribution of issues differ between Brooklyn and the Bronx?
def get_borough_counts(borough):
    """Return the number of issues per complaint type for one borough.

    `borough` is compared for equality against the 'Borough' column; the
    result is a Series indexed by complaint type.
    """
    return issues[issues.Borough == borough]['Complaint Type'].value_counts()


brooklyn = get_borough_counts('BROOKLYN')
bronx = get_borough_counts('BRONX')
# Brooklyn count minus Bronx count per complaint type. fill_value=0 treats a
# type that is absent from one borough as zero instead of producing NaN.
# `sort_values()` replaces `Series.order()`, which was removed from pandas.
difference = brooklyn.sub(bronx, fill_value=0).sort_values()
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(difference.head(3))  # types most over-represented in the Bronx
print('')
print(difference.tail(3))  # types most over-represented in Brooklyn
In [9]:
# How many issues were reported in each borough?
# Group the complaint-type column by borough and count the non-null entries
# in each group.
issues['Complaint Type'].groupby(issues['Borough']).count()
In [10]:
# What was the spatial distribution of complaints?
# Keep only the two state-plane coordinate columns, renumber the rows
# 0..n-1, discard rows missing either coordinate, and divide by 500 so that
# one unit in `points` spans 500 units of the original coordinates.
points = issues[[
    'X Coordinate (State Plane)',
    'Y Coordinate (State Plane)',
]].reset_index(drop=True).dropna() / 500
In [11]:
# Bounding box of the scaled points. min()/max() reduce each column, and the
# columns are ordered X then Y, so each result unpacks as an (x, y) pair.
(minX, minY), (maxX, maxY) = points.min().values, points.max().values
In [12]:
import numpy as np  # the original cell used `np` without importing it
import pylab as pl
from scipy.ndimage import gaussian_filter

# Convert each point to whole-number grid offsets from the lower-left corner.
# Current NumPy rejects float array shapes and float indices, so the offsets
# are truncated to integers explicitly (the same truncation the float indices
# used to receive implicitly).
cols = (points.values[:, 0] - minX).astype(int)
rows = (points.values[:, 1] - minY).astype(int)

# One cell per grid unit; rows follow Y and columns follow X.
image = np.zeros((int(maxY - minY) + 1, int(maxX - minX) + 1))
# np.add.at accumulates repeated (row, col) pairs, so every point adds 1 to
# its cell -- the vectorized equivalent of a per-point `+= 1` loop.
np.add.at(image, (rows, cols), 1)

# Smooth the counts with a Gaussian of sigma 1 along each axis.
image = gaussian_filter(image, (1, 1))
# origin='lower' puts row 0 at the bottom so Y increases upwards.
pl.imshow(image, origin='lower');