"""
from pandas import HDFStore, read_csv
def load_csv(path):
return read_csv(path, index_col=1, parse_dates=True)[[
'Agency',
'Agency Name',
'Complaint Type',
'Borough',
'X Coordinate (State Plane)',
'Y Coordinate (State Plane)',
]].sort()
store = HDFStore('311-20111030-20111105.h5')
store['issues'] = load_csv('311-20111030-20111105.csv')
"""
from pandas import HDFStore

# Load the week of NYC 311 service requests prepared by the script preserved
# in the module docstring above.
store = HDFStore('datasets/NYC-311-ServiceRequests.h5')
issues = store['issues']
# Peek at the first record. (.ix was removed from pandas; .iloc is the
# positional replacement.)
issues.iloc[0]
# How many 311 issues were reported that week?
len(issues)
# How many issues were reported on Halloween? (label-based slice on the
# DatetimeIndex — .loc replaces the removed .ix)
len(issues.loc['2011-10-31'])
# What were the top five categories reported that week?
issues['Complaint Type'].value_counts()[:5]
# What was the daily distribution of issues?
# resample(..., how=len) was removed in pandas 0.18+; .resample('D').count()
# is the equivalent modern spelling.
issues['Complaint Type'].resample('D').count().plot();
# How did the categorical distribution of issues differ between Brooklyn and the Bronx?
# How did the categorical distribution of issues differ between Brooklyn and the Bronx?
def get_borough_counts(borough):
    """Return complaint-type frequencies for the given borough name."""
    return issues[issues.Borough == borough]['Complaint Type'].value_counts()

brooklyn = get_borough_counts('BROOKLYN')
bronx = get_borough_counts('BRONX')
# Positive entries: more common in Brooklyn; negative: more common in the Bronx.
# Series.order() was removed in pandas 0.20; sort_values() is its replacement.
difference = brooklyn.sub(bronx, fill_value=0).sort_values()
print(difference[:3])
print()
print(difference[-3:])
# How many issues were reported in each borough?
issues.groupby('Borough')['Complaint Type'].count()
# What was the spatial distribution of complaints?
# What was the spatial distribution of complaints?
import numpy as np  # BUG FIX: np was used below but never imported anywhere
import pylab as pl
from scipy.ndimage import gaussian_filter

points = issues[[
'X Coordinate (State Plane)',
'Y Coordinate (State Plane)',
]]
points.index = range(len(points))
# Bin the State Plane coordinates into 500-unit cells.  Cast to int so the
# binned values can be used as array shape/indices — float indexing raises
# IndexError in modern numpy.
points = (points.dropna() / 500).astype(int)
minX, minY = points.min().values
maxX, maxY = points.max().values
# Accumulate a 2-D histogram of complaint locations on the binned grid.
image = np.zeros((maxY - minY + 1, maxX - minX + 1))
for x, y in points.values:
    image[y - minY, x - minX] += 1
# Smooth the counts with an isotropic Gaussian so isolated points render
# as visible blobs, then display with the y-axis pointing north.
image = gaussian_filter(image, (1, 1))
pl.imshow(image, origin='lower');