import numpy as np
from pandas import Series, DataFrame, Panel
# Organic spinach prices keyed by year, one Series per survey month.
februaryOrganicSpinachPrices = Series({2004: 8.02, 2005: 7.63, 2006: 8.03})
novemberOrganicSpinachPrices = Series({2004: 5.70, 2005: 5.23, 2006: 11.63})
# A bare expression only echoes its value in an interactive session or
# notebook; it has no visible effect when this file runs as a script.
novemberOrganicSpinachPrices
# Look up a single value in a Series by its index label (the year).
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(novemberOrganicSpinachPrices[2004])

# A DataFrame built from a dict of Series: each keyword becomes a column and
# the Series indexes (years) become the row index.
conventionalSpinachPrices = DataFrame(dict(
    february=Series({2004: 3.42, 2005: 4.03, 2006: 4.13}),
    november=Series({2004: 3.58, 2005: 4.19, 2006: 4.16})))
organicSpinachPrices = DataFrame(dict(
    february=februaryOrganicSpinachPrices,
    november=novemberOrganicSpinachPrices))
organicSpinachPrices
# Selecting a column returns a Series; chaining a second lookup picks the row.
organicSpinachPrices['november']
print(organicSpinachPrices['november'][2004])
# Slicing a DataFrame selects rows by position: here, the first two years.
print(organicSpinachPrices[:2])
# Stack the two price DataFrames into a 3-D Panel keyed by farming method.
# NOTE: Panel was deprecated in pandas 0.20 and removed in pandas 0.25, so
# this section only runs on older pandas releases.
spinachPrices = Panel(dict(
    conventional=conventionalSpinachPrices,
    organic=organicSpinachPrices))
# Bare expressions echo their value in an interactive session or notebook only.
spinachPrices
# Drill down one axis at a time: item (DataFrame) -> column (Series) -> value.
spinachPrices['organic']
spinachPrices['organic']['november']
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(spinachPrices['organic']['november'][2004])
# Beware of using len() to find the number of rows in a DataFrame inside a
# Panel. The Panel pads non-matching rows and columns across its DataFrames
# with NaN, and len() includes those padded rows. Instead, use
# DataFrame.count(), which counts only the non-NaN values in each column.
# 3x3 frame holding 1..9 with rows x/y/z and columns a/b/c.
# np.arange replaces the Python-2-only xrange and works on Python 2 and 3.
df = DataFrame(np.arange(1, 10).reshape(3, 3),
               index=list('xyz'), columns=list('abc'))
# Cumulative sum down each column.
print(df.cumsum())

# reindex() reorders (or adds) rows and columns by label.
df1 = df.reindex(index=list('zyx'), columns=list('cab'))
# Bare expressions echo their value in an interactive session or notebook only.
df1
df2 = DataFrame(np.arange(1, 10).reshape(3, 3),
                index=list('zyx'), columns=list('cba'))
df2
# reindex_like() conforms df2 to the row and column labels of df1.
df2.reindex_like(df1)

# NOTE: `df` is deliberately rebound here to a new frame for the groupby demo.
df = DataFrame(dict(
    age=['adult', 'child', 'adult', 'child'],
    gender=['male', 'male', 'female', 'female'],
    score=[50, 60, 80, 90]))
df
# Aggregate only the numeric `score` column: the remaining columns hold
# strings, and pandas >= 2.0 raises TypeError when asked to average them
# instead of silently dropping them.
df.groupby('age')[['score']].mean()
df.groupby('gender')[['score']].mean()
# String aggregation names are the supported spelling; passing np.sum,
# np.mean and np.std is deprecated in recent pandas.
print(df.groupby('gender')[['score']].agg(['sum', 'mean', 'std']))
df.groupby('gender').describe()