from pandas import read_csv

# Fetch the table from the data.cityofnewyork.us CSV export and look at its
# first five rows.
t = read_csv('https://data.cityofnewyork.us/api/views/37cg-gxjd/rows.csv')
t.head(5)
# Tool inputs; each value below is read by a later step of this script.
# NOTE(review): the notebook text asks for a comment containing the word
# CrossCompute in the first code block -- confirm whether this is that block.

# CSV file that is loaded as the detail table.
detail_table_path = '~/Experiments/Datasets/census-nyc.csv'
# Text file listing, one per line, the columns the detail table is grouped by.
detail_columns_text_path = '~/Experiments/Datasets/census-nyc-columns-detail.txt'
# Text file listing, one per line, the columns carried into the summary table.
summary_columns_text_path = '~/Experiments/Datasets/census-nyc-columns-summary.txt'
# Aggregate applied to each group; max, mean, min and sum are accepted
# (the value is lower-cased before it is checked).
summary_method = 'sum'
# Folder in which the resulting table.csv is written.
target_folder = '/tmp'
from pandas import read_csv

# Load the CSV named by detail_table_path and preview its first five rows.
detail_table = read_csv(detail_table_path)
detail_table.head(5)
from os.path import expanduser

# Read the names of the columns to group by, one name per line.
# expanduser resolves the leading "~", which open() would otherwise treat as
# a literal directory name.
with open(expanduser(detail_columns_text_path), 'rt') as f:
    # Skip blank lines so a stray empty line is not taken for a column name.
    detail_columns = [line for line in f.read().splitlines() if line.strip()]
detail_columns
from os.path import expanduser

# Read the names of the columns to summarise, one name per line.
# expanduser resolves the leading "~", which open() would otherwise treat as
# a literal directory name.
with open(expanduser(summary_columns_text_path), 'rt') as f:
    # Skip blank lines so a stray empty line is not taken for a column name.
    summary_columns = [line for line in f.read().splitlines() if line.strip()]
summary_columns
# Normalise and validate the requested aggregate. An explicit raise (rather
# than assert) still runs under "python -O", so the getattr below only ever
# receives one of the whitelisted method names.
summary_method = summary_method.lower()
if summary_method not in ('max', 'mean', 'min', 'sum'):
    raise ValueError(
        "summary_method must be one of 'max', 'mean', 'min', 'sum'; got %r"
        % (summary_method,))

# Keep only the columns that are grouped on or summarised before aggregating.
# Otherwise every remaining column is aggregated as well, which wastes work
# and makes 'mean' fail on non-numeric columns in pandas releases where
# numeric_only defaults to False.
selected_columns = []
for column in detail_columns + summary_columns:
    # Order-preserving de-duplication; a name listed twice must be selected once.
    if column not in selected_columns:
        selected_columns.append(column)

detail_table_grouped = detail_table[selected_columns].groupby(
    detail_columns, as_index=False)
t = getattr(detail_table_grouped, summary_method)()
t[:5]

# Fix the output column order: group keys first, then the summarised columns.
summary_table = t[detail_columns + summary_columns]
summary_table[:5]
from os.path import join

# Save the summary as table.csv inside target_folder, leaving out the row index.
target_path = join(target_folder, 'table.csv')
summary_table.to_csv(path_or_buf=target_path, index=False)

# Report where the file went as "summary_table_path = <path>".
# NOTE(review): presumably this line is how the tool runner discovers the
# output file -- confirm before changing its format.
print('summary_table_path = ' + target_path)
Add a comment with the word CrossCompute to the first code block. Your first code block should look like the following:
Press the green paper plane to preview your tool!