{ detail_table : Table You Want to Summarize }
{ detail_columns_text : Columns You Want to Keep }
{ summary_columns_text : Columns You Want to Summarize }
{ summary_method : How You Want the Table to be Summarized ? max, mean, min, sum }
Thanks to the City of New York for providing the example dataset.
# Click the Blue Button to transform this into a CrossCompute Tool
# The assignments below are the inputs of this notebook; the values are
# example defaults.
# NOTE(review): presumably CrossCompute replaces these values with the user's
# choices when the notebook runs as a tool -- confirm against the tool docs.
# CSV file that is loaded into the table to summarize.
detail_table_path = 'Datasets/census-nyc.csv'
# Text file listing the columns to group by (read line by line).
detail_columns_text_path = 'Datasets/census-nyc-columns-detail.txt'
# Text file listing the columns to aggregate (read line by line).
summary_columns_text_path = 'Datasets/census-nyc-columns-summary.txt'
# Aggregation to apply; must be one of max, mean, min, sum (case-insensitive).
summary_method = 'sum'
# Folder in which the resulting table.csv is written.
target_folder = '/tmp'
from pandas import read_csv

# Load the table to summarize from the CSV file named in the arguments.
detail_table = read_csv(filepath_or_buffer=detail_table_path)
# Preview the first five rows (shown as the cell output in a notebook).
detail_table.head(5)
from os.path import expanduser

# Read the columns to keep (the grouping keys), one column name per line.
# Blank and whitespace-only lines are skipped so that a stray empty line in
# the file does not turn into an empty column name that groupby cannot find.
# Names themselves are kept exactly as written in the file.
with open(expanduser(detail_columns_text_path), 'rt') as f:
    detail_columns = [line for line in f.read().splitlines() if line.strip()]
# Show the parsed list (cell output in a notebook).
detail_columns
from os.path import expanduser

# Read the columns to aggregate, one column name per line.
# Blank and whitespace-only lines are skipped so that a stray empty line in
# the file does not turn into an empty column name that cannot be selected.
# Names themselves are kept exactly as written in the file.
with open(expanduser(summary_columns_text_path), 'rt') as f:
    summary_columns = [line for line in f.read().splitlines() if line.strip()]
# Show the parsed list (cell output in a notebook).
summary_columns
# Normalize and validate the requested aggregation. An explicit exception is
# raised instead of using assert, because asserts are removed when Python
# runs with -O and the bad name would then reach getattr unchecked.
summary_method = summary_method.lower()
if summary_method not in ('max', 'mean', 'min', 'sum'):
    raise ValueError(
        "summary_method must be one of max, mean, min, sum; got %r"
        % summary_method)

# Collect the columns that actually need aggregating: every requested summary
# column that is not already a grouping key, each listed once.
value_columns = []
for column in summary_columns:
    if column not in detail_columns and column not in value_columns:
        value_columns.append(column)

# Group only the columns that are needed. Aggregating the whole table would
# also process unrelated columns, and a non-numeric one can make an
# aggregation such as mean fail even though it was never requested.
detail_table_grouped = detail_table[detail_columns + value_columns].groupby(
    detail_columns, as_index=False)
t = getattr(detail_table_grouped, summary_method)()
# Preview the aggregated rows (cell output in a notebook).
t[:5]

# Order the output as grouping columns followed by summary columns.
summary_table = t[detail_columns + summary_columns]
summary_table[:5]
from os.path import join

# Write the summary to table.csv inside the target folder, without the
# DataFrame index column.
target_path = join(target_folder, 'table.csv')
summary_table.to_csv(path_or_buf=target_path, index=False)
# Report where the result was saved as a "name = path" line on stdout.
print('summary_table_path = ' + target_path)