# Replace YOUR FULL NAME with your full name then press SHIFT-ENTER
name = 'YOUR FULL NAME'
# Type name.<TAB> to see what you can do with this string name
# Replace "lower" with "title" and press CTRL-ENTER
name.lower()
# Capitalize the string using a single command
# Split the string into a list of words using a single command
# Get the number of characters in the string
len(name)
xs = ['fox', 'rabbit', 'raccoon']
# Get the last item in the list
xs[-1]
# Get the first item in the list
# Get the number of items in the list
from os.path import expanduser

from pandas import read_csv

# Expand "~" to the current user's home folder before loading the CSV
name_table_path = expanduser('~/Experiments/Datasets/names-by-year.csv')
name_table = read_csv(name_table_path)
# Show the first two rows of the dataset
name_table[:2]
# Show the first five rows of the dataset
# Show the unique years in the table
name_table['year'].unique()
# Count the number of unique years in the table
len(name_table['year'].unique())
# Count the number of unique names in the table
# Select rows where the name starts with Tim and the year is less than 1915
# (the parentheses are required because & binds tighter than <)
t = name_table[
    name_table['name'].str.startswith('Tim') &
    (name_table['year'] < 1915)]
t
# Sum counts
t['count'].sum()
# Count how many people were born with your name between and including the years 1960 and 1969
# Split the large table into smaller tables grouped by year
for year, table in t.groupby('year'):
    print(table)
    # Blank line between the per-year tables
    print()
# Sum counts for each year
t.groupby('year')['count'].sum()
# Count how many people shared the first three letters of your name after the year 2000
# Plot the number of names per year
# Render plots inside the notebook; this call is the plain-Python spelling
# of the IPython magic "%matplotlib inline" and needs an IPython session
get_ipython().run_line_magic('matplotlib', 'inline')
# numeric_only=True keeps the text "name" column out of the per-year sum
name_table.groupby('year').sum(numeric_only=True).plot();
# Plot the number of babies born with the name Jake by year
name_table[name_table['name'] == 'Jake'].groupby('year')['count'].sum().plot();
# Plot the number of babies born with your name by year
# Plot the per-year totals for the name Jake and save the figure as an image
# numeric_only=True keeps the text "name" column out of the per-year sum
axes = name_table[name_table['name'] == 'Jake'].groupby('year').sum(
    numeric_only=True).plot()
axes.get_figure().savefig('/tmp/plot.png')
In the menu above, choose File > New Notebook > Python 3. Copy and paste each of the following code blocks into the new notebook. Press SHIFT-ENTER on each code block to make sure that the code runs properly.
# Values read by the code blocks that follow
name_table_path = '~/Experiments/Datasets/names-by-year.csv'
name = 'jake'
target_folder = '/tmp'
from pandas import read_csv

name_table = read_csv(name_table_path)
# Keep only the first word, capitalized, so that name stays a plain string:
# a list (the result of a bare .split()) cannot be compared element-wise
# with the name column or concatenated into a title string
name_words = name.split()
if not name_words:
    raise ValueError('name must contain at least one word')
name = name_words[0].capitalize()
selected_name_table = name_table[name_table.name == name]
selected_count_by_year = selected_name_table.groupby('year')['count'].sum()
# Show the first five yearly totals
selected_count_by_year[:5]
from os.path import join

# Render plots inside the notebook; this call is the plain-Python spelling
# of the IPython magic "%matplotlib inline" and needs an IPython session
get_ipython().run_line_magic('matplotlib', 'inline')
axes = selected_count_by_year.plot(
    legend=False, title='Name Frequency by Year of Birth: ' + name)
target_path = join(target_folder, 'name-by-year.png')
axes.get_figure().savefig(target_path)
# Report where the image was written
print('name_by_year_image_path = ' + target_path)
Add a comment with the word CrossCompute to the first code block. Your first code block should look like the following:
# CrossCompute
name_table_path = '~/Experiments/Datasets/names-by-year.csv'
name = 'jake'
target_folder = '/tmp'
Press the green paper plane to preview your tool!