# Part 1: string and list basics.
# NOTE(review): the SHIFT-ENTER / CTRL-ENTER prompts suggest each statement
# below is run as its own notebook cell, where a bare expression displays
# its value -- confirm against the original notebook.
# Replace YOUR FULL NAME with your full name then press SHIFT-ENTER
name = 'YOUR FULL NAME'
# Type name.<TAB> to see what you can do with this string
name
# Replace "lower" with "title" and press CTRL-ENTER
name.lower()
# Exercise (no code given): capitalize the string using a single command
# Exercise (no code given): split the string into a list of words using a single command
# Get the number of characters in the string
len(name)
xs = ['fox', 'rabbit', 'raccoon']
# Get the last item in the list (a negative index counts from the end)
xs[-1]
# Exercise (no code given): get the first item in the list
# Exercise (no code given): get the number of items in the list
from os.path import expanduser
from pandas import read_csv
# Part 2: explore the names-by-year table with pandas.
# expanduser replaces the leading "~" with the current user's home folder.
name_table_path = expanduser('~/Experiments/Datasets/names-by-year.csv')
name_table = read_csv(name_table_path)
# Show the first two rows of the dataset
name_table[:2]
# Exercise (no code given): show the first five rows of the dataset
# Show the unique years in the table
name_table['year'].unique()
# Count the number of unique years in the table
len(name_table['year'].unique())
# Exercise (no code given): count the number of unique names in the table
# Select rows where the name starts with Tim and the year is less than 1915
# (the comparison is parenthesized because & binds tighter than <)
t = name_table[
    name_table['name'].str.startswith('Tim') & (
        name_table['year'] < 1915)]
t
# Sum counts
t['count'].sum()
# Exercise (no code given): count how many people were born with your name
# between and including the years 1960 and 1969
# Split the large table into smaller tables grouped by year
for year, table in t.groupby('year'):
    # Print each per-year table followed by a blank separator line
    print(table)
    print()
# Sum counts for each year
t.groupby('year')['count'].sum()
# Exercise (no code given): count how many people shared the first three
# letters of your name after the year 2000
# Plot the number of names per year
%matplotlib inline
name_table.groupby('year').sum().plot();
# Plot the number of babies born with the name Jake by year
name_table[name_table['name'] == 'Jake'].groupby('year')['count'].sum().plot();
# Plot the number of babies born with your name by year
axes = name_table[name_table.name == 'Jake'].groupby('year').sum().plot()
axes.get_figure().savefig('/tmp/plot.png')
ls /tmp
In the menu above, choose File > New Notebook > Python 3. Copy and paste each of the following code blocks into the new notebook. Press SHIFT-ENTER on each code block to make sure that the code runs properly.
name_table_path = '~/Experiments/Datasets/names-by-year.csv'
name = 'jake'
target_folder = '/tmp'
from pandas import read_csv
name_table = read_csv(name_table_path)
name = name.capitalize().split()[0]
selected_name_table = name_table[name_table.name == name]
selected_count_by_year = selected_name_table.groupby('year')['count'].sum()
selected_count_by_year[:5]
%matplotlib inline
axes = selected_count_by_year.plot(legend=False, title='Name Frequency by Year of Birth: ' + name)
from os.path import join
target_path = join(target_folder, 'name-by-year.png')
axes.get_figure().savefig(target_path);
print('name_by_year_image_path = ' + target_path)
Add a comment containing the word CrossCompute at the top of the first code block. Your first code block should look like the following:
# CrossCompute
# Inputs used by the code blocks that follow in the notebook:
#   name_table_path - CSV file that is loaded with read_csv
#   name            - name whose yearly counts are plotted
#   target_folder   - folder where name-by-year.png is saved
# NOTE(review): presumably the CrossCompute marker above is what identifies
# these assignments as the tool's arguments -- confirm against the
# CrossCompute documentation before changing their form.
name_table_path = '~/Experiments/Datasets/names-by-year.csv'
name = 'jake'
target_folder = '/tmp'
Press the green paper plane to preview your tool!