Introduction to Computational Analysis




Pay Notebook Creator: Roy Hyunjin Han 0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total 0
In [ ]:
from pandas import read_csv
# Read the customer, product and promotion tables with default CSV parsing.
customers, products, promotions = (
    read_csv(csvPath) for csvPath in (
        'datasets/SampleBusiness-Customers.csv',
        'datasets/SampleBusiness-Products.csv',
        'datasets/SampleBusiness-Promotions.csv',
    )
)
# Orders are read separately: the column at position 1 is parsed as datetimes.
orders = read_csv(
    'datasets/SampleBusiness-Orders.csv',
    parse_dates=[1],
)
In [ ]:
# Attach product attributes to every order row. merge() is called without
# `on`, so pandas joins on the column names the two frames have in common.
productOrders = orders.merge(products)
# Preview only the first rows instead of rendering the whole merged table.
productOrders.head()
In [ ]:
# Amount of each order row: quantity ordered multiplied by the product price.
productOrders['OrderAmount'] = productOrders['ProductQuantity'].mul(
    productOrders['ProductPrice'])

Customers

In [ ]:
# Who are our customers?
# Show the first five rows of the customer table.
customers.head(5)
In [ ]:
# What were the top ten states with the most customers?
# value_counts() sorts by descending frequency, so the first ten rows are the top ten.
customers['CustomerState'].value_counts().head(10)
In [ ]:
# How many unique customers did we have in February 2012?
import datetime
# Half-open window [Feb 1, Mar 1): on or after February 1 and strictly before March 1.
afterFebruary1 = orders['OrderDateTime'].ge(datetime.datetime(2012, 2, 1))
beforeMarch1 = orders['OrderDateTime'].lt(datetime.datetime(2012, 3, 1))
februaryOrders = orders.loc[afterFebruary1 & beforeMarch1]
# dropna=False keeps a missing CustomerID counted as its own value,
# which is what len(unique()) does.
februaryOrders['CustomerID'].nunique(dropna=False)
In [ ]:
# Who are the most loyal customers?
# How much will a customer spend during his or her relationship with the company?
# How much are we willing to spend to acquire a customer?
# Which customers are at risk of leaving?
# Which customers are most likely to buy a given product?

Products

In [ ]:
# What were the sales ranked by state? (top ten states by total order amount)
customerProductOrders = productOrders.merge(customers)
# Pick OrderAmount before summing so only that column is aggregated, and sort
# with sort_values(): Series.order() no longer exists in current pandas.
(
    customerProductOrders
    .groupby('CustomerState')['OrderAmount']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
In [ ]:
# What were the top ten products ranked by quantity sold?
# Pick ProductQuantity before summing so only that column is aggregated, and
# sort with sort_values(): Series.order() no longer exists in current pandas.
(
    productOrders
    .groupby('ProductName')['ProductQuantity']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
In [ ]:
# What were the top ten products by revenue?
# Pick OrderAmount before summing so only that column is aggregated, and sort
# with sort_values(): Series.order() no longer exists in current pandas.
(
    productOrders
    .groupby('ProductName')['OrderAmount']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
In [ ]:
# What were the top ten products by profit?
# Profit of each order row: quantity times the margin (price minus cost).
productOrders['OrderProfit'] = productOrders['ProductQuantity'] * (
    productOrders['ProductPrice'] - productOrders['ProductCost'])
# Pick OrderProfit before summing so only that column is aggregated, and sort
# with sort_values(): Series.order() no longer exists in current pandas.
(
    productOrders
    .groupby('ProductName')['OrderProfit']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
In [ ]:
# How many orders did we have by month?
from pandas.tseries.offsets import MonthEnd
ordersByDateTime = orders.set_index('OrderDateTime')
# resample(..., how=len) is no longer accepted by pandas; size() gives the same
# per-bin row count. A MonthEnd() offset object is passed instead of the 'M'
# string alias, which newer pandas releases deprecate.
ordersByDateTime.resample(MonthEnd())['OrderID'].size().plot();
In [ ]:
# Which product is most likely to sell to a given group of customers?
# How much of a given product do we expect to sell next week?
# How much of a given product should we stock in the warehouse?

Operations

In [ ]:
# What were the top ten promotions by revenue?
# What were the top ten promotions by profit?
# How effective was this promotion at increasing sales?