Plotting data 20180201-2223




Pay Notebook Creator: Karsh Patel0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0

Plotting data

input data to get plot

Uses example of numpy.linalg.lstsq

In [14]:
# Crosscompute
data_table_path = 'data.csv'
target_folder = '/tmp'
In [2]:
%pylab inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from os.path import join
Populating the interactive namespace from numpy and matplotlib
In [3]:
data_table = pd.read_csv(data_table_path)
# data_table = np.loadtxt(data_table_path, skiprows=1)
In [4]:
l = len(data_table)
y = data_table.values.reshape((l,))
y
Out[4]:
array([1304,  838, 1636, 1764,  725, 1105, 1623,  820, 1412, 1103, 1057,
       1148])
In [5]:
x = np.arange(1, l + 1)
x
Out[5]:
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
In [6]:
# coefficient matrix
A = np.vstack([x, np.ones(l)]).T
A
Out[6]:
array([[ 1.,  1.],
       [ 2.,  1.],
       [ 3.,  1.],
       [ 4.,  1.],
       [ 5.,  1.],
       [ 6.,  1.],
       [ 7.,  1.],
       [ 8.,  1.],
       [ 9.,  1.],
       [10.,  1.],
       [11.,  1.],
       [12.,  1.]])
In [7]:
arr = np.linalg.lstsq(A, y)[0]
m, c = arr
arr
/home/user/.virtualenvs/crosscompute/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.
To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.
  """Entry point for launching an IPython kernel.
Out[7]:
array([ -15.5, 1312. ])
In [8]:
y_new = m * x + c
y_new
Out[8]:
array([1296.5, 1281. , 1265.5, 1250. , 1234.5, 1219. , 1203.5, 1188. ,
       1172.5, 1157. , 1141.5, 1126. ])
In [9]:
fig = plt.figure()
plt.plot(x, y, 'o', label='original data', markersize=10)
plt.plot(x, m*x + c, 'r', label='fitted line')
plt.legend()
Out[9]:
<matplotlib.legend.Legend at 0x7f4836630978>
In [10]:
fitted_line_path = join(target_folder, 'fitted_line.png')
fig.savefig(fitted_line_path)
In [11]:
fig = plt.figure()
plt.plot(data_table)
Out[11]:
[<matplotlib.lines.Line2D at 0x7f4800a9b320>]
In [12]:
image_path = join(target_folder, 'data.png')
fig.savefig(image_path)
In [13]:
print('fitted_line_image_path = %s' % fitted_line_path)
print('data_image_path = %s' % image_path)
print('mean = %f' % data_table.mean())
print('stdev = %f' % data_table.std())
fitted_line_image_path = /tmp/fitted_line.png
data_image_path = /tmp/data.png
mean = 1211.250000
stdev = 341.656160