Plotting data




Pay Notebook Creator: Salah Ahmed0
Set Container: Numerical CPU with TINY Memory for 10 Minutes 0
Total0

Plotting data

Provide a CSV data table to plot its values together with a least-squares fitted line.

Uses the line-fitting example from the numpy.linalg.lstsq documentation.

In [4]:
# Crosscompute
# Inputs for this notebook: the CSV file to read and the folder that receives
# the generated images. Both names are used by later cells.
# NOTE(review): the "# Crosscompute" marker and the *_path / *_folder name
# suffixes look like CrossCompute tool-argument conventions -- confirm before
# renaming or reformatting this cell.
data_table_path = 'data.csv'
target_folder = '/tmp'
In [5]:
%pylab inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from os.path import join
Populating the interactive namespace from numpy and matplotlib
In [6]:
# Read the input CSV into a DataFrame (pandas treats the first row as the
# header by default). A NumPy-only alternative would be
# np.loadtxt(data_table_path, skiprows=1).
data_table = pd.read_csv(filepath_or_buffer=data_table_path)
In [7]:
# Number of rows in the table; reused by later cells.
l = data_table.shape[0]
# Flatten the table into a 1-D vector of observations. Reshaping to (l,)
# only succeeds when the table holds exactly one value per row.
y = np.reshape(data_table.values, (l,))
y
Out[7]:
array([1304,  838, 1636, 1764,  725, 1105, 1623,  820, 1412, 1103, 1057,
       1148])
In [8]:
# 1-based sample positions, used as the independent variable of the fit
x = np.arange(l) + 1
x
Out[8]:
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
In [9]:
# Coefficient (design) matrix for y = m*x + c: the first column holds x and
# the second column is all ones, so the intercept c is fitted as well.
A = np.column_stack((x, np.ones(l)))
A
Out[9]:
array([[  1.,   1.],
       [  2.,   1.],
       [  3.,   1.],
       [  4.,   1.],
       [  5.,   1.],
       [  6.,   1.],
       [  7.,   1.],
       [  8.,   1.],
       [  9.,   1.],
       [ 10.,   1.],
       [ 11.,   1.],
       [ 12.,   1.]])
In [11]:
# Least-squares solution of A @ [m, c] = y; element [0] of the lstsq result
# is the coefficient vector.
# rcond is passed explicitly: omitting it raises a FutureWarning on
# NumPy 1.14-1.x, where the default cutoff for small singular values was
# scheduled to change. rcond=None selects the machine-precision-based cutoff
# on every supported version.
arr = np.linalg.lstsq(A, y, rcond=None)[0]
# Unpack slope (m) and intercept (c) for the following cells.
m, c = arr
arr
Out[11]:
array([  -15.5,  1312. ])
In [12]:
# Evaluate the fitted line (intercept + slope * position) at every sample
y_new = c + x * m
y_new
Out[12]:
array([ 1296.5,  1281. ,  1265.5,  1250. ,  1234.5,  1219. ,  1203.5,
        1188. ,  1172.5,  1157. ,  1141.5,  1126. ])
In [13]:
# Draw the observations as markers and overlay the least-squares line.
# `fig` is kept as a global because the next cell saves it to disk.
fig, ax = plt.subplots()
ax.plot(x, y, 'o', label='original data', markersize=10)
ax.plot(x, m * x + c, 'r', label='fitted line')
ax.legend()
Out[13]:
<matplotlib.legend.Legend at 0x7ff41dd3abe0>
In [14]:
# Save the figure currently bound to `fig` as fitted_line.png inside
# target_folder. A later cell rebinds `fig` to a new figure, so this cell has
# to run while `fig` still refers to the fitted-line plot.
# fitted_line_path is printed by the final cell.
fitted_line_path = join(target_folder, 'fitted_line.png')
fig.savefig(fitted_line_path)
In [15]:
# Plot the raw table on a fresh figure; rebinding `fig` here means the next
# cell saves this figure rather than the fitted-line one.
fig, ax = plt.subplots()
ax.plot(data_table)
Out[15]:
[<matplotlib.lines.Line2D at 0x7ff3eba29c88>]
In [16]:
# Save the figure currently bound to `fig` as data.png inside target_folder.
# image_path is printed by the final cell.
image_path = join(target_folder, 'data.png')
fig.savefig(image_path)
In [17]:
# Report the saved image locations and summary statistics as "name = value"
# lines.
# The statistics are taken from the table's single column as a Series so that
# .mean() / .std() return plain scalars. Formatting the one-element Series
# returned by DataFrame.mean() with %f relies on implicit float(Series)
# conversion, which is deprecated in recent pandas and fails for tables with
# more than one column.
data_column = data_table.iloc[:, 0]
print('fitted_line_image_path = %s' % fitted_line_path)
print('data_image_path = %s' % image_path)
print('mean = %f' % data_column.mean())
# Series.std() uses the sample standard deviation (ddof=1), as DataFrame.std() does.
print('stdev = %f' % data_column.std())
fitted_line_image_path = /tmp/fitted_line.png
data_image_path = /tmp/data.png
mean = 1211.250000
stdev = 341.656160