import pandas as pd
from sklearn.linear_model import LinearRegression

# Toy training table: two feature columns (x1, x2) and one target column (y).
dataset = pd.DataFrame(
    {
        'x1': [1, 4, 7],
        'x2': [2, 5, 8],
        'y': [3, 6, 9],
    }
)
dataset  # bare expression: a no-op when run as a plain script

# Feature matrix (one row per sample) and target vector as NumPy arrays.
X = dataset[['x1', 'x2']].values
X
y = dataset['y'].values
y

# Fit an ordinary least-squares model on the toy data.
model = LinearRegression()
model.fit(X, y)

# Predict for a single sample, then for a batch of two samples.
model.predict([[8, 9]])
model.predict([[0, 1], [8, 9]])
# Save using pickle
from pickle import dump
# The context manager closes the file as soon as the pickle is written,
# instead of leaving the handle open until garbage collection.
with open('dummy-model.pkl', 'wb') as pickle_file:
    dump(model, pickle_file)
# Save using joblib which is another option
import subprocess
import sys
# Run pip through the current interpreter so joblib is installed into the
# environment this code is running in, not whichever `pip` is first on PATH.
# The return code is deliberately ignored (best effort).
subprocess.call([sys.executable, '-m', 'pip', 'install', 'joblib'])
# NOTE: from here on `dump` refers to joblib.dump, not pickle.dump.
from joblib import dump
dump(model, '/tmp/dummy-model.joblib')
# Load using pickle
from pickle import load
# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The context manager closes the file once the model has been read.
with open('dummy-model.pkl', 'rb') as pickle_file:
    model = load(pickle_file)
model
# Load using joblib which is another option
# joblib must be installed first (e.g. `python -m pip install joblib`).
# NOTE: from here on `load` refers to joblib.load, not pickle.load.
from joblib import load
model = load('/tmp/dummy-model.joblib')
model
import pandas as pd

# Read the example table from the current working directory.
t = pd.read_csv('example-dataset.csv')
t

# Feature set 1: all three tree-related columns, as a NumPy array.
X1 = t[
    [
        'Tree Count Within 100 Meters',
        'Sum of Distances from Trees Within 100 Meters',
        'Average Risk of Trees Within 100 Meters',
    ]
].values
X1

# Feature set 2: the same columns without the sum-of-distances column.
X2 = t[
    [
        'Tree Count Within 100 Meters',
        'Average Risk of Trees Within 100 Meters',
    ]
].values
X2

# Target column, kept as a pandas Series; this rebinds the module-level `y`.
y = t['Graduation Rate']
y
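# You will need to choose an appropriate metric to evaluate the performance of your fitted model.
# Which metric you choose depends on whether you are performing classification, clustering or regression.
# If the target variable that we want to predict is numeric (as 'Graduation Rate' is treated here), the task is regression; the code below scores each model with negative mean absolute error.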
from sklearn.model_selection import cross_val_score
# Registries filled by train(): each call appends the fitted estimator to
# `models` and its mean cross-validation score to `scores`.
models, scores = [], []
def train(model, X):
    """Fit *model* on *X*, record it, and return its mean cross-validation score.

    The target is the module-level ``y``. The fitted estimator is appended to
    the module-level ``models`` list and the score to ``scores``.

    Args:
        model: Estimator exposing ``fit``; passed on to ``cross_val_score``.
        X: Feature matrix whose rows correspond to the entries of ``y``.

    Returns:
        The mean of the 3-fold ``neg_mean_absolute_error`` scores (negated
        error, so larger values are better).
    """
    # Fit on the full data first so the stored estimator is ready to use.
    model.fit(X, y)
    models.append(model)

    fold_scores = cross_val_score(
        model,
        X,
        y,
        cv=3,
        scoring='neg_mean_absolute_error',
    )
    mean_score = fold_scores.mean()
    scores.append(mean_score)
    return mean_score
from sklearn.linear_model import BayesianRidge, LinearRegression
from sklearn.svm import SVR

# Evaluate each estimator on both feature sets. The call order determines
# the index order of the `models` and `scores` lists.

# Ordinary least squares
train(LinearRegression(), X1)
train(LinearRegression(), X2)

# Bayesian ridge regression
train(BayesianRidge(), X1)
train(BayesianRidge(), X2)

# Support vector regression
train(SVR(gamma='scale'), X1)
train(SVR(gamma='scale'), X2)
import pickle

import numpy as np

# train() scores with negated mean absolute error, so the largest score
# marks the best model.
best_index = np.argmax(scores)
best_index
best_model = models[best_index]
best_model

# The context manager closes the file as soon as the pickle is written,
# instead of leaving the handle open until garbage collection.
with open('/tmp/model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)