from sklearn.model_selection import cross_val_score
import pandas as pd
# Load the dataset; the feature matrices and the target below are all
# selected from this one table.
t = pd.read_csv('example-dataset.csv')

# Feature matrix 1: all three tree-related columns, as a NumPy array.
X1 = t[[
    'Tree Count Within 100 Meters',
    'Sum of Distances from Trees Within 100 Meters',
    'Average Risk of Trees Within 100 Meters',
]].values

# Feature matrix 2: the same columns as X1 minus the distance-sum column.
X2 = t[[
    'Tree Count Within 100 Meters',
    'Average Risk of Trees Within 100 Meters',
]].values

# Regression target (kept as a pandas Series).
y = t['Graduation Rate']
from sklearn.linear_model import LinearRegression
# Ordinary least squares on the three-feature matrix X1.
model1 = LinearRegression()
model1.fit(X1, y)
# Mean of the 3-fold cross-validated negative mean absolute error
# (values closer to 0 are better). Bind and report it so the result of the
# cross-validation is not silently thrown away when run as a script.
model1_score = cross_val_score(
    model1, X1, y, cv=3, scoring='neg_mean_absolute_error').mean()
print(f'model1 LinearRegression (X1) mean neg MAE: {model1_score}')

# Ordinary least squares on the two-feature matrix X2.
model2 = LinearRegression()
model2.fit(X2, y)
model2_score = cross_val_score(
    model2, X2, y, cv=3, scoring='neg_mean_absolute_error').mean()
print(f'model2 LinearRegression (X2) mean neg MAE: {model2_score}')
from sklearn.linear_model import BayesianRidge
# Bayesian ridge regression on the two-feature matrix X2.
model3 = BayesianRidge()
model3.fit(X2, y)
# Mean of the 3-fold cross-validated negative mean absolute error
# (values closer to 0 are better). Bind and report it so the result of the
# cross-validation is not silently thrown away when run as a script.
model3_score = cross_val_score(
    model3, X2, y, cv=3, scoring='neg_mean_absolute_error').mean()
print(f'model3 BayesianRidge (X2) mean neg MAE: {model3_score}')
from sklearn.svm import SVR
# Support vector regression on the two-feature matrix X2. This fitted
# instance is the one that gets pickled further down in the script.
model4 = SVR(gamma='scale')
model4.fit(X2, y)
# Mean of the 3-fold cross-validated negative mean absolute error
# (values closer to 0 are better). Bind and report it so the result of the
# cross-validation is not silently thrown away when run as a script.
model4_score = cross_val_score(
    model4, X2, y, cv=3, scoring='neg_mean_absolute_error').mean()
print(f'model4 SVR (X2) mean neg MAE: {model4_score}')
import pickle
# Persist the fitted SVR model. The context manager guarantees the file is
# flushed and closed even if pickling raises, instead of leaking the handle.
with open('/tmp/model.pkl', 'wb') as model_file:
    pickle.dump(model4, model_file)