For the complete list of supervised learning algorithms available in scikit-learn, please see the documentation.
Regression
Hyperparameters
Estimated parameters
Loss function
Regularization
Feature selection
import numpy as np
import pylab as pl
from sklearn import datasets, linear_model
vectors = [
[0, 0],
[1, 1],
[2, 2],
]
targets = [
0,
1,
2,
]
iris = datasets.load_iris()
boston = datasets.load_boston()
diabetes = datasets.load_diabetes()
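Each loaded dataset is a container whose data attribute holds the feature matrix and whose target attribute holds the values to predict; a quick way to check what was loaded:
print(iris.data.shape)
print(iris.target.shape)
print(diabetes.data.shape)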
Generalized linear models compute a linear combination of the features to estimate the target variable.
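Concretely, the prediction takes the form y ≈ w_0 + w_1 x_1 + ... + w_p x_p, where the intercept w_0 and the weights w_1, ..., w_p are the estimated parameters.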
model = linear_model.LinearRegression()
model.fit(vectors, targets)
print(model.coef_)
print(model.intercept_)
print(model.predict([[3, 3]]))
# Exercise:
# Fit a linear regression on the Boston House Prices dataset.
# Look at the coefficients.
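One possible sketch of a solution, reusing the boston dataset loaded above:
boston_model = linear_model.LinearRegression()
boston_model.fit(boston.data, boston.target)
# One coefficient per feature, plus a separate intercept
print(boston_model.coef_)
print(boston_model.intercept_)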
Ridge regression adds an L2 penalty on the coefficients, controlled by the regularization parameter alpha.
model = linear_model.Ridge(alpha=0.5)
model.fit(vectors, targets)
print(model.coef_)
print(model.intercept_)
model = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
model.fit(vectors, targets)
print(model.alpha_)  # the alpha selected by cross-validation
# Exercise:
# Find an optimal value of alpha for
# Ridge Regression on the Boston House Prices dataset.
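A minimal sketch of one way to do this, reusing RidgeCV with a coarse grid of alphas:
ridge_model = linear_model.RidgeCV(alphas=np.logspace(-3, 3, 7))
ridge_model.fit(boston.data, boston.target)
print(ridge_model.alpha_)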
The lasso adds an L1 penalty, which drives some coefficients exactly to zero and therefore performs feature selection.
model = linear_model.Lasso(alpha=0.1)
model.fit(vectors, targets)
print(model.coef_)
print(model.intercept_)
# Adapted from
# http://scikit-learn.org/dev/auto_examples/linear_model/plot_lasso_model_selection.html
model = linear_model.LassoCV(cv=20)
model.fit(diabetes.data, diabetes.target)
def plot_model_selection(model):
    # Plot the cross-validation error on each fold as a function of alpha
    logAlphas = -np.log10(model.alphas_)
    pl.figure()
    pl.plot(logAlphas, model.mse_path_, ':')
    pl.plot(logAlphas, model.mse_path_.mean(axis=-1), 'k',
            label='Average across folds', linewidth=2)
    pl.axvline(-np.log10(model.alpha_), linestyle='--', color='k',
               label='alpha: CV estimate')
    pl.legend()
    pl.title('Mean square error on each fold: Coordinate descent')
    pl.xlabel('-log(alpha)')
    pl.ylabel('Mean square error')
    pl.axis('tight')
    pl.ylim(2300, 3800)

plot_model_selection(model)
LassoLars fits the same lasso model, but uses the least-angle regression (LARS) algorithm instead of coordinate descent.
model = linear_model.LassoLars(alpha=0.1)
model.fit(vectors, targets)
print(model.coef_)
print(model.intercept_)
# Adapted from
# http://scikit-learn.org/dev/auto_examples/linear_model/plot_lasso_model_selection.html
model = linear_model.LassoLarsIC(criterion='bic')
model.fit(diabetes.data, diabetes.target)
pl.figure()
pl.plot(-np.log10(model.alphas_), model.criterion_, '--', linewidth=3)
pl.axvline(-np.log10(model.alpha_), linewidth=3)
pl.xlabel('-log(alpha)')
pl.ylabel('Information criterion')
pl.title('Model selection by information criterion');
# Exercise:
# Modify the above example to use the Akaike information criterion (AIC)
# instead of the Bayesian information criterion (BIC).
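A minimal sketch: only the criterion argument changes.
aic_model = linear_model.LassoLarsIC(criterion='aic')
aic_model.fit(diabetes.data, diabetes.target)
print(aic_model.alpha_)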
# Exercise:
# Use cross validation instead of information criterion
# to select alpha.
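One possible sketch, using the cross-validated LARS estimator:
cv_model = linear_model.LassoLarsCV(cv=20)
cv_model.fit(diabetes.data, diabetes.target)
print(cv_model.alpha_)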
Bayesian regression optimizes model parameters and regularization parameters at the same time.
model = linear_model.BayesianRidge()
model.fit(vectors, targets)
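After fitting, the estimated regularization can be inspected: BayesianRidge exposes the estimated precision of the noise as alpha_ and the estimated precision of the weights as lambda_.
print(model.coef_)
print(model.alpha_)   # estimated precision of the noise
print(model.lambda_)  # estimated precision of the weights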
Logistic regression is a linear model for classification; here it is trained on all but the last iris sample and used to predict the held-out sample.
model = linear_model.LogisticRegression()
model.fit(iris.data[:-1], iris.target[:-1])
print(model.predict(iris.data[-1:]))
print(iris.target[-1])