import itertools
import pylab as pl
import random
from sklearn import datasets, feature_selection, linear_model, neighbors, svm
# Load the scikit-learn handwritten-digits dataset (images plus target labels).
digits = datasets.load_digits()
# Examine the dataset.
# Look at the first image as an array
digits.images[0]
# ...and its dimensions. NOTE: bare expressions like these only display
# their value in an interactive session; they are no-ops when run as a script.
digits.images[0].shape
# Look at five random images and their corresponding labels
def draw_samples(images, labels):
    """Plot each image in a row of subplots, titling each with its label.

    `images` and `labels` are parallel sequences; images are shown as
    inverted-grayscale bitmaps with axes hidden.
    """
    # zip() (not the Python-2-only itertools.izip) works identically here
    # under both Python 2 and 3; the original body was also missing its
    # indentation, which made the function a syntax error.
    for index, (image, label) in enumerate(zip(images, labels)):
        pl.subplot(1, len(images), index + 1)
        pl.imshow(image, cmap=pl.cm.gray_r, interpolation='nearest')
        pl.axis('off')
        pl.title('%s' % label)
# Pick five random sample indices; range() (not the Python-2-only xrange)
# is accepted by random.sample under both Python 2 and 3. NumPy fancy
# indexing with the resulting list selects the matching images/labels.
indices = random.sample(range(len(digits.images)), 5)
draw_samples(digits.images[indices], digits.target[indices])
# Flatten each image into an array, where each pixel is a feature
# data = [image.ravel() for image in digits.images]
# We can do this more efficiently by reshaping the entire matrix at once, where
# -1 tells reshape() to determine the size of the second dimension automatically
data = digits.images.reshape(len(digits.images), -1)
# Shape of one flattened sample (displays only in an interactive session).
data[0].shape
# Train a supervised learning model and test its performance on images it hasn't seen before.
sampleCount = len(data)
imageShape = digits.images[0].shape
# Train on the first half of the data. Floor division keeps the slice
# index an int under Python 3, where `/` is true division and would
# make the slice raise a TypeError.
splitIndex = sampleCount // 2
trainingData = data[:splitIndex]
trainingLabels = digits.target[:splitIndex]
# Test on five random images drawn from the *second* half only, so the
# model is evaluated on samples it never saw during training. (The
# original sampled from all of `data`, leaking training images into the
# test set.) list() makes the slice an explicit sequence for random.sample.
testData = random.sample(list(data[splitIndex:]), 5)
def train_and_test(model):
    """Fit `model` on the training split and plot its test predictions.

    Relies on the module-level trainingData, trainingLabels, testData and
    imageShape defined above. The original body was missing its
    indentation, which made the function a syntax error.
    """
    model.fit(trainingData, trainingLabels)
    predictedLabels = model.predict(testData)
    # Un-flatten each test sample back into an image so it can be drawn,
    # and coerce the predicted labels to plain ints for the titles.
    draw_samples([x.reshape(imageShape) for x in testData],
                 [int(x) for x in predictedLabels])
# Start with a support vector classifier.
train_and_test(svm.SVC(gamma=0.001))
# Try different supervised learning models.
# Logistic regression.
train_and_test(linear_model.LogisticRegression())
# k-nearest neighbors.
train_and_test(neighbors.KNeighborsClassifier())
# Pick a supervised learning model.
# NOTE(review): `gamma` is presumably ignored with kernel='linear' (it
# parameterizes non-linear kernels) — confirm against sklearn docs before removing.
model = svm.SVC(kernel='linear', gamma=0.001)
# Pick a feature selection algorithm.
# Rank every pixel by recursive feature elimination: repeatedly drop the
# weakest feature one at a time (step=1) until a single feature remains.
featureSelector = feature_selection.RFE(
    estimator=model,
    n_features_to_select=1,
    step=1,
)
featureSelector.fit(digits.data, digits.target)
# Fold the flat per-feature ranking back into the image's 2-D layout.
featureRanking = featureSelector.ranking_.reshape(digits.images[0].shape)
# Color the pixels that are most informative.
# Visualize the per-pixel ranking as a heat map and display the figure.
pl.matshow(featureRanking, cmap=pl.cm.hot_r)
pl.title('Pixel ranking by\nrecursive feature elimination')
pl.show()