Chunk large CSV files.
from pandas import concat, read_csv

def load(file_path, **kw):
    # Keep the caller's index only if an index column was specified
    ignore_index = 'index_col' not in kw
    # Read the file in 10,000-row chunks rather than all at once
    chunk_iterator = read_csv(file_path, iterator=True, chunksize=10000, **kw)
    return concat(chunk_iterator, ignore_index=ignore_index)
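The loader accepts the same keyword arguments as read_csv; for example, with the dataset used below (assuming 'Year' is a suitable index column):
chocolate = load('datasets/UN-Chocolate.csv', index_col='Year')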
Limit columns in DataFrames.
from pandas import read_csv
chocolate = read_csv('datasets/UN-Chocolate.csv')
chocolate  # Inspect the full table
# Keep only the columns of interest
chocolate[['Year', 'Flow']]
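Alternatively, restrict parsing to the needed columns at read time with read_csv's usecols parameter:
chocolate = read_csv('datasets/UN-Chocolate.csv', usecols=['Year', 'Flow'])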
Use h5py or numpy.memmap for large arrays, and lru_cache or dogpile.cache for computationally intensive operations.
from dogpile.cache import make_region
# Configure an in-memory cache backend
region = make_region().configure('dogpile.cache.memory')

@region.cache_on_arguments()
def f(x):
    print('Wheee!')  # Runs only on a cache miss
    return x

print(f(1))
print(f(1))  # Cached; 'Wheee!' is not printed again
print(f(2))
print(f(2))  # Cached
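For pure functions, the standard library's functools.lru_cache (mentioned above) offers similar memoization without a third-party dependency; a minimal sketch:
from functools import lru_cache

@lru_cache(maxsize=None)
def g(x):
    print('Computing...')  # Runs only on a cache miss
    return x * x

print(g(3))
print(g(3))  # Cached; 'Computing...' is not printed again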
Select features with cross-validation.
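A minimal sketch using scikit-learn's RFECV, which recursively removes features and keeps the subset that cross-validates best; the synthetic dataset and estimator are illustrative assumptions:
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression

# Synthetic data: 20 features of which only 5 are informative
X, y = make_classification(n_samples=200, n_features=20, n_informative=5, random_state=0)
selector = RFECV(LogisticRegression(max_iter=1000), cv=5)
selector.fit(X, y)
print(selector.n_features_)  # Number of features retained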
Select models with cross-validation.
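One way to choose among candidate models or hyperparameters is scikit-learn's GridSearchCV; the estimator and parameter grid below are illustrative assumptions:
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = make_classification(random_state=0)
# Evaluate each candidate value of C with 5-fold cross-validation
search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=5)
search.fit(X, y)
print(search.best_params_, search.best_score_)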
Scale samples.
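Scaling is read here as standardizing each feature to zero mean and unit variance, e.g. with scikit-learn's StandardScaler:
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])
# Each column now has zero mean and unit variance
X_scaled = StandardScaler().fit_transform(X)
print(X_scaled.mean(axis=0), X_scaled.std(axis=0))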
Decorrelate samples.
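One common approach is PCA with whitening, which yields uncorrelated components of unit variance; the correlated synthetic data below is an illustrative assumption:
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 3)
X[:, 2] = X[:, 0] + 0.1 * rng.randn(100)  # Make two columns strongly correlated
X_white = PCA(whiten=True).fit_transform(X)
print(np.round(np.corrcoef(X_white, rowvar=False), 2))  # Near-identity correlation matrix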
Cross-validate with transformations by pipelining.
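Wrapping the transformation and estimator in one pipeline lets cross-validation refit the scaler on each training fold, so no test-fold statistics leak into training; a minimal sketch:
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = make_classification(random_state=0)
pipeline = make_pipeline(StandardScaler(), SVC())  # Scaler is refit per fold
print(cross_val_score(pipeline, X, y, cv=5))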
Interpolate missing labels.
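One reading of this tip is semi-supervised learning with scikit-learn's LabelSpreading, which infers labels for samples marked -1; the labeled/unlabeled split below is an illustrative assumption:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.semi_supervised import LabelSpreading

X, y = make_classification(n_samples=100, random_state=0)
y_partial = y.copy()
y_partial[50:] = -1  # -1 marks samples with missing labels
model = LabelSpreading().fit(X, y_partial)
print(model.transduction_[50:60])  # Labels inferred for the unlabeled samples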