Research in artificial intelligence involves experimenting with different ways to maximize learning and minimize error. Theano accelerates that experimentation by making it easy to define, evaluate, and differentiate functions on multi-dimensional arrays, with the optional benefit of running on GPUs for speedups of up to 140x.
import theano
import theano.tensor as T
Define and compile a function that adds two scalars.
x = T.dscalar('x')  # Double-precision scalar (float64)
y = T.dscalar('y')
f = theano.function([x, y], x + y)
f(2, 3)  # returns array(5.0)
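For quick, one-off evaluation, Theano variables also provide an eval() method that compiles on first use; a minimal sketch, reusing the x and y defined above:

z = x + y
print z.eval({x: 2.0, y: 3.0})  # 5.0 (slower than theano.function for repeated calls)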
Define and compile a function that adds two matrices.
x = T.dmatrix('x')
y = T.dmatrix('y')
f = theano.function([x, y], x + y)
f([[1, 2], [3, 4]], [[-1, -2], [-3, -4]])  # returns array([[0., 0.], [0., 0.]])
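The compiled function also accepts NumPy arrays directly; a small sketch:

import numpy
a = numpy.array([[1., 2.], [3., 4.]])
b = numpy.array([[10., 20.], [30., 40.]])
print f(a, b)  # [[ 11.  22.]
               #  [ 33.  44.]]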
Maintain internal state across calls with a shared variable and an update rule.
state = theano.shared(0)
x = T.iscalar('x') # Integer scalar (int32)
accumulate = theano.function([x], state, updates=[(state, state + x)])  # returns the old state, then adds x
accumulate(10); print state.get_value()  # 10
accumulate(20); print state.get_value()  # 30
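Shared state can also be read and reset outside of any compiled function through get_value and set_value; a short sketch:

state.set_value(0)       # reset the accumulator
accumulate(5)
print state.get_value()  # 5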
Compute derivatives symbolically with T.grad.
x = T.dscalar('x')
y = x ** 2
f = theano.function([x], T.grad(y, x))
print 'The derivative of %s is %s.' % (
    theano.pp(y),
    theano.pp(f.maker.env.outputs[0]))
print 'Evaluating the derivative at x = 4 gives %s.' % f(4)
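T.grad can be applied repeatedly to obtain higher-order derivatives; a minimal sketch, differentiating x ** 2 twice:

d2y = T.grad(T.grad(y, x), x)  # second derivative of x ** 2 is the constant 2
f2 = theano.function([x], d2y)
print f2(4)  # 2.0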
Compute the gradient of the logistic function applied elementwise to a matrix.
x = T.dmatrix('x')
s = T.sum(1 / (1 + T.exp(-x)))  # sum of the elementwise logistic (sigmoid) function
gs = T.grad(s, x)
dlogistic = theano.function([x], gs)
dlogistic([[0, 1], [-1, -2]])  # array([[0.25, 0.19661193], [0.19661193, 0.10499359]])
f = theano.function([x], T.jacobian(s, x))  # for a scalar expression the Jacobian coincides with the gradient
f([[0, 1], [-1, -2]])  # same values as dlogistic above
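Since the derivative of the logistic function s(x) is s(x)(1 - s(x)), the result is easy to check against the closed form in plain NumPy; a minimal sketch:

import numpy
a = numpy.array([[0., 1.], [-1., -2.]])
sig = 1 / (1 + numpy.exp(-a))
print sig * (1 - sig)  # matches dlogistic(a) elementwise

The next script times an elementwise exponential over a large random vector and reports whether the compiled graph ran on the CPU or the GPU; the device is chosen through Theano's configuration.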
from theano import function, config, shared, sandbox
import theano.tensor as T
import numpy
import time
vlen = 10 * 30 * 768  # 10 x #cores x #threads per core
iters = 1000
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([], T.exp(x))
t0 = time.time()
for i in xrange(iters):
    r = f()
print 'Looping %d times took' % iters, time.time() - t0, 'seconds'
print 'Result is', r
print 'Used the', 'cpu' if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.env.toposort()]) else 'gpu'
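Copying the result back from the GPU to host memory on every call can dominate the measured time. The variant below, following Theano's GPU documentation, wraps the output in gpu_from_host and borrows the result so that it stays on the device.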
from theano import function, config, shared, sandbox, Out
import theano.tensor as T
import numpy
import time
vlen = 10 * 30 * 768  # 10 x #cores x #threads per core
iters = 1000
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([],
             Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
                 borrow=True))
t0 = time.time()
for i in xrange(iters):
    r = f()
print 'Looping %d times took' % iters, time.time() - t0, 'seconds'
print 'Result is', r
print 'Numpy result is', numpy.asarray(r)
print 'Used the', 'cpu' if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.env.toposort()]) else 'gpu'
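Which device runs these scripts is controlled by Theano's configuration, typically the THEANO_FLAGS environment variable; assuming a CUDA-capable card and a hypothetical script name, an invocation looks like:

THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python gpu_test.py

Replacing device=gpu with device=cpu reruns the same script on the CPU for comparison.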