#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Train a neural network."""
import logging

import numpy as np
import theano
import theano.tensor as tensor

# nntoolkit modules
import nntoolkit.utils as utils


def get_parser():
"""Return the parser object for this script."""
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
parser = ArgumentParser(description=__doc__,
formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument("-m", "--model",
dest="model_file",
help="where is the model file (.tar) which should get "
"trained?",
metavar="FILE",
type=lambda x: utils.is_valid_file(parser, x),
required=True)
parser.add_argument("-i", "--input",
dest="training_data",
help="""a file which contains training data (.tar)""",
metavar="FILE",
type=lambda x: utils.is_valid_file(parser, x),
required=True)
parser.add_argument("-o", "--output",
dest="model_output_file",
help="""where should the new model be written?""",
metavar="FILE",
required=True)
parser.add_argument("--batchsize",
dest="batch_size",
help=("A positive number which indicates how many "
"training examples get looked at before the "
"parameters get updated."),
default=256,
type=int)
parser.add_argument("-lr", "--learningrate",
dest="learning_rate",
help=("A positive number, typically between 0 and "
"10."),
default=0.1,
type=float)
parser.add_argument("--epochs",
dest="epochs",
help=("Positive number of training epochs"),
default=100,
type=int)
return parser
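
# Example invocation (hypothetical file names; the flags are the ones defined
# in get_parser above):
#
#   python train.py -m model.tar -i training-data.tar -o model-trained.tar \
#       --batchsize 256 -lr 0.1 --epochs 100
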
def minibatch_gradient_descent(model,
                               x,
                               y,
                               batch_size=256,
                               lr=0.1,
                               epochs=100):
"""Train a given neural network.
:param model: A parsed model
:param x: A numpy array with features
:param y: The list of classes
:param batch_size: A positive integer which defines after how many training
examples the values of the neural network get adjusted
:param lr: Learning rate. Has to be positive.
"""
    assert lr > 0
    assert batch_size >= 1
    # See:
    # http://deeplearning.net/software/theano/cifarSC2011/introduction.html
    # http://deeplearning.net/tutorial/mlp.html

    # symbol declarations
    sx = tensor.matrix()
    sy = tensor.matrix()

    params_to_learn = []
    layers_shared = []
    for i, layer in enumerate(model['layers']):
        W = np.array(layer['W'], dtype=theano.config.floatX)
        W = theano.shared(W, name='W%i' % i)
        b = np.array(layer['b'], dtype=theano.config.floatX)
        b = b.reshape((1, len(b)))
        b = theano.shared(b, name='b%i' % i)
        layers_shared.append({'W': W, 'b': b})
        params_to_learn.append(W)
        params_to_learn.append(b)
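
    # At this point every layer's weights and biases live in Theano shared
    # variables (copies of the model's arrays), so the compiled training
    # function below can update them in place.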
    # symbolic expression-building
    last_output = sx
    for layer in layers_shared:
        W, b = layer['W'], layer['b']
        # TODO: make this dependent on the activation function stored in the
        # model (e.g. sigmoid) instead of hard-coding tanh
        hid = tensor.tanh(tensor.dot(last_output, W) + b)
        last_output = hid
    # Squared error between the network output and the target matrix sy.
    # The original 0-1 loss through argmax/neq is not differentiable, so
    # tensor.grad cannot propagate through it; this assumes sy holds targets
    # (e.g. one-hot encoded classes) with the same shape as the output.
    err = 0.5 * tensor.sum((last_output - sy) ** 2)
    # Build the list of (parameter, update) pairs
    u_params_to_learn = []
    g_params_to_learn = tensor.grad(cost=err, wrt=params_to_learn)
    for param, gparam in zip(params_to_learn, g_params_to_learn):
        u_params_to_learn.append((param, param - lr * gparam))
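    # The update rule above is plain stochastic gradient descent: each
    # parameter p is replaced by p - lr * d(err)/d(p) once per minibatch.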
    # compile a fast training function
    train = theano.function(inputs=[sx, sy],
                            outputs=err,
                            updates=u_params_to_learn,
                            allow_input_downcast=True)
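    # ``train`` consumes one minibatch (features, targets), applies the
    # parameter updates as a side effect and returns the loss on that batch.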
    # now do the computations
    loops_per_epoch = max(1, int(len(x) / batch_size))
    logging.debug("batch_size: %i", batch_size)
    logging.debug("Loops per epoch: %i", loops_per_epoch)
    trainingloops = epochs * loops_per_epoch
    for i in range(trainingloops):
        start = (i * batch_size) % len(x)
        # Never wrap around the end of the data; a shorter final batch is
        # better than silently skipping training examples.
        end = min(start + batch_size, len(x))
        x_i = x[start:end]
        y_i = y[start:end]
        err_i = train(x_i, y_i)
        if (i + 1) % loops_per_epoch == 0:
            print("Epoch %i/%i, Loss %0.2f" %
                  ((i + 1) // loops_per_epoch, epochs, err_i))
    # Copy the trained values back into the model dictionary so that the
    # caller can serialize them; the shared variables only hold copies of the
    # original arrays. The bias is flattened back to one dimension.
    for layer, layer_shared in zip(model['layers'], layers_shared):
        layer['W'] = layer_shared['W'].get_value()
        layer['b'] = layer_shared['b'].get_value().flatten()
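

# Minimal usage sketch (hypothetical shapes and file name; assumes the model
# layout produced by utils.get_model, i.e. model['layers'][i]['W'] / ['b'],
# and targets with the same shape as the network output):
#
#   model = utils.get_model("model.tar")
#   x = np.random.rand(1000, 167)
#   y = np.random.rand(1000, 10)
#   minibatch_gradient_descent(model, x, y, batch_size=256, lr=0.1, epochs=10)
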
def main(model_file,
         model_output_file,
         training_data,
         batch_size,
         learning_rate,
         epochs):
    """Train model_file with training_data."""
    data = utils.get_data(training_data)
    if data is None:
        logging.error("Data could not be loaded. Stop training.")
        return
    x, y = data
    assert y is not None
    model = utils.get_model(model_file)
    minibatch_gradient_descent(model,
                               x, y,
                               batch_size,
                               learning_rate,
                               epochs)
    utils.write_model(model, model_output_file)

if __name__ == '__main__':
    args = get_parser().parse_args()
    main(args.model_file,
         args.model_output_file,
         args.training_data,
         args.batch_size,
         args.learning_rate,
         args.epochs)