Source code for rr.algorithm

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Wed 17 Jun 2015 17:51:02 CEST

import logging
logger = logging.getLogger()

import numpy
import bob.learn.linear


def make_labels(X):
    """Generate a 1D label vector usable as targets for logistic regression.

    Every example (row) that belongs to class ``k`` -- i.e. every row of
    ``X[k]`` -- is assigned the integer label ``k``.

    Parameters:

      X (numpy.ndarray): The input data. Either a numpy.ndarray with 3
        dimensions or a sequence of 2D numpy.ndarrays, one entry per class.
        Within each entry, every row corresponds to one example of the data
        set and every column to one feature. Only the number of rows of each
        entry is used here.

    Returns:

      numpy.ndarray: A 1D integer array with one label per row, for all
      classes in ``X``, ordered class after class. An empty array is returned
      when ``X`` contains no classes.

    """
    # ``len(X) == 0`` instead of ``not X``: the truth value of a multi-element
    # numpy.ndarray is ambiguous and raises.
    if len(X) == 0:
        # numpy.hstack() refuses an empty sequence; no classes, no labels.
        return numpy.zeros(0, dtype=int)

    return numpy.hstack(
        [numpy.full(len(rows), k, dtype=int) for k, rows in enumerate(X)]
    )
def add_bias(X):
    """Prepend a bias column (all ones) to the input matrix ``X``.

    Parameters:

      X (numpy.ndarray): The input data matrix. Must be 2-dimensional (or
        convertible to a 2D array, e.g. a list of rows). Every row corresponds
        to one example of the data set, every column to one feature.

    Returns:

      numpy.ndarray: A new array holding the contents of ``X`` with one extra
      leading column of ones. The result has the same dtype as ``X``.

    Raises:

      ValueError: If ``X`` does not have exactly 2 dimensions.

    """
    # Accept any array-like, not only objects that already expose ``.dtype``.
    X = numpy.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "add_bias() expects a 2D array, got %d dimension(s)" % X.ndim
        )

    # The ones column takes the input dtype so that stacking does not
    # promote the data to another type.
    bias = numpy.ones((X.shape[0], 1), dtype=X.dtype)
    return numpy.hstack((bias, X))
class MultiClassMachine:
    """A class to handle all run-time aspects for Multiclass Log. Regression

    Parameters:

      machines (iterable): An iterable over any number of machines that will
        be stored. Each machine must be callable on a bias-augmented 2D data
        matrix (see :py:meth:`__call__`).

    """

    def __init__(self, machines):
        # Materialize the iterable: a one-shot iterable (e.g. a generator)
        # would be exhausted by the first call, leaving nothing to evaluate
        # on later calls.
        self.machines = list(machines)

    def __call__(self, X):
        """Spits out the hypothesis for each machine given the data

        Parameters:

          X (numpy.ndarray): The input data matrix. This must be a
            numpy.ndarray with 2 dimensions. Every row corresponds to one
            example of the data set, every column, one different feature. The
            bias column is added internally; do not add it beforehand.

        Returns:

          numpy.ndarray: A 2D numpy.ndarray with as many entries as rows in
          the input 2D array ``X``, representing g(x), the sigmoidal
          hypothesis. Each column on the output array represents the output
          of one of the logistic regression machines in this object, in the
          order they were given at construction.

        """
        # The bias-augmented matrix is the same for every machine: build it
        # once instead of once per machine.
        # NOTE(review): assumes machines do not modify their input in place.
        biased = add_bias(X)
        return numpy.hstack([machine(biased) for machine in self.machines])

    def predict(self, X):
        """Predicts the class of each row of X

        Parameters:

          X (numpy.ndarray): The input data matrix. This must be a
            numpy.ndarray with 2 dimensions (it is forwarded as is to
            :py:meth:`__call__`). Every row corresponds to one example of the
            data set, every column, one different feature.

        Returns:

          numpy.ndarray: A 1D numpy.ndarray with as many entries as rows in
          the input 2D array ``X``. Each entry is the index of the machine
          with the highest output for that row, i.e. the predicted class.

        """
        return self(X).argmax(axis=1)
class MultiClassTrainer:
    """A class to handle all training aspects for Multiclass Log. Regression

    Parameters:

      regularizer (float): A regularization parameter, forwarded to the
        underlying trainer as its ``lambda`` keyword argument.

    """

    def __init__(self, regularizer=0.0):
        self.regularizer = regularizer

    def train(self, X):
        """Trains logistic regression classifiers for the problem posed by ``X``

        With more than two classes, a one-versus-all strategy is used: one
        classifier is trained per class ``k``, using the examples of all
        other classes as the first training set and the examples of class
        ``k`` as the second one.

        Parameters:

          X (numpy.ndarray): The input data. Either a numpy.ndarray with 3
            dimensions or a sequence (e.g. list or tuple) of 2D
            numpy.ndarrays, one entry per class. Within each entry, every row
            corresponds to one example of the data set, every column, one
            different feature. A bias column is added internally.

        Returns:

          Machine: For more than two classes, a :py:class:`MultiClassMachine`
          wrapping one trained machine per class. For exactly two classes,
          the single machine returned by the underlying trainer is handed
          back directly (not wrapped). That machine was trained on
          bias-augmented data.

        Raises:

          ValueError: If ``X`` holds fewer than two classes.

        """
        n_classes = len(X)
        if n_classes < 2:
            # Checked before the trainer is built so that bad input fails
            # early and with an explicit message.
            raise ValueError(
                "at least 2 classes are required for training, got %d"
                % n_classes
            )

        # ``lambda`` is a reserved word in Python, hence the dict expansion.
        _trainer = bob.learn.linear.CGLogRegTrainer(
            **{'lambda': self.regularizer}
        )

        if n_classes == 2:
            # trains and returns a single logistic regression classifier
            return _trainer.train(add_bias(X[0]), add_bias(X[1]))

        # trains and returns a multi-class logistic regression classifier,
        # using a one-versus-all strategy
        machines = []
        for k in range(n_classes):
            # Pick the other classes one by one: indexing ``X`` with a list
            # of indices only works for numpy.ndarrays, not for plain
            # lists/tuples of per-class arrays.
            others = [X[i] for i in range(n_classes) if i != k]
            machines.append(
                _trainer.train(add_bias(numpy.vstack(others)), add_bias(X[k]))
            )
        return MultiClassMachine(machines)