Source code for facereclib.tools.LDA

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>

import bob.io.base
import bob.learn.linear

import numpy
import scipy.spatial

from .Tool import Tool
from .. import utils

class LDA(Tool):
    """Tool for computing linear discriminant analysis (so-called Fisher faces).

    An optional PCA step can be applied before the LDA; in that case the two
    linear projections are merged into a single projection machine.  Enrolled
    models keep every (projected) enrollment feature as one row, and scoring
    uses a configurable distance or similarity function.
    """

    def __init__(
        self,
        lda_subspace_dimension = 0,  # if set, the LDA subspace will be truncated to the given number of dimensions; by default it is limited to the number of classes in the training set
        pca_subspace_dimension = None,  # if set, a PCA subspace truncation is performed before applying LDA; might be integral or float
        distance_function = scipy.spatial.distance.euclidean,
        is_distance_function = True,
        uses_variances = False,
        **kwargs  # parameters directly sent to the base class
    ):
        """Initializes the LDA tool with the given configuration.

        Parameters:
          lda_subspace_dimension: number of LDA dimensions to keep; 0 lets the
            LDA trainer strip the subspace to its rank.
          pca_subspace_dimension: None/0 to skip PCA, an int for a fixed number
            of PCA dimensions, or a float that is compared against the
            cumulated, normalized eigenvalues.
          distance_function: callable taking (model, probe) -- and additionally
            the LDA eigenvalues when ``uses_variances`` is set.
          is_distance_function: if True, scores are negated so that larger
            values mean "more similar".
          uses_variances: whether ``distance_function`` expects the eigenvalues
            as a third argument.
          kwargs: forwarded unchanged to the ``Tool`` base class.

        Raises:
          ValueError: if an integral PCA dimension is smaller than the
            requested LDA dimension.
        """
        # call base class constructor and register that the LDA tool performs projection and need the training features split by client
        Tool.__init__(
            self,
            performs_projection = True,
            split_training_features_by_client = True,

            lda_subspace_dimension = lda_subspace_dimension,
            pca_subspace_dimension = pca_subspace_dimension,
            distance_function = str(distance_function),
            is_distance_function = is_distance_function,
            uses_variances = uses_variances,

            **kwargs
        )

        # copy information
        self.m_pca_subspace = pca_subspace_dimension
        self.m_lda_subspace = lda_subspace_dimension
        if self.m_pca_subspace and isinstance(self.m_pca_subspace, int) and self.m_lda_subspace and self.m_pca_subspace < self.m_lda_subspace:
            raise ValueError("The LDA subspace is larger than the PCA subspace size. This won't work properly. Please check your setup!")

        # both are filled by train_projector() or load_projector()
        self.m_machine = None
        self.m_variances = None

        self.m_distance_function = distance_function
        # distances are turned into similarities by negating them
        self.m_factor = -1. if is_distance_function else 1.
        self.m_uses_variances = uses_variances

    def __read_data__(self, training_files):
        """Stacks the flattened features of each client into one 2D array per client.

        Clients with fewer than two features are skipped with a warning.
        Returns a list with one array (features as rows) per remaining client.
        """
        data = []
        for client_files in training_files:
            # at least two files per client are required!
            if len(client_files) < 2:
                utils.warn("Skipping one client since the number of client files is only %d" %len(client_files))
                continue
            data.append(numpy.vstack([feature.flatten() for feature in client_files]))

        # Returns the list of 2D arrays, one per client
        return data

    def __train_pca__(self, training_set):
        """Trains and returns a LinearMachine that is trained using PCA.

        ``training_set`` is the per-client list produced by ``__read_data__``.
        As a side effect, ``self.m_pca_subspace`` is replaced by the integral
        number of PCA dimensions that is finally used.
        """
        data = numpy.vstack([feature for client in training_set for feature in client])

        utils.info(" -> Training LinearMachine using PCA")
        t = bob.learn.linear.PCATrainer()
        machine, eigen_values = t.train(data)

        if isinstance(self.m_pca_subspace, float):
            # translate the float threshold into a number of dimensions: the
            # first index whose cumulated, normalized eigenvalue exceeds the
            # threshold, or the last index if none does
            cumulated = numpy.cumsum(eigen_values) / numpy.sum(eigen_values)
            index = len(cumulated) - 1
            for position, fraction in enumerate(cumulated):
                if fraction > self.m_pca_subspace:
                    index = position
                    break
            # never collapse to an empty subspace (happens when the first
            # eigenvalue alone already exceeds the threshold)
            self.m_pca_subspace = max(int(index), 1)

        if self.m_lda_subspace and self.m_pca_subspace <= self.m_lda_subspace:
            utils.warn(" ... Extending the PCA subspace dimension from %d to %d" % (self.m_pca_subspace, self.m_lda_subspace + 1))
            self.m_pca_subspace = self.m_lda_subspace + 1
        else:
            utils.info(" ... Limiting PCA subspace to %d dimensions" % self.m_pca_subspace)

        # limit number of pcs
        machine.resize(machine.shape[0], self.m_pca_subspace)
        return machine

    def __perform_pca__(self, machine, training_set):
        """Projects every feature of every client with the given machine.

        Returns a list with one 2D array (projected features as rows) per client.
        """
        return [
            numpy.vstack([machine(feature) for feature in client_features])
            for client_features in training_set
        ]

    def train_projector(self, training_features, projector_file):
        """Generates the LDA projection matrix from the given features (that are sorted by identity).

        The resulting machine (combined with the PCA projection, if PCA is
        enabled) and the LDA eigenvalues are stored in ``self`` and written to
        the HDF5 file ``projector_file``.
        """
        # Initializes an array for the data
        data = self.__read_data__(training_features)

        pca_machine = None
        if self.m_pca_subspace:
            pca_machine = self.__train_pca__(data)
            utils.info(" -> Projecting training data to PCA subspace")
            data = self.__perform_pca__(pca_machine, data)

        utils.info(" -> Training LinearMachine using LDA")
        t = bob.learn.linear.FisherLDATrainer(strip_to_rank = (self.m_lda_subspace == 0))
        self.m_machine, self.m_variances = t.train(data)
        if self.m_lda_subspace:
            self.m_machine.resize(self.m_machine.shape[0], self.m_lda_subspace)
            # copy first, so that the in-place resize works on our own array
            self.m_variances = self.m_variances.copy()
            self.m_variances.resize(self.m_lda_subspace)

        # decide on the trained machine itself, since __train_pca__ rewrites
        # self.m_pca_subspace
        if pca_machine is not None:
            # compute combined PCA/LDA projection matrix
            combined_matrix = numpy.dot(pca_machine.weights, self.m_machine.weights)
            # set new weight matrix (and new mean vector) of novel machine
            self.m_machine = bob.learn.linear.Machine(combined_matrix)
            self.m_machine.input_subtract = pca_machine.input_subtract

        f = bob.io.base.HDF5File(projector_file, "w")
        f.set("Eigenvalues", self.m_variances)
        f.create_group("Machine")
        f.cd("/Machine")
        self.m_machine.save(f)

    def load_projector(self, projector_file):
        """Reads the LDA eigenvalues and the projection machine from the given HDF5 file."""
        f = bob.io.base.HDF5File(projector_file)
        self.m_variances = f.read("Eigenvalues")
        f.cd("/Machine")
        self.m_machine = bob.learn.linear.Machine(f)

    def project(self, feature):
        """Projects the given feature with the trained/loaded machine.

        A freshly allocated array is returned on every call, so results of
        earlier calls stay valid, and projection works after either
        ``train_projector`` or ``load_projector``.
        """
        return self.m_machine(feature)

    def enroll(self, enroll_features):
        """Enrolls the model by storing all given feature vectors.

        Returns a 2D float64 array with one row per enrollment feature.
        """
        assert len(enroll_features), "At least one feature is required to enroll a model"
        # just store all the features
        model = numpy.zeros((len(enroll_features), enroll_features[0].shape[0]), numpy.float64)
        for n, feature in enumerate(enroll_features):
            model[n,:] = feature[:]

        # return enrolled model
        return model

    def score(self, model, probe):
        """Computes the score between model and probe using the configured distance function.

        For a distance function the negative distance is returned (as a
        similarity measure).  A 2D ``model`` (several enrollment features) is
        delegated to ``score_for_multiple_models``, which is not defined in
        this class (presumably provided by the ``Tool`` base class).
        """
        if len(model.shape) == 2:
            # we have multiple models, so we use the multiple model scoring
            return self.score_for_multiple_models(model, probe)

        # single model, single probe (multiple probes have already been handled)
        if self.m_uses_variances:
            return self.m_factor * self.m_distance_function(model, probe, self.m_variances)
        return self.m_factor * self.m_distance_function(model, probe)