# Source code for rr.database

#!/usr/bin/env python
# encoding: utf-8
# Andre Anjos <andre.anjos@idiap.ch>
# Fri 19 Jun 17:49:18 2015 CEST

'''Database specifications for an evaluation protocol based on the Iris Flower
databases from Fisher's original work.'''


import numpy
import bob.db.iris


# The evaluation protocols we implement, keyed by protocol name.  Each
# protocol maps a subset name to the indexes used to select that subset's
# samples from the data of every class.
PROTOCOLS = dict(
    proto1=dict(train=range(0, 30), test=range(30, 50)),
    proto2=dict(train=range(20, 50), test=range(0, 20)),
)

# Names of the subsets available in each protocol
SUBSETS = ['train', 'test']

# Names of the Iris flower types (classes) in the dataset
CLASSES = ['setosa', 'versicolor', 'virginica']

# The names of the four values (features) that were sampled, taken directly
# from ``bob.db.iris.names``.  Their order matters: ``get()`` converts each
# requested variable name into a column index through ``VARIABLES.index``.
VARIABLES = bob.db.iris.names


def split_data(data, subset, splits):
    '''Selects the samples of one subset from every entry in ``data``

    Parameters:

      data (dict): maps a key (e.g. a class name) to an indexable container
        holding the samples available for that key

      subset (string): the key of ``splits`` naming the subset to extract

      splits (dict): maps each subset name to the index expression that is
        applied to every container in ``data``

    Returns:

      dict: a new dictionary with the same keys as ``data``, in which each
      value is the matching container indexed with ``splits[subset]``

    '''

    # ``splits[subset]`` is looked up per key on purpose: with empty ``data``
    # nothing is looked up at all and an empty dictionary is returned
    return {key: data[key][splits[subset]] for key in data}

def get(protocol, subset, classes=CLASSES, variables=VARIABLES):
    '''Returns the data subset given a particular protocol

    Parameters:

      protocol (string): one of the valid protocols supported by this
        interface (a key of ``PROTOCOLS``)

      subset (string): one of ``'train'`` or ``'test'``

      classes (list of strings): the names of the classes you want to have
        data from; the output follows the order given here

      variables (list of strings): the names of the variables (features) you
        want to have data from; the output columns follow the order given here

    Returns:

      numpy.ndarray: The data for all the requested classes and variables,
      packed into one 3D array. One depth represents the data for one class,
      one row is one example, one column a given feature.

    Raises:

      KeyError: if ``protocol`` is not a key of ``PROTOCOLS``. Unknown subset,
        class or variable names also fail at their respective lookups.

    '''

    # keep only the examples assigned to the requested subset, for each class
    per_class = split_data(bob.db.iris.data(), subset, PROTOCOLS[protocol])

    # translate the requested variable names into column positions
    columns = [VARIABLES.index(name) for name in variables]

    # filter classes and columns, then stack everything: one depth per class
    return numpy.array([per_class[name][:, columns] for name in classes])