Source code for bob.db.multipie.query

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""This module provides the Dataset interface allowing the user to query the
Multi-PIE database in the most obvious ways.
"""

import os
from bob.db.base import utils
from .models import *
from .driver import Interface
import bob.db.base

SQLITE_FILE = Interface().files()[0]


[docs]class Database(bob.db.base.SQLiteDatabase): """The dataset class opens and maintains a connection opened to the Database. It provides many different ways to probe for the characteristics of the data and for the data itself inside the database. """ def __init__(self, original_directory=None, original_extension='.png', annotation_directory=None, annotation_extension='.pos'): # NOTE: The default original extension '.png' is only valid for the # "multiview" data, but not for the "highres" images, which are stored as # '.jpg' super(Database, self).__init__(SQLITE_FILE, File, original_directory, original_extension) self.annotation_directory = annotation_directory self.annotation_extension = annotation_extension
[docs] def groups(self, protocol=None): """Returns the names of all registered groups""" # Same as Client.group_choices for this database return ProtocolPurpose.group_choices
[docs] def genders(self): """Returns the list of genders""" return Client.gender_choices
[docs] def subworlds(self): """Returns the list of subworlds""" return list(self.query(Subworld))
[docs] def has_subworld(self, name): """Tells if a certain subworld is available""" return self.query(Subworld).filter(Subworld.name == name).count() != 0
[docs] def subworld_names(self): """Returns all registered subworld names""" l = self.subworlds() retval = [str(k.name) for k in l] return retval
[docs] def expressions(self): """Returns the list of expressions""" return list(self.query(Expression))
[docs] def has_expression(self, name): """Tells if a certain expression is available""" return self.query(Expression).filter(Expression.name == name).count() != 0
[docs] def expression_names(self): """Returns all registered expression names""" l = self.expressions() retval = [str(k.name) for k in l] return retval
[docs] def cameras(self): """Returns the list of cameras""" return list(self.query(Camera))
[docs] def has_camera(self, name): """Tells if a certain camera is available""" return self.query(Camera).filter(Camera.name == name).count() != 0
[docs] def camera_names(self): """Returns all registered camera names""" return [str(c.name) for c in self.cameras()]
[docs] def clients(self, protocol=None, groups=None, subworld=None, genders=None, birthyears=None): """Returns a set of Clients for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the clients belong ('dev', 'eval', 'world') subworld Specify a split of the world data ('sub41', 'sub81', 'sub121', 'sub161') In order to be considered, "world" should be in groups. genders The genders to which the clients belong ('female', 'male') birthyears The birth year of the clients (in the range [1900,2050]) Returns: A list containing all the Clients which have the given properties. """ VALID_BIRTHYEARS = list(range(1900, 2050)) VALID_BIRTHYEARS.append(57) # bug in subject_list.txt (57 instead of 1957) protocol = self.check_parameters_for_validity( protocol, 'protocol', self.protocol_names()) groups = self.check_parameters_for_validity(groups, 'group', self.groups()) if subworld: subworld = self.check_parameters_for_validity( subworld, 'subworld', self.subworld_names()) genders = self.check_parameters_for_validity( genders, 'gender', self.genders()) birthyears = self.check_parameters_for_validity( birthyears, 'birthyear', VALID_BIRTHYEARS) # List of the clients retval = [] # World data if "world" in groups: q = self.query(Client) if subworld: q = q.join((Subworld, Client.subworld)).filter( Subworld.name.in_(subworld)) q = q.filter(Client.sgroup == 'world').\ filter(Client.gender.in_(genders)).\ filter(Client.birthyear.in_(birthyears)).\ order_by(Client.id) retval += list(q) # dev / eval data if 'dev' in groups or 'eval' in groups: q = self.query(Client).\ filter(and_(Client.sgroup != 'world', Client.sgroup.in_(groups))).\ filter(Client.gender.in_(genders)).\ filter(Client.birthyear.in_(birthyears)).\ order_by(Client.id) retval += list(q) return retval
[docs] def has_client_id(self, id): """Returns True if we have a client with a certain integer identifier""" return self.query(Client).filter(Client.id == id).count() != 0
[docs] def client(self, id): """Returns the Client object in the database given a certain id. Raises an error if that does not exist.""" return self.query(Client).filter(Client.id == id).one()
[docs] def tclients(self, protocol=None, groups=None): """Returns a set of T-Norm clients for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the clients belong ('dev', 'eval'). Returns: A list containing all the client ids belonging to the given group. """ groups = self.check_parameters_for_validity( groups, "group", ('dev', 'eval')) tgroups = [] if 'dev' in groups: tgroups.append('eval') if 'eval' in groups: tgroups.append('dev') return self.clients(protocol, tgroups)
[docs] def zclients(self, protocol=None, groups=None): """Returns a set of Z-Norm clients for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the clients belong ('dev', 'eval'). Returns: A list containing all the client ids belonging to the given group. """ groups = self.check_parameters_for_validity( groups, "group", ('dev', 'eval')) zgroups = [] if 'dev' in groups: zgroups.append('eval') if 'eval' in groups: zgroups.append('dev') return self.clients(protocol, zgroups)
[docs] def models(self, protocol=None, groups=None): """Returns a set of models for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the subjects attached to the models belong ('dev', 'eval', 'world') Returns: A list containing all the models belonging to the given group. """ return self.clients(protocol, groups)
[docs] def model_ids(self, protocol=None, groups=None): """Returns a set of model ids for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the subjects attached to the models belong ('dev', 'eval', 'world') Returns: A list containing the ids all the models belonging to the given group. """ return [client.id for client in self.clients(protocol, groups)]
[docs] def tmodels(self, protocol=None, groups=None): """Returns a set of T-Norm models for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the models belong ('dev', 'eval'). Returns: A list containing all the T-Norm models belonging to the given group. """ return self.tclients(protocol, groups)
[docs] def tmodel_ids(self, protocol=None, groups=None): """Returns a set of T-Norm model ids for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) groups The groups to which the models belong ('dev', 'eval'). Returns: A list containing all ids of the T-Norm models belonging to the given group. """ return [client.id for client in self.tclients(protocol, groups)]
[docs] def get_client_id_from_model_id(self, model_id, **kwargs): """Returns the client_id attached to the given model_id Keyword Parameters: model_id The model_id to consider Returns: The client_id attached to the given model_id """ return model_id
[docs] def objects(self, protocol=None, purposes=None, model_ids=None, groups=None, classes=None, subworld=None, expressions=None, cameras=None, world_sampling=1, world_noflash=False, world_first=False, world_second=False, world_third=False, world_fourth=False, world_nshots=None, world_shots=None): """Returns a set of Files for the specific query by the user. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) purposes The purposes required to be retrieved ('enroll', 'probe', 'train') or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. This field is ignored for the data from the "world" group. model_ids Only retrieves the files for the provided list of model ids (claimed client id). If 'None' is given (this is the default), no filter over the model_ids is performed. groups One of the groups ('dev', 'eval', 'world') or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. classes The classes (types of accesses) to be retrieved ('client', 'impostor') or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. subworld Specify a split of the world data ('sub41', 'sub81', 'sub121', 'sub161') In order to be considered, "world" should be in groups. expressions The (face) expressions to be retrieved (use expression_names() to get the list of expressions) or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. Notice that some protocols only contain images with 'neutral' expression. cameras The cameras to be retrieved (use camera_names() to get the list of cameras) r a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. The cameras keyword has no impact for some protocols (frontal images ones). world_sampling Samples the files from the world data set. Keeps only files such as:: File.client_id + File.shot_id % world_sampling == 0 This argument should be an integer between 1 (keep everything) and 19. It is not used if world_noflash is also set. world_nshots Only considers the n first shots from the world data set. world_shots Only considers the shots with the given ids. world_noflash Keeps the files from the world dataset recorded without flash (shot 0) world_first Only uses data from the first recorded session of each user of the world dataset. world_second Only uses data from the second recorded session of each user of the world dataset. world_third Only uses data from the third recorded session of each user of the world dataset. world_fourth Only uses data from the fourth recorded session of each user of the world dataset. Returns: A set of Files with the given properties. """ protocol = self.check_parameters_for_validity( protocol, 'protocol', self.protocol_names()) purposes = self.check_parameters_for_validity( purposes, 'purpose', self.purposes()) groups = self.check_parameters_for_validity(groups, 'group', self.groups()) classes = self.check_parameters_for_validity( classes, 'class', ('client', 'impostor')) if subworld: subworld = self.check_parameters_for_validity( subworld, 'subworld', self.subworld_names()) if expressions: expressions = self.check_parameters_for_validity( expressions, 'expression', self.expression_names()) if cameras: cameras = self.check_parameters_for_validity( cameras, 'camera', self.camera_names()) import collections if(model_ids is None): model_ids = () elif(not isinstance(model_ids, collections.Iterable)): model_ids = (model_ids,) # Now query the database retval = [] if 'world' in groups: q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup == 'world')) if subworld: q = q.join((Subworld, Client.subworld)).filter( Subworld.name.in_(subworld)) if expressions: q = q.join(Expression).filter(Expression.name.in_(expressions)) if cameras or world_nshots or world_shots or (world_sampling != 1 and world_noflash == False) or world_noflash: q = q.join(FileMultiview) if cameras: q = q.join(Camera).filter(Camera.name.in_(cameras)) if world_nshots: max1 = 19 max2 = 19 max3 = 19 max4 = 19 if world_nshots < 19: max1 = world_nshots max2 = 0 max3 = 0 max4 = 0 elif world_nshots < 38: max2 = world_nshots - 19 max3 = 0 max4 = 0 elif world_nshots < 57: max3 = world_nshots - 38 max4 = 0 else: max4 = world_nshots - 57 q = q.filter(or_(and_(File.session_id == Client.first_session, or_(and_(File.recording_id == 1, FileMultiview.shot_id < max1), and_(File.recording_id == 2, FileMultiview.shot_id < max2))), and_(File.session_id == Client.second_session, or_(and_(File.recording_id == 1, FileMultiview.shot_id < max2), and_(File.recording_id == 2, FileMultiview.shot_id < max3))), and_(File.session_id == Client.third_session, or_(and_(File.recording_id == 1, FileMultiview.shot_id < max3), and_(File.recording_id == 2, FileMultiview.shot_id < max4))), and_(File.session_id == Client.fourth_session, FileMultiview.shot_id < max4))) if world_shots: q = q.filter(FileMultiview.shot_id.in_(world_shots)) if (world_sampling != 1 and world_noflash == False): q = q.filter(((File.client_id + FileMultiview.shot_id) % world_sampling) == 0) if world_noflash: q = q.filter(FileMultiview.shot_id == 0) if world_first: q = q.filter(and_(File.session_id == Client.first_session, or_(Client.first_session != 4, and_(Client.first_session == 4, File.recording_id == 1)))) if world_second: q = q.filter(or_(and_(Client.second_session != 4, File.session_id == Client.second_session), or_(and_(Client.first_session == 4, and_(File.session_id == 4, File.recording_id == 2)), and_(Client.second_session == 4, and_(File.session_id == 4, File.recording_id == 1))))) if world_third: q = q.filter(or_(and_(Client.third_session != 4, File.session_id == Client.third_session), or_(and_(Client.second_session == 4, and_(File.session_id == 4, File.recording_id == 2)), and_(Client.third_session == 4, and_(File.session_id == 4, File.recording_id == 1))))) if world_fourth: q = q.filter(or_(and_(Client.fourth_session != 4, File.session_id == Client.fourth_session), or_(and_(Client.third_session == 4, and_(File.session_id == 4, File.recording_id == 2)), and_(Client.fourth_session == 4, and_(File.session_id == 4, File.recording_id == 1))))) if model_ids: q = q.filter(Client.id.in_(model_ids)) q = q.order_by(File.client_id, File.session_id, File.recording_id, File.id) retval += list(q) if ('dev' in groups or 'eval' in groups): if('enroll' in purposes): q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( groups), ProtocolPurpose.purpose == 'enroll')) if expressions: q = q.join(Expression).filter(Expression.name.in_(expressions)) if cameras: q = q.join(FileMultiview).join( Camera).filter(Camera.name.in_(cameras)) if model_ids: q = q.filter(Client.id.in_(model_ids)) q = q.order_by(File.client_id, File.session_id, File.recording_id, File.id) retval += list(q) if('probe' in purposes): if('client' in classes): q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( groups), ProtocolPurpose.purpose == 'probe')) if expressions: q = q.join(Expression).filter(Expression.name.in_(expressions)) if cameras: q = q.join(FileMultiview).join( Camera).filter(Camera.name.in_(cameras)) if model_ids: q = q.filter(Client.id.in_(model_ids)) q = q.order_by(File.client_id, File.session_id, File.recording_id, File.id) retval += list(q) if('impostor' in classes): q = self.query(File).join(Client).join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\ filter(and_(Protocol.name.in_(protocol), ProtocolPurpose.sgroup.in_( groups), ProtocolPurpose.purpose == 'probe')) if expressions: q = q.join(Expression).filter(Expression.name.in_(expressions)) if cameras: q = q.join(FileMultiview).join( Camera).filter(Camera.name.in_(cameras)) if len(model_ids) == 1: q = q.filter(not_(Client.id.in_(model_ids))) q = q.order_by(File.client_id, File.session_id, File.recording_id, File.id) retval += list(q) return list(set(retval)) # To remove duplicates
[docs] def tobjects(self, protocol=None, model_ids=None, groups=None, expressions=None): """Returns a set of filenames for enrolling T-norm models for score normalization. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) model_ids Only retrieves the files for the provided list of model ids (claimed client id). If 'None' is given (this is the default), no filter over the model_ids is performed. groups The groups to which the clients belong ('dev', 'eval'). expressions The (face) expressions to be retrieved (use expression_names() to get the list of expressions) or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. Notice that some protocols only contain images with 'neutral' expression. Returns: A list of Files with the given properties. """ groups = self.check_parameters_for_validity( groups, "group", ('dev', 'eval')) tgroups = [] if 'dev' in groups: tgroups.append('eval') if 'eval' in groups: tgroups.append('dev') return self.objects(protocol, 'enroll', model_ids, tgroups, 'client', None, expressions)
[docs] def zobjects(self, protocol=None, model_ids=None, groups=None, expressions=None): """Returns a set of filenames of impostors for Z-norm score normalization. Keyword Parameters: protocol One of the Multi-PIE protocols (use protocol_names() to get the list of available ones) model_ids Only retrieves the files for the provided list of model ids (client id). If 'None' is given (this is the default), no filter over the model_ids is performed. groups The groups to which the clients belong ('dev', 'eval'). expressions The (face) expressions to be retrieved (use expression_names() to get the list of expressions) or a tuple with several of them. If 'None' is given (this is the default), it is considered the same as a tuple with all possible values. Notice that some protocols only contain images with 'neutral' expression. Returns: A list of Files with the given properties. """ groups = self.check_parameters_for_validity( groups, "group", ('dev', 'eval')) zgroups = [] if 'dev' in groups: zgroups.append('eval') if 'eval' in groups: zgroups.append('dev') return self.objects(protocol, 'probe', model_ids, zgroups, 'client', None, expressions)
[docs] def annotations(self, file): """Reads the annotations for the given file id from file and returns them in a dictionary. Depending on the view type of the file (i.e., the camera), different annotations might be returned. If you have no copy of the annotations yet, you can download them under http://www.idiap.ch/resource/biometric, where you also can find more information about the annotations. Keyword parameters: file The ID of the file for which the annotations should be read. Return value The annotations as a dictionary, e.g., {'reye':(re_y,re_x), 'leye':(le_y,le_x), ...} """ if self.annotation_directory is None: return None annotation_file = file.make_path( self.annotation_directory, self.annotation_extension) if not os.path.exists(annotation_file): raise IOError("The annotation file '%s' was not found" % annotation_file) # read annotations from file annotations = {} with open(annotation_file) as f: count = int(f.readline()) if count == 6: # profile annotations labels = ['eye', 'nose', 'mouth', 'lipt', 'lipb', 'chin'] elif count == 8: # half profile annotations labels = ['reye', 'leye', 'nose', 'mouthr', 'mouthl', 'lipt', 'lipb', 'chin'] elif count == 16: # frontal image annotations labels = ['reye', 'leye', 'reyeo', 'reyei', 'leyei', 'leyeo', 'nose', 'mouthr', 'mouthl', 'lipt', 'lipb', 'chin', 'rbrowo', 'rbrowi', 'lbrowi', 'lbrowo'] elif count == 2: # for inclomplete annotations, only the two eye locations are available labels = ['reye', 'leye'] else: raise ValueError("The number %d of annotations in file '%s' is not handled." % ( count, annotation_file)) for i in range(count): line = f.readline() positions = line.split() assert len(positions) == 2 annotations[labels[i]] = (float(positions[1]), float(positions[0])) # done. return annotations
[docs] def protocol_names(self): """Returns all registered protocol names""" return [str(p.name) for p in self.protocols()]
[docs] def protocols(self): """Returns all registered protocols""" return list(self.query(Protocol))
[docs] def has_protocol(self, name): """Tells if a certain protocol is available""" return self.query(Protocol).filter(Protocol.name == name).count() != 0
[docs] def protocol(self, name): """Returns the protocol object in the database given a certain name. Raises an error if that does not exist.""" return self.query(Protocol).filter(Protocol.name == name).one()
[docs] def protocol_purposes(self): """Returns all registered protocol purposes""" return list(self.query(ProtocolPurpose))
[docs] def purposes(self): """Returns the list of allowed purposes""" return ProtocolPurpose.purpose_choices
[docs] def t_model_ids(self, protocol, groups='dev', **kwargs): """Returns the list of model ids used for T-Norm of the given protocol for the given group that satisfy your query. For possible keyword arguments, please check the :py:meth:`tmodel_ids` function.""" return self.uniquify(self.tmodel_ids(protocol=protocol, groups=groups, **kwargs))
[docs] def t_enroll_files(self, protocol, model_id, groups='dev', **kwargs): """Returns the list of T-Norm model enrollment File objects from the given model id of the given protocol for the given group that satisfy your query. For possible keyword arguments, please check the :py:meth:`tobjects` function.""" return self.uniquify(self.tobjects(protocol=protocol, groups=groups, model_ids=(model_id,), **kwargs))
[docs] def z_probe_files(self, protocol, groups='dev', **kwargs): """Returns the list of Z-Norm probe File objects to probe the model with the given model id of the given protocol for the given group that satisfy your query. For possible keyword arguments, please check the :py:meth:`zobjects` function.""" return self.uniquify(self.zobjects(protocol=protocol, groups=groups, **kwargs))