#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This module provides the Dataset interface allowing the user to query the
Multi-PIE database in the most obvious ways.
"""
import os
from bob.db.base import utils
from .models import *
from .driver import Interface
import bob.db.base
# Location of the SQLite database file: the first entry returned by the
# driver's ``Interface().files()``. It is handed to the ``SQLiteDatabase``
# base class by ``Database.__init__``.
SQLITE_FILE = Interface().files()[0]
class Database(bob.db.base.SQLiteDatabase):
    """The dataset class opens and maintains a connection opened to the Database.

    It provides many different ways to probe for the characteristics of the data
    and for the data itself inside the database.
    """

    # Annotation labels, keyed by the number of annotated points announced on
    # the first line of an annotation file (see :py:meth:`annotations`).
    _ANNOTATION_LABELS = {
        # profile annotations
        6: ['eye', 'nose', 'mouth', 'lipt', 'lipb', 'chin'],
        # half profile annotations
        8: ['reye', 'leye', 'nose', 'mouthr',
            'mouthl', 'lipt', 'lipb', 'chin'],
        # frontal image annotations
        16: ['reye', 'leye', 'reyeo', 'reyei', 'leyei', 'leyeo', 'nose', 'mouthr',
             'mouthl', 'lipt', 'lipb', 'chin', 'rbrowo', 'rbrowi', 'lbrowi', 'lbrowo'],
        # for incomplete annotations, only the two eye locations are available
        2: ['reye', 'leye'],
    }

    def __init__(self, original_directory=None, original_extension='.png',
                 annotation_directory=None, annotation_extension='.pos'):
        """Opens the database.

        Keyword Parameters:

        original_directory
          Forwarded to the ``SQLiteDatabase`` base class.

        original_extension
          Forwarded to the ``SQLiteDatabase`` base class.

        annotation_directory
          Directory used by :py:meth:`annotations` to build the path of an
          annotation file. If 'None', :py:meth:`annotations` returns 'None'.

        annotation_extension
          Extension used by :py:meth:`annotations` to build the path of an
          annotation file.
        """
        # NOTE: The default original extension '.png' is only valid for the
        # "multiview" data, but not for the "highres" images, which are stored as
        # '.jpg'
        super(Database, self).__init__(SQLITE_FILE, File,
                                       original_directory, original_extension)
        self.annotation_directory = annotation_directory
        self.annotation_extension = annotation_extension

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _opposite_groups(self, groups):
        """Validates ``groups`` against ('dev', 'eval') and swaps them.

        Score-normalization data for 'dev' is taken from 'eval' and vice versa.
        """
        groups = self.check_parameters_for_validity(
            groups, "group", ('dev', 'eval'))
        swapped = []
        if 'dev' in groups:
            swapped.append('eval')
        if 'eval' in groups:
            swapped.append('dev')
        return swapped

    def _files_query(self, protocol, *criteria):
        """Builds the File/Client/ProtocolPurpose/Protocol query shared by all
        branches of :py:meth:`objects`, restricted to the given protocol names
        and any additional criteria (all combined with AND)."""
        return self.query(File).join(Client).\
            join((ProtocolPurpose, File.protocol_purposes)).join(Protocol).\
            filter(and_(Protocol.name.in_(protocol), *criteria))

    def _dev_eval_query(self, protocol, groups, purpose, expressions, cameras):
        """Builds the query for the dev/eval files of one purpose, optionally
        restricted to the given expressions and cameras."""
        q = self._files_query(protocol,
                              ProtocolPurpose.sgroup.in_(groups),
                              ProtocolPurpose.purpose == purpose)
        if expressions:
            q = q.join(Expression).filter(Expression.name.in_(expressions))
        if cameras:
            q = q.join(FileMultiview).join(
                Camera).filter(Camera.name.in_(cameras))
        return q

    @staticmethod
    def _in_file_order(q):
        """Orders a File query by client, session, recording and file id."""
        return q.order_by(File.client_id, File.session_id,
                          File.recording_id, File.id)

    @staticmethod
    def _world_shot_limits(world_nshots):
        """Converts ``world_nshots`` into the four shot-id upper bounds used by
        the ``world_nshots`` filter of :py:meth:`objects` (blocks of 19 shots)."""
        max1 = max2 = max3 = max4 = 19
        if world_nshots < 19:
            max1 = world_nshots
            max2 = 0
            max3 = 0
            max4 = 0
        elif world_nshots < 38:
            max2 = world_nshots - 19
            max3 = 0
            max4 = 0
        elif world_nshots < 57:
            max3 = world_nshots - 38
            max4 = 0
        else:
            max4 = world_nshots - 57
        return max1, max2, max3, max4

    @staticmethod
    def _later_session_filter(previous_session, current_session):
        """Builds the criterion selecting the files of a client's second, third
        or fourth recorded session.

        Session 4 is special-cased on ``File.recording_id``: when the previous
        slot is session 4, its recording 2 is used; when the current slot is
        session 4, its recording 1 is used.
        """
        return or_(
            and_(current_session != 4, File.session_id == current_session),
            or_(and_(previous_session == 4,
                     and_(File.session_id == 4, File.recording_id == 2)),
                and_(current_session == 4,
                     and_(File.session_id == 4, File.recording_id == 1))))

    @staticmethod
    def _unique_in_order(items):
        """Removes duplicates while keeping the first occurrence of each item."""
        seen = set()
        unique = []
        for item in items:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    # ------------------------------------------------------------------
    # Simple lookups
    # ------------------------------------------------------------------

    def groups(self, protocol=None):
        """Returns the names of all registered groups

        The ``protocol`` argument is accepted but not used.
        """
        # Same as Client.group_choices for this database
        return ProtocolPurpose.group_choices

    def genders(self):
        """Returns the list of genders"""
        return Client.gender_choices

    def subworlds(self):
        """Returns the list of subworlds"""
        return list(self.query(Subworld))

    def has_subworld(self, name):
        """Tells if a certain subworld is available"""
        return self.query(Subworld).filter(Subworld.name == name).count() != 0

    def subworld_names(self):
        """Returns all registered subworld names"""
        return [str(s.name) for s in self.subworlds()]

    def expressions(self):
        """Returns the list of expressions"""
        return list(self.query(Expression))

    def has_expression(self, name):
        """Tells if a certain expression is available"""
        return self.query(Expression).filter(Expression.name == name).count() != 0

    def expression_names(self):
        """Returns all registered expression names"""
        return [str(e.name) for e in self.expressions()]

    def cameras(self):
        """Returns the list of cameras"""
        return list(self.query(Camera))

    def has_camera(self, name):
        """Tells if a certain camera is available"""
        return self.query(Camera).filter(Camera.name == name).count() != 0

    def camera_names(self):
        """Returns all registered camera names"""
        return [str(c.name) for c in self.cameras()]

    # ------------------------------------------------------------------
    # Clients and models
    # ------------------------------------------------------------------

    def clients(self, protocol=None, groups=None, subworld=None, genders=None, birthyears=None):
        """Returns a set of Clients for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones). The value is validated, but it is not used to filter
          the returned clients.

        groups
          The groups to which the clients belong ('dev', 'eval', 'world')

        subworld
          Specify a split of the world data ('sub41', 'sub81', 'sub121', 'sub161')
          In order to be considered, "world" should be in groups.

        genders
          The genders to which the clients belong ('female', 'male')

        birthyears
          The birth year of the clients (1900 to 2049; the value 57 is also
          accepted, see the note in the code)

        Returns: A list containing all the Clients which have the given properties.
        """
        VALID_BIRTHYEARS = list(range(1900, 2050))
        VALID_BIRTHYEARS.append(57)  # bug in subject_list.txt (57 instead of 1957)

        protocol = self.check_parameters_for_validity(
            protocol, 'protocol', self.protocol_names())
        groups = self.check_parameters_for_validity(groups, 'group', self.groups())
        if subworld:
            subworld = self.check_parameters_for_validity(
                subworld, 'subworld', self.subworld_names())
        genders = self.check_parameters_for_validity(
            genders, 'gender', self.genders())
        birthyears = self.check_parameters_for_validity(
            birthyears, 'birthyear', VALID_BIRTHYEARS)

        # List of the clients
        retval = []

        # World data
        if "world" in groups:
            q = self.query(Client)
            if subworld:
                q = q.join((Subworld, Client.subworld)).filter(
                    Subworld.name.in_(subworld))
            q = q.filter(Client.sgroup == 'world').\
                filter(Client.gender.in_(genders)).\
                filter(Client.birthyear.in_(birthyears)).\
                order_by(Client.id)
            retval += list(q)

        # dev / eval data
        if 'dev' in groups or 'eval' in groups:
            q = self.query(Client).\
                filter(and_(Client.sgroup != 'world', Client.sgroup.in_(groups))).\
                filter(Client.gender.in_(genders)).\
                filter(Client.birthyear.in_(birthyears)).\
                order_by(Client.id)
            retval += list(q)

        return retval

    def has_client_id(self, id):
        """Returns True if we have a client with a certain integer identifier"""
        return self.query(Client).filter(Client.id == id).count() != 0

    def client(self, id):
        """Returns the Client object in the database given a certain id. Raises
        an error if that does not exist."""
        return self.query(Client).filter(Client.id == id).one()

    def tclients(self, protocol=None, groups=None):
        """Returns a set of T-Norm clients for the specific query by the user.

        The T-Norm clients of 'dev' are the clients of 'eval' and vice versa.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the clients belong ('dev', 'eval').

        Returns: A list containing all the Clients belonging to the given group.
        """
        return self.clients(protocol, self._opposite_groups(groups))

    def zclients(self, protocol=None, groups=None):
        """Returns a set of Z-Norm clients for the specific query by the user.

        The Z-Norm clients of 'dev' are the clients of 'eval' and vice versa.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the clients belong ('dev', 'eval').

        Returns: A list containing all the Clients belonging to the given group.
        """
        return self.clients(protocol, self._opposite_groups(groups))

    def models(self, protocol=None, groups=None):
        """Returns a set of models for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the subjects attached to the models belong ('dev', 'eval', 'world')

        Returns: A list containing all the models belonging to the given group.
        """
        return self.clients(protocol, groups)

    def model_ids(self, protocol=None, groups=None):
        """Returns a set of model ids for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the subjects attached to the models belong ('dev', 'eval', 'world')

        Returns: A list containing the ids of all the models belonging to the given group.
        """
        return [client.id for client in self.clients(protocol, groups)]

    def tmodels(self, protocol=None, groups=None):
        """Returns a set of T-Norm models for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the models belong ('dev', 'eval').

        Returns: A list containing all the T-Norm models belonging to the given group.
        """
        return self.tclients(protocol, groups)

    def tmodel_ids(self, protocol=None, groups=None):
        """Returns a set of T-Norm model ids for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        groups
          The groups to which the models belong ('dev', 'eval').

        Returns: A list containing all ids of the T-Norm models belonging to the given group.
        """
        return [client.id for client in self.tclients(protocol, groups)]

    def get_client_id_from_model_id(self, model_id, **kwargs):
        """Returns the client_id attached to the given model_id

        In this database the model id is returned unchanged; additional keyword
        arguments are ignored.

        Keyword Parameters:

        model_id
          The model_id to consider

        Returns: The client_id attached to the given model_id
        """
        return model_id

    # ------------------------------------------------------------------
    # Files
    # ------------------------------------------------------------------

    def objects(self, protocol=None, purposes=None, model_ids=None, groups=None,
                classes=None, subworld=None, expressions=None, cameras=None, world_sampling=1,
                world_noflash=False, world_first=False, world_second=False, world_third=False,
                world_fourth=False, world_nshots=None, world_shots=None):
        """Returns a set of Files for the specific query by the user.

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        purposes
          The purposes required to be retrieved ('enroll', 'probe', 'train') or a tuple
          with several of them. If 'None' is given (this is the default), it is
          considered the same as a tuple with all possible values. This field is
          ignored for the data from the "world" group.

        model_ids
          Only retrieves the files for the provided list of model ids (claimed
          client id). A single id or any iterable of ids is accepted. If 'None'
          is given (this is the default), no filter over the model_ids is
          performed.

        groups
          One of the groups ('dev', 'eval', 'world') or a tuple with several of them.
          If 'None' is given (this is the default), it is considered the same as a
          tuple with all possible values.

        classes
          The classes (types of accesses) to be retrieved ('client', 'impostor')
          or a tuple with several of them. If 'None' is given (this is the
          default), it is considered the same as a tuple with all possible values.

        subworld
          Specify a split of the world data ('sub41', 'sub81', 'sub121', 'sub161')
          In order to be considered, "world" should be in groups.

        expressions
          The (face) expressions to be retrieved (use expression_names() to get the
          list of expressions) or a tuple with several of them.
          If 'None' is given (this is the default), it is considered the same as
          a tuple with all possible values. Notice that some protocols only contain
          images with 'neutral' expression.

        cameras
          The cameras to be retrieved (use camera_names() to get the list of cameras)
          or a tuple with several of them. If 'None' is given (this is the default),
          it is considered the same as a tuple with all possible values. The cameras
          keyword has no impact for some protocols (frontal images ones).

        world_sampling
          Samples the files from the world data set. Keeps only files such as::

            (File.client_id + FileMultiview.shot_id) % world_sampling == 0

          This argument should be an integer between 1 (keep everything) and 19.
          It is not used if world_noflash is also set.

        world_nshots
          Only considers the n first shots from the world data set.

        world_shots
          Only considers the shots with the given ids.

        world_noflash
          Keeps the files from the world dataset recorded without flash (shot 0)

        world_first
          Only uses data from the first recorded session of each user of the world
          dataset.

        world_second
          Only uses data from the second recorded session of each user of the world
          dataset.

        world_third
          Only uses data from the third recorded session of each user of the world
          dataset.

        world_fourth
          Only uses data from the fourth recorded session of each user of the world
          dataset.

        Returns: A list of Files with the given properties, without duplicates.
        Files appear in the order in which the queries produced them (world,
        enroll, client probes, impostor probes; each ordered by client, session,
        recording and file id).
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABCs live in
        # ``collections.abc`` (the fallback keeps Python 2 working).
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        protocol = self.check_parameters_for_validity(
            protocol, 'protocol', self.protocol_names())
        purposes = self.check_parameters_for_validity(
            purposes, 'purpose', self.purposes())
        groups = self.check_parameters_for_validity(groups, 'group', self.groups())
        classes = self.check_parameters_for_validity(
            classes, 'class', ('client', 'impostor'))
        if subworld:
            subworld = self.check_parameters_for_validity(
                subworld, 'subworld', self.subworld_names())
        if expressions:
            expressions = self.check_parameters_for_validity(
                expressions, 'expression', self.expression_names())
        if cameras:
            cameras = self.check_parameters_for_validity(
                cameras, 'camera', self.camera_names())

        # Normalize model_ids to a tuple so that truth-testing and len() below
        # are well defined for any kind of iterable (generators, arrays, ...).
        if model_ids is None:
            model_ids = ()
        elif isinstance(model_ids, Iterable):
            model_ids = tuple(model_ids)
        else:
            model_ids = (model_ids,)

        # Now query the database
        retval = []

        if 'world' in groups:
            q = self._files_query(protocol, ProtocolPurpose.sgroup == 'world')
            if subworld:
                q = q.join((Subworld, Client.subworld)).filter(
                    Subworld.name.in_(subworld))
            if expressions:
                q = q.join(Expression).filter(Expression.name.in_(expressions))

            # The comparison with False (rather than ``not``) is kept on purpose
            # so that the accepted values of world_noflash behave as before.
            apply_sampling = (world_sampling != 1 and world_noflash == False)  # noqa: E712

            # FileMultiview is only joined when one of the filters below needs it
            if cameras or world_nshots or world_shots or apply_sampling or world_noflash:
                q = q.join(FileMultiview)
            if cameras:
                q = q.join(Camera).filter(Camera.name.in_(cameras))
            if world_nshots:
                max1, max2, max3, max4 = self._world_shot_limits(world_nshots)
                q = q.filter(or_(
                    and_(File.session_id == Client.first_session,
                         or_(and_(File.recording_id == 1, FileMultiview.shot_id < max1),
                             and_(File.recording_id == 2, FileMultiview.shot_id < max2))),
                    and_(File.session_id == Client.second_session,
                         or_(and_(File.recording_id == 1, FileMultiview.shot_id < max2),
                             and_(File.recording_id == 2, FileMultiview.shot_id < max3))),
                    and_(File.session_id == Client.third_session,
                         or_(and_(File.recording_id == 1, FileMultiview.shot_id < max3),
                             and_(File.recording_id == 2, FileMultiview.shot_id < max4))),
                    and_(File.session_id == Client.fourth_session,
                         FileMultiview.shot_id < max4)))
            if world_shots:
                q = q.filter(FileMultiview.shot_id.in_(world_shots))
            if apply_sampling:
                q = q.filter(((File.client_id + FileMultiview.shot_id) %
                              world_sampling) == 0)
            if world_noflash:
                q = q.filter(FileMultiview.shot_id == 0)
            if world_first:
                # For session 4, only recording 1 counts as the first session
                q = q.filter(and_(
                    File.session_id == Client.first_session,
                    or_(Client.first_session != 4,
                        and_(Client.first_session == 4, File.recording_id == 1))))
            if world_second:
                q = q.filter(self._later_session_filter(
                    Client.first_session, Client.second_session))
            if world_third:
                q = q.filter(self._later_session_filter(
                    Client.second_session, Client.third_session))
            if world_fourth:
                q = q.filter(self._later_session_filter(
                    Client.third_session, Client.fourth_session))
            if model_ids:
                q = q.filter(Client.id.in_(model_ids))
            retval += list(self._in_file_order(q))

        if 'dev' in groups or 'eval' in groups:
            if 'enroll' in purposes:
                q = self._dev_eval_query(
                    protocol, groups, 'enroll', expressions, cameras)
                if model_ids:
                    q = q.filter(Client.id.in_(model_ids))
                retval += list(self._in_file_order(q))

            if 'probe' in purposes:
                if 'client' in classes:
                    q = self._dev_eval_query(
                        protocol, groups, 'probe', expressions, cameras)
                    if model_ids:
                        q = q.filter(Client.id.in_(model_ids))
                    retval += list(self._in_file_order(q))

                if 'impostor' in classes:
                    q = self._dev_eval_query(
                        protocol, groups, 'probe', expressions, cameras)
                    # Impostors are only excluded-by-identity when exactly one
                    # model is requested; otherwise all probes are returned.
                    if len(model_ids) == 1:
                        q = q.filter(not_(Client.id.in_(model_ids)))
                    retval += list(self._in_file_order(q))

        # Remove duplicates, keeping the query order
        return self._unique_in_order(retval)

    def tobjects(self, protocol=None, model_ids=None, groups=None, expressions=None):
        """Returns a set of filenames for enrolling T-norm models for score
        normalization.

        The enrollment files of the opposite group are used ('eval' for 'dev'
        and vice versa).

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        model_ids
          Only retrieves the files for the provided list of model ids (claimed
          client id). If 'None' is given (this is the default), no filter over
          the model_ids is performed.

        groups
          The groups to which the clients belong ('dev', 'eval').

        expressions
          The (face) expressions to be retrieved (use expression_names() to get the
          list of expressions) or a tuple with several of them.
          If 'None' is given (this is the default), it is considered the same as
          a tuple with all possible values. Notice that some protocols only contain
          images with 'neutral' expression.

        Returns: A list of Files with the given properties.
        """
        tgroups = self._opposite_groups(groups)
        return self.objects(protocol, 'enroll', model_ids, tgroups, 'client', None, expressions)

    def zobjects(self, protocol=None, model_ids=None, groups=None, expressions=None):
        """Returns a set of filenames of impostors for Z-norm score normalization.

        The client probe files of the opposite group are used ('eval' for 'dev'
        and vice versa).

        Keyword Parameters:

        protocol
          One of the Multi-PIE protocols (use protocol_names() to get the list of
          available ones)

        model_ids
          Only retrieves the files for the provided list of model ids (client id).
          If 'None' is given (this is the default), no filter over the model_ids
          is performed.

        groups
          The groups to which the clients belong ('dev', 'eval').

        expressions
          The (face) expressions to be retrieved (use expression_names() to get the
          list of expressions) or a tuple with several of them.
          If 'None' is given (this is the default), it is considered the same as
          a tuple with all possible values. Notice that some protocols only contain
          images with 'neutral' expression.

        Returns: A list of Files with the given properties.
        """
        zgroups = self._opposite_groups(groups)
        return self.objects(protocol, 'probe', model_ids, zgroups, 'client', None, expressions)

    def annotations(self, file):
        """Reads the annotations for the given file from disk and returns them in a dictionary.

        Depending on the view type of the file (i.e., the camera), different annotations might be returned.
        If you have no copy of the annotations yet, you can download them under http://www.idiap.ch/resource/biometric,
        where you also can find more information about the annotations.

        The first line of an annotation file gives the number of annotated
        points (2, 6, 8 or 16); each following line holds the two coordinates of
        one point.

        Keyword parameters:

        file
          The File object for which the annotations should be read (its
          ``make_path`` method is used to locate the annotation file).

        Return value
          The annotations as a dictionary, e.g., {'reye':(re_y,re_x), 'leye':(le_y,le_x), ...},
          or 'None' if no annotation directory was configured.

        Raises IOError if the annotation file does not exist, and ValueError if
        its content cannot be interpreted.
        """
        if self.annotation_directory is None:
            return None

        annotation_file = file.make_path(
            self.annotation_directory, self.annotation_extension)
        if not os.path.exists(annotation_file):
            raise IOError("The annotation file '%s' was not found" % annotation_file)

        # read annotations from file
        annotations = {}
        with open(annotation_file) as f:
            count = int(f.readline())
            labels = self._ANNOTATION_LABELS.get(count)
            if labels is None:
                raise ValueError("The number %d of annotations in file '%s' is not handled." % (
                    count, annotation_file))

            for label in labels:
                positions = f.readline().split()
                # Explicit check instead of ``assert``, which is removed when
                # Python runs with -O.
                if len(positions) != 2:
                    raise ValueError("The annotation '%s' in file '%s' does not consist of exactly two values." % (
                        label, annotation_file))
                # The two values of a line are stored in swapped order
                annotations[label] = (float(positions[1]), float(positions[0]))

        # done.
        return annotations

    # ------------------------------------------------------------------
    # Protocols
    # ------------------------------------------------------------------

    def protocol_names(self):
        """Returns all registered protocol names"""
        return [str(p.name) for p in self.protocols()]

    def protocols(self):
        """Returns all registered protocols"""
        return list(self.query(Protocol))

    def has_protocol(self, name):
        """Tells if a certain protocol is available"""
        return self.query(Protocol).filter(Protocol.name == name).count() != 0

    def protocol(self, name):
        """Returns the protocol object in the database given a certain name. Raises
        an error if that does not exist."""
        return self.query(Protocol).filter(Protocol.name == name).one()

    def protocol_purposes(self):
        """Returns all registered protocol purposes"""
        return list(self.query(ProtocolPurpose))

    def purposes(self):
        """Returns the list of allowed purposes"""
        return ProtocolPurpose.purpose_choices

    # ------------------------------------------------------------------
    # Score-normalization convenience wrappers
    # ------------------------------------------------------------------

    def t_model_ids(self, protocol, groups='dev', **kwargs):
        """Returns the list of model ids used for T-Norm of the given protocol for the given group that satisfy your query.
        For possible keyword arguments, please check the :py:meth:`tmodel_ids` function."""
        return self.uniquify(self.tmodel_ids(protocol=protocol, groups=groups, **kwargs))

    def t_enroll_files(self, protocol, model_id, groups='dev', **kwargs):
        """Returns the list of T-Norm model enrollment File objects from the given model id of the given protocol for the given group that satisfy your query.
        For possible keyword arguments, please check the :py:meth:`tobjects` function."""
        return self.uniquify(self.tobjects(protocol=protocol, groups=groups, model_ids=(model_id,), **kwargs))

    def z_probe_files(self, protocol, groups='dev', **kwargs):
        """Returns the list of Z-Norm probe File objects to probe the model with the given model id of the given protocol for the given group that satisfy your query.
        For possible keyword arguments, please check the :py:meth:`zobjects` function."""
        return self.uniquify(self.zobjects(protocol=protocol, groups=groups, **kwargs))