Source code for bob.db.verification.filelist.models

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Wed Oct 24 10:47:43 CEST 2012
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
This file defines simple Client and File interfaces that are comparable with other bob.db databases.
"""

import os
import fileinput
import re

import bob.db.verification.utils

class Client:
    """Minimal client record of this database: it carries the client id and nothing else."""

    def __init__(self, client_id):
        #: The ID of the client, which is stored as a :py:class:`str` object.
        self.id = client_id
class File (bob.db.verification.utils.File):
    """A file of this database, described by a file name, a client id and, optionally, a model id and a claimed (client) id.

    The given ``file_name`` is used for both :py:attr:`bob.db.verification.utils.File.id` and :py:attr:`bob.db.verification.utils.File.path`.
    When ``model_id`` is omitted, the ``client_id`` is used as model id.
    When ``claimed_id`` is omitted, the ``client_id`` is used as claimed id.
    """

    def __init__(self, file_name, client_id, model_id = None, claimed_id = None):
        # Initialize the base class; the full file name serves as file id and as path.
        bob.db.verification.utils.File.__init__(self, file_id = file_name, path = file_name, client_id = client_id)

        # Note: for probe files, the model id is the id of the model that the probe file is compared to.
        # Several probe files may therefore share the same file id while having different model ids.
        # For that reason, DO NOT USE the model_id outside of this class (or the according database queries).
        if model_id is None:
            # no model id given: fall back to the client id
            model_id = client_id
        self._model_id = model_id

        if claimed_id is None:
            # no claimed id given: fall back to the client id
            claimed_id = client_id
        self.claimed_id = claimed_id
#############################################################################
### internal access functions for the file lists; do not export!
#############################################################################

class ListReader:
    """Reads file lists from disk and, if requested, keeps the results in memory.

    ``store_lists`` decides whether lists and model dictionaries that were read once are
    cached (in ``m_read_lists`` and ``m_model_dicts``) and returned on later requests,
    or whether they are re-read from the list file on every call.
    """

    # One column of a list file.  The '(-.' part of the character class is a range, so a
    # token consists of word characters and any of  / ( ) * + , - .  -- everything else
    # (e.g. white space) separates columns.  Raw string: '\w' is a regex escape, not a string escape.
    _TOKEN_PATTERN = re.compile(r'[\w/(-.)]+')

    # Number of columns expected in the list file for each list type of the non-world groups.
    _COLUMN_COUNTS = {
        'for_models': 3,
        'for_tnorm': 3,
        'for_scores': 4,
        'for_probes': 2,
        'for_znorm': 2,
    }

    def __init__(self, store_lists):
        self.m_read_lists = {}
        self.m_model_dicts = {}
        self.m_store_lists = store_lists

    def _read_multi_column_list(self, list_file):
        """Parses ``list_file`` and returns its non-empty lines as a list of column lists.

        Every non-empty line must have 2, 3 or 4 columns, and all lines must have the same
        number of columns as the first one.

        Raises a ``RuntimeError`` when the file does not exist, cannot be read, or
        contains a line that violates the rules above.
        """
        if not os.path.isfile(list_file):
            raise RuntimeError('File %s does not exist.' % (list_file,))

        rows = []
        try:
            # A plain ``open`` in a ``with`` block guarantees that the file is closed even
            # when a line is rejected below.  (Iterating through the module-global
            # ``fileinput.input`` left its state open on errors, so every later read
            # failed with "input() already active".)
            with open(list_file) as list_stream:
                for line in list_stream:
                    parsed_line = self._TOKEN_PATTERN.findall(line)
                    if not parsed_line:
                        # empty line (or one without any token): skip it
                        continue
                    # perform some sanity checks
                    if len(parsed_line) not in (2, 3, 4):
                        raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % (line.rstrip(), list_file))
                    if rows and len(rows[0]) != len(parsed_line):
                        raise IOError("The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!" % (parsed_line, list_file, rows[0]))
                    rows.append(parsed_line)
        except IOError as e:
            # both I/O failures and the parse errors raised above are reported as RuntimeError
            raise RuntimeError("Error reading the file '%s' : '%s'." % (list_file, e))

        return rows

    def _read_column_list(self, list_file, column_count):
        """Reads ``list_file`` and converts each row into a ``File`` object.

        ``column_count`` selects how the columns of a row are interpreted:

        * 2: filename, client_id
        * 3: filename, model_id, client_id (the client_id may be omitted; the model_id is used then)
        * 4: filename, model_id, claimed_id, client_id (the client_id may be omitted; the model_id is used then)

        Raises a ``ValueError`` for any other ``column_count``.
        """
        rows = self._read_multi_column_list(list_file)

        file_list = []
        for row in rows:
            if column_count == 2:
                assert len(row) == 2
                # we expect: filename client_id
                file_list.append(File(file_name = row[0], client_id = row[1]))
            elif column_count == 3:
                assert len(row) in (2, 3)
                # we expect: filename, model_id, client_id
                file_list.append(File(file_name = row[0], client_id = row[2] if len(row) > 2 else row[1], model_id = row[1]))
            elif column_count == 4:
                assert len(row) in (3, 4)
                # we expect: filename, model_id, claimed_id, client_id
                file_list.append(File(file_name = row[0], client_id = row[3] if len(row) > 3 else row[1], model_id = row[1], claimed_id = row[2]))
            else:
                raise ValueError("The given column count %d cannot be interpreted. This is a BUG, please report to the author." % column_count)

        return file_list

    def _create_model_dictionary(self, files):
        """Returns a dictionary mapping the model id of each given file to its client id.

        Raises a ``ValueError`` when one model id occurs with two different client ids.
        """
        model_to_client = {}
        for entry in files:
            if entry._model_id not in model_to_client:
                model_to_client[entry._model_id] = entry.client_id
            elif model_to_client[entry._model_id] != entry.client_id:
                raise ValueError("The read model id '%s' is associated to two different client ids '%s' and '%s'!" % (entry._model_id, entry.client_id, model_to_client[entry._model_id]))
        return model_to_client

    def read_list(self, list_file, group, type = None):
        """Reads the list of Files from the given list file (if not done yet) and returns it.

        For the groups 'world', 'optional_world_1' and 'optional_world_2' a two-column
        list is read and ``type`` is ignored.  For any other group, ``type`` must be one
        of 'for_models', 'for_scores', 'for_probes', 'for_tnorm' or 'for_znorm' and
        selects the number of columns to read; otherwise a ``ValueError`` is raised.
        """
        if group in ('world', 'optional_world_1', 'optional_world_2'):
            if group in self.m_read_lists:
                # just return the previously read list
                return self.m_read_lists[group]
            # read the world list into memory
            file_list = self._read_column_list(list_file, 2)
            if self.m_store_lists:
                self.m_read_lists[group] = file_list
            return file_list

        group_lists = self.m_read_lists.setdefault(group, {})
        if type in group_lists:
            return group_lists[type]

        if type not in self._COLUMN_COUNTS:
            raise ValueError("The given type must be one of %s, but not '%s'" %(('for_models', 'for_scores', 'for_probes', 'for_tnorm', 'for_znorm'), type))

        file_list = self._read_column_list(list_file, self._COLUMN_COUNTS[type])
        if self.m_store_lists:
            group_lists[type] = file_list
        return file_list

    def read_models(self, list_file, group, type= None):
        """Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it"""
        assert group in ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')
        assert type in ('for_models', 'for_tnorm')

        group_dicts = self.m_model_dicts.setdefault(group, {})
        if type in group_dicts:
            return group_dicts[type]

        model_dict = self._create_model_dictionary(self.read_list(list_file, group, type))
        if self.m_store_lists:
            group_dicts[type] = model_dict
        return model_dict