Source code for bob.db.verification.filelist.models

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Wed Oct 24 10:47:43 CEST 2012
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
This file defines simple Client and File interfaces that are comparable with other bob.db databases.
"""

import os
import fileinput
import re

import bob.db.verification.utils

class Client:
  """The clients of this database contain ONLY client ids. Nothing special.

  **Parameters:**

  client_id
    The identifier of the client; it is stored unchanged in :py:attr:`id`.
  """
  def __init__(self, client_id):
    self.id = client_id
    """The ID of the client, which is stored as a :py:class:`str` object."""


class File (bob.db.verification.utils.File):
  """Files of this database are composed from the client id, a file id, (a model id) and a claimed (client) id.

  Both the :py:attr:`bob.db.verification.utils.File.id` and the :py:attr:`bob.db.verification.utils.File.path` are set to the given ``file_name`` parameter.
  If the ``model_id`` is not specified, ``model_id`` and ``client_id`` are identical.
  If the ``claimed_id`` is not specified, it is expected to be the ``client_id``.

  **Parameters:**

  file_name
    The name of the file as read from the list; used both as file id and as path.

  client_id
    The id of the client that this file belongs to.

  model_id
    The id of the model associated with this file, or ``None`` to use ``client_id``.

  claimed_id
    The claimed client id for this file, or ``None`` to use ``client_id``.
  """
  def __init__(self, file_name, client_id, model_id = None, claimed_id = None):
    # call base class constructor
    # the file id is the full file name
    bob.db.verification.utils.File.__init__(self, file_id = file_name, path = file_name, client_id = client_id)

    # Note: in case of probe files, model ids are considered to be the ids of the model for the given probe file.
    # Hence, there might be several probe files with the same file id, but different model ids.
    # Therefore, please DO NOT USE the model_id outside of this class (or the according database queries).
    # when the model id is not specified, we use the client id instead
    self._model_id = client_id if model_id is None else model_id
    # when the claimed id is not specified, we use the client id instead
    self.claimed_id = client_id if claimed_id is None else claimed_id



#############################################################################
### internal access functions for the file lists; do not export!
#############################################################################

class ListReader:
  """Reads file lists from text files and optionally caches the parsed results.

  Each non-empty line of a list file holds between two and four tokens, and all
  lines of one file must hold the same number of tokens.
  Parsed lists are cached per ``group`` (and per ``type`` for non-world groups)
  when ``store_lists`` is true; the name of the list file is NOT part of the
  cache key.

  **Parameters:**

  store_lists
    If true, lists and model dictionaries are kept in memory after the first
    read and returned from the cache afterwards; otherwise every call re-reads
    the list file.
  """

  # Tokens are runs of word characters, '/' and the characters in the range
  # '(' to '.' (that is: ( ) * + , - .), followed by ')'.
  # The pattern is unchanged from the original literal; it is only written as a
  # raw string (so that '\w' is not an invalid string escape) and compiled once.
  _TOKEN_PATTERN = re.compile(r'[\w/(-.)]+')

  def __init__(self, store_lists):
    # cache of parsed lists: group -> list (world groups) or group -> {type -> list}
    self.m_read_lists = {}
    # cache of model dictionaries: group -> {type -> {model_id -> client_id}}
    self.m_model_dicts = {}
    self.m_store_lists = store_lists


  def _read_multi_column_list(self, list_file):
    """Parses the given list file into a list of rows, each row being a list of tokens.

    Lines without any token are skipped.

    Raises a :py:class:`RuntimeError` if the file does not exist, cannot be read,
    contains a line with fewer than 2 or more than 4 tokens, or contains lines
    with differing numbers of tokens.
    """
    if not os.path.isfile(list_file):
      raise RuntimeError('File %s does not exist.' % (list_file,))

    rows = []
    try:
      # The context manager closes the file on every path, including when one of
      # the sanity checks below raises.  (The module-global fileinput state used
      # previously stayed open after such an error and made every later read
      # fail with "input() already active".)
      with open(list_file) as lines:
        for line in lines:
          parsed_line = self._TOKEN_PATTERN.findall(line)
          if not parsed_line:
            continue
          # perform some sanity checks
          if len(parsed_line) not in (2,3,4):
            raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % (line.rstrip(), list_file))
          if rows and len(rows[0]) != len(parsed_line):
            raise IOError("The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!" % (parsed_line, list_file, rows[0]))
          # append the read line
          rows.append(parsed_line)
    except IOError as e:
      # both I/O failures and the sanity check failures above are reported as RuntimeError
      raise RuntimeError("Error reading the file '%s' : '%s'." % (list_file, e))

    return rows


  def _read_column_list(self, list_file, column_count):
    """Reads the given list file and converts each row into a :py:class:`File`.

    The ``column_count`` selects how the columns are interpreted:

    * 2: ``filename client_id``
    * 3: ``filename model_id [client_id]``; without the last column, the model id is used as client id
    * 4: ``filename model_id claimed_id [client_id]``; without the last column, the model id is used as client id

    Raises a :py:class:`ValueError` when a row is processed with any other ``column_count``.
    """
    # read the list
    rows = self._read_multi_column_list(list_file)
    file_list = []
    for row in rows:
      if column_count == 2:
        assert len(row) == 2
        # we expect: filename client_id
        file_list.append(File(file_name = row[0], client_id = row[1]))
      elif column_count == 3:
        assert len(row) in (2, 3)
        # we expect: filename, model_id, client_id
        client_id = row[2] if len(row) > 2 else row[1]
        file_list.append(File(file_name = row[0], client_id = client_id, model_id = row[1]))
      elif column_count == 4:
        assert len(row) in (3, 4)
        # we expect: filename, model_id, claimed_id, client_id
        client_id = row[3] if len(row) > 3 else row[1]
        file_list.append(File(file_name = row[0], client_id = client_id, model_id = row[1], claimed_id = row[2]))
      else:
        raise ValueError("The given column count %d cannot be interpreted. This is a BUG, please report to the author." % column_count)

    return file_list


  def _create_model_dictionary(self, files):
    """Creates a dictionary from model ids to client ids for the given files.

    Raises a :py:class:`ValueError` if one model id is associated with two different client ids.
    """
    # remember model ids
    model_dict = {}
    for f in files:
      model_id = f._model_id
      if model_id not in model_dict:
        model_dict[model_id] = f.client_id
      elif model_dict[model_id] != f.client_id:
        raise ValueError("The read model id '%s' is associated to two different client ids '%s' and '%s'!" % (model_id, f.client_id, model_dict[model_id]))
    return model_dict


  def read_list(self, list_file, group, type = None):
    """Reads the list of Files from the given list file (if not done yet) and returns it.

    For the groups ``'world'``, ``'optional_world_1'`` and ``'optional_world_2'``,
    the ``type`` is ignored and a two-column list is read.
    For any other group, ``type`` selects the number of columns:
    ``'for_models'`` and ``'for_tnorm'`` read three columns, ``'for_scores'``
    reads four columns, and ``'for_probes'`` and ``'for_znorm'`` read two columns.

    Raises a :py:class:`ValueError` for any other ``type`` of a non-world group.
    """
    if group in ('world', 'optional_world_1', 'optional_world_2'):
      if group in self.m_read_lists:
        # just return the previously read list
        return self.m_read_lists[group]
      # read the world list into memory
      files = self._read_column_list(list_file, 2)
      if self.m_store_lists:
        self.m_read_lists[group] = files
      return files

    if group not in self.m_read_lists:
      self.m_read_lists[group] = {}
    group_lists = self.m_read_lists[group]
    if type in group_lists:
      # just return the previously read list
      return group_lists[type]

    if type in ('for_models', 'for_tnorm'):
      column_count = 3
    elif type == 'for_scores':
      column_count = 4
    elif type in ('for_probes', 'for_znorm'):
      column_count = 2
    else:
      raise ValueError("The given type must be one of %s, but not '%s'" %(('for_models', 'for_scores', 'for_probes', 'for_tnorm', 'for_znorm'), type))

    files = self._read_column_list(list_file, column_count)
    if self.m_store_lists:
      group_lists[type] = files
    return files

  def read_models(self, list_file, group, type= None):
    """Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it.

    The ``group`` must be one of ``'dev'``, ``'eval'``, ``'world'``,
    ``'optional_world_1'`` or ``'optional_world_2'``, and the ``type`` must be
    ``'for_models'`` or ``'for_tnorm'``; both are checked with assertions.
    """
    assert group in ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')
    assert type in ('for_models', 'for_tnorm')
    if group not in self.m_model_dicts:
      self.m_model_dicts[group] = {}
    group_dicts = self.m_model_dicts[group]
    if type in group_dicts:
      # just return the previously created dictionary
      return group_dicts[type]

    model_dict = self._create_model_dictionary(self.read_list(list_file, group, type))
    if self.m_store_lists:
      group_dicts[type] = model_dict
    return model_dict