# Source code for bob.db.frgc.models

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""XML file reader and stored lists for FRGC database
"""

import xml.sax
import os
import numpy

import bob.db.base

class File (bob.db.base.File):
  """File object handed out by the ``objects`` function.

  Instances are built on demand and are not stored anywhere."""

  def __init__(self, signature, presentation, path):
    # the presentation is used as the file id; the base class keeps id and path
    bob.db.base.File.__init__(self, file_id = presentation, path = path)
    # the signature is kept as the client id of this file
    self.client_id = signature

  # overwrite default make_path behaviour
  def make_path(self, directory=None, extension=None):
    """Builds the complete file name from ``directory``, the stored path and ``extension``.

    When the extension is ``'.jpg'`` and no such file exists, but a file with
    the extension ``'.JPG'`` does, the upper-case variant is returned instead.

    Keyword parameters:

    directory
      An optional directory name that is prefixed to the returned result.

    extension
      An optional extension that is suffixed to the returned file name.
      The extension normally includes the leading ``.`` character, as in
      ``.jpg`` or ``.hdf5``.

    Returns a string containing the newly generated file path.
    """
    directory = directory or ''
    extension = extension or ''

    candidate = os.path.join(directory, self.path + extension)
    # only a missing '.jpg' file triggers the search for the '.JPG' variant
    if extension != '.jpg' or os.path.isfile(candidate):
      return candidate

    upper_case = os.path.join(directory, self.path + '.JPG')
    return upper_case if os.path.exists(upper_case) else candidate


class FileSet:
  """FileSet object handed out by the ``object_sets`` function.

  Instances are built on demand and are not stored anywhere."""

  def __init__(self, frgc_file):
    # The id is copied from the model id of the given FRGCFile;
    # WARNING: this id is not stable and should not be stored anywhere.
    self.id = frgc_file.m_model
    self.client_id = frgc_file.m_signature
    # one File per (presentation, path) entry of the given FRGCFile
    self.files = [
      File(frgc_file.m_signature, record_id, record_path)
      for record_id, record_path in frgc_file.m_files.items()
    ]

    # The path of the set is assembled from the paths of its files: the first
    # three characters and the text up to the first 'd' behind them come from
    # the first file, followed by the '+'-joined pieces that each file has
    # between its first and second 'd' after the third character.
    # It is not really used anywhere.
    first_path = self.files[0].path
    head = first_path[:3]
    stem = first_path[3:].split('d')[0]
    tails = [member.path[3:].split('d')[1] for member in self.files]
    self.path = head + stem + "d" + "+".join(tails)

  def __lt__(self, other):
    """Orders file sets by their ``path``."""
    return self.path < other.path


################################################################################
############# Internal IO and representations of the FRGC files ################

# Global model index. This model index is generated on the fly and should not be stored between sessions.
model_index = 1  # next model id to hand out; incremented by every FRGCFile that is created


class FRGCFile:
  """This class holds all desired information about a specific file, or set of files"""

  def __init__(self, signature):
    # the client id
    self.m_signature = signature
    # a unique model id, which is generated on the fly from the module-level counter
    global model_index
    self.m_model = model_index
    model_index += 1
    # the files: map from record id to path (w/o file extension)
    self.m_files = {}

  def add(self, presentation, path):
    """Registers ``path`` (its file extension is stripped) under the record id ``presentation``.

    Each record id may be added only once."""
    assert presentation not in self.m_files
    self.m_files[presentation] = os.path.splitext(path)[0]


class ListFileReader (xml.sax.handler.ContentHandler):
  """Class for reading the FRGC xml image file lists

  After parsing, ``m_file_list`` holds one :py:class:`FRGCFile` per
  ``biometric-signature`` or ``complex-biometric-signature`` element."""

  # the elements that open and close one FRGCFile
  _SIGNATURE_TAGS = ('biometric-signature', 'complex-biometric-signature')

  def __init__(self):
    xml.sax.handler.ContentHandler.__init__(self)
    # the FRGCFile that is currently being filled, if any
    self.m_file = None
    # all FRGCFile objects that have been completed so far
    self.m_file_list = []

  def startDocument(self):
    pass

  def endDocument(self):
    pass

  def startElement(self, name, attrs):
    if name in self._SIGNATURE_TAGS:
      # a new file (set) starts
      self.m_file = FRGCFile(attrs['name'])
    elif name == 'presentation':
      # presentations are only expected inside a signature element
      assert self.m_file
      self.m_file.add(attrs['name'], attrs['file-name'])
    # all other elements are ignored

  def endElement(self, name):
    if name in self._SIGNATURE_TAGS:
      # the file (set) is complete; store it and wait for the next identity
      self.m_file_list.append(self.m_file)
      self.m_file = None
    # all other elements are ignored


class AnnotationFileReader (xml.sax.handler.ContentHandler):
  """Class for reading the FRGC metadata list

  After parsing, ``m_annotation_map`` maps the ``recording_id`` of each
  ``Recording`` element that contains a ``LeftEyeCenter`` to a dictionary with
  the keys ``'leye'``, ``'reye'``, ``'nose'`` and ``'mouth'``; each value is a
  ``(y, x)`` tuple of integers."""

  # map from the xml element name to the key used in the annotation dictionary
  _ANNOTATION_KEYS = {
    'LeftEyeCenter': 'leye',
    'RightEyeCenter': 'reye',
    'Nose': 'nose',
    'Mouth': 'mouth',
  }

  def __init__(self):
    xml.sax.handler.ContentHandler.__init__(self)
    # annotations of the recording that is currently being read
    self.m_annotations = {}
    # id of the recording that is currently being read, if any
    self.m_signature = None
    # whether the current recording is stored; set once a LeftEyeCenter is seen
    self.m_use_recording = False
    # the result: map from recording id to annotations
    self.m_annotation_map = {}

  def startDocument(self):
    pass

  def endDocument(self):
    pass

  def startElement(self, name, attrs):
    if name == 'Recording':
      # recordings must not be nested
      assert self.m_signature is None
      self.m_signature = attrs['recording_id']
      self.m_annotations = {}
      self.m_use_recording = False
    elif name in self._ANNOTATION_KEYS:
      # positions are stored in (y, x) order
      self.m_annotations[self._ANNOTATION_KEYS[name]] = (int(attrs['y']), int(attrs['x']))
      if name == 'LeftEyeCenter':
        self.m_use_recording = True
    # all other elements are ignored

  def endElement(self, name):
    if name == 'Recording':
      assert self.m_signature
      assert self.m_signature not in self.m_annotation_map
      # only recordings with a left eye annotation are kept; these need all four positions
      if self.m_use_recording:
        assert len(self.m_annotations) == 4
        self.m_annotation_map[self.m_signature] = self.m_annotations
      # wait for the next recording
      self.m_signature = None
    # all other elements are ignored


def read_mask(mask_file):
  """Reads the mask from file

  Everything in front of the first ``MB`` marker is skipped. The remainder of
  the marker line is split at single spaces; its second and third field are
  taken as the number of queries and targets. All bytes behind that line are
  read as the mask values.

  Keyword parameters:

  mask_file
    The name of the mask file to read.

  Returns the mask as a 2D ``numpy.uint8`` array of shape (queries, targets).

  Raises a ``ValueError`` if the file contains no ``MB`` marker, or if the
  number of mask bytes does not fit the size given in the marker line.
  """
  # the with statement closes the file also when the file turns out to be invalid
  with open(mask_file, 'rb') as f:
    # read until the phrase "MB" is read; an empty read signals the end of the file
    b = None
    while b not in ('B', '', b'B', b''):
      m = None
      while m not in ('M', '', b'M', b''):
        m = f.read(1)
      b = f.read(1)

    if m not in ('M', b'M') or b not in ('B', b'B'):
      raise ValueError("The given mask file '" + mask_file + "' is invalid.")

    # read the mask size
    queries, targets = f.readline().split(b' ')[1:3]

    # read mask
    mask = numpy.fromfile(f, dtype = numpy.uint8)

  return mask.reshape((int(queries), int(targets)))


# directories inside the FRGC database; %(v)s is replaced by each of the dir_variants
list_dir = "BEE_DIST/%(v)sFRGC2.0/signature_sets/experiments"
mask_dir = "BEE_DIST/%(v)sFRGC2.0/Experiment%(e)s/output"
meta_data_dir = "BEE_DIST/%(v)sFRGC2.0/metadata"
dir_variants = ('linux/FRGC/', '')


######################################################
##### lists ##########################################

# the xml files for training, target and query
xml_files = {
  'world': 'FRGC_Exp_2.0.1_Training.xml',
  'dev': {
    '2.0.1': 'FRGC_Exp_2.0.1_Target.xml',
    '2.0.2': 'FRGC_Exp_2.0.2_Target.xml',
    '2.0.4': {
      'enroll': 'FRGC_Exp_2.0.4_Target.xml',
      'probe': 'FRGC_Exp_2.0.4_Query.xml',
    },
  },
}

# collector for lists that we already read.
known_lists = {
  'world': None,
  'dev': {
    '2.0.1': None,
    '2.0.2': None,
    '2.0.4': {'enroll': None, 'probe': None},
  },
}

# collector for files and models that have been read
file_dict = {}
model_dict = {}


def _last_existing(paths):
  """Returns the last entry of ``paths`` that exists on disk, or None if none of them exists."""
  found = None
  for candidate in paths:
    if os.path.exists(candidate):
      found = candidate
  return found


def get_list(base_dir, group, protocol=None, purpose=None):
  """Reads and returns the list of file names for the given group, purpose and protocol.

  Lists are read only once and are cached in ``known_lists``; reading a list
  also fills ``file_dict`` and ``model_dict``.

  Keyword parameters:

  base_dir
    The base directory of the FRGC database.

  group
    One of ``'world'`` or ``'dev'``.

  protocol
    For group ``'dev'``: one of ``'2.0.1'``, ``'2.0.2'`` or ``'2.0.4'``.

  purpose
    For protocol ``'2.0.4'``: one of ``'enroll'`` or ``'probe'``.

  Returns the list of FRGCFile objects that were read, or None if the group or
  the protocol is not known.

  Raises an ``xml.sax.SAXException`` if the list file cannot be found for any
  of the directory variants.
  """

  def read_if_needed(file_name, cached):
    """Reads the given list (if it has not been read yet) and fills the file and model dictionaries."""
    if cached:
      return cached

    # Only the relative directory pattern is formatted. The base directory is
    # joined in afterwards, so a '%' character inside base_dir is left alone.
    candidates = [os.path.join(base_dir, list_dir % {'v': variant}, file_name) for variant in dir_variants]
    found = _last_existing(candidates)
    if found is None:
      raise xml.sax.SAXException("Could not find the any of the list files '%s'. Your FRGC base directory '%s' seems to be wrong or incomplete."%(candidates, base_dir))

    handler = ListFileReader()
    xml.sax.parse(found, handler)
    entries = handler.m_file_list

    # integrate in dicts
    for entry in entries:
      for record_id in entry.m_files:
        file_dict[record_id] = entry.m_signature
      model_dict[entry.m_model] = entry.m_signature
    return entries

  if group == 'world':
    known_lists[group] = read_if_needed(xml_files[group], known_lists[group])
    return known_lists[group]

  if group == 'dev':
    if protocol in ('2.0.1', '2.0.2'):
      known_lists[group][protocol] = read_if_needed(xml_files[group][protocol], known_lists[group][protocol])
      return known_lists[group][protocol]
    if protocol == '2.0.4':
      known_lists[group][protocol][purpose] = read_if_needed(xml_files[group][protocol][purpose], known_lists[group][protocol][purpose])
      return known_lists[group][protocol][purpose]

  # unknown group or protocol
  return None


def client_from_file(file_id):
  """Returns the client id attached to the given file id.

  The file id must be already known (i.e., it must have been read from any list)."""
  assert file_id in file_dict
  return file_dict[file_id]


def client_from_model(model_id):
  """Returns the client id attached to the given model id.

  The model id must be already known."""
  assert model_id in model_dict
  return model_dict[model_id]


###############################################################
##### masks ###################################################

# static collector for the mask files
known_masks = {
  '2.0.1': {'maskI': None, 'maskII': None, 'maskIII': None},
  '2.0.2': {'maskI': None, 'maskII': None, 'maskIII': None},
  '2.0.4': {'maskI': None, 'maskII': None, 'maskIII': None},
}


def get_mask(base_dir, protocol, mask_type):
  """Returns the mask ([query_index], [target_index]) for the given protocol and mask type.

  Masks are read only once and are cached in ``known_masks``. If ``mask_type``
  is None, None is returned.

  Raises an ``xml.sax.SAXException`` if the mask file cannot be found for any
  of the directory variants.
  """
  if mask_type is None:
    return None

  if known_masks[protocol][mask_type] is None:
    # the experiment directory is named after the last character of the protocol
    mask_files = [os.path.join(base_dir, mask_dir%{'v':variant, 'e':protocol[-1:]}, mask_type + ".mtx") for variant in dir_variants]
    found = _last_existing(mask_files)
    if found is None:
      raise xml.sax.SAXException("Could not find any of the mask files '%s'. Your FRGC base directory '%s' seems to be wrong or incomplete."%(mask_files, base_dir))
    known_masks[protocol][mask_type] = read_mask(found)

  return known_masks[protocol][mask_type]


###############################################################
##### annotations ###############################################

# static collector of the annotations
annotations = None


def get_annotations(base_dir, file_id):
  """Returns the eye, mouth and nose positions for the given file id.

  The metadata file is parsed on the first call and the result is cached in
  the module-level ``annotations``.

  Raises an ``xml.sax.SAXException`` if the metadata file cannot be found for
  any of the directory variants.
  """
  global annotations
  # check if annotations need to be read
  if not annotations:
    # read annotations file
    metadata_files = [os.path.join(base_dir, meta_data_dir%{'v':variant}, "FRGC_2.0_Metadata.xml") for variant in dir_variants]
    found = _last_existing(metadata_files)
    if found is None:
      raise xml.sax.SAXException("Could not find one of the metadata file '%s'. Your FRGC base directory '%s' seems to be wrong or incomplete."%(metadata_files, base_dir))

    annotation_reader = AnnotationFileReader()
    xml.sax.parse(found, annotation_reader)
    annotations = annotation_reader.m_annotation_map

  return annotations[file_id]