Source code for bob.bio.gmm.tools.command_line

import os
import sys
import types

import bob.core
logger = bob.core.log.setup("bob.bio.gmm")

from bob.bio.base.tools import FileSelector

def add_parallel_gmm_options(parsers, sub_module = None):
  """Add the options for parallel UBM training to the given parsers.

  **Parameters:**

  parsers : dict
    Mapping that provides the argument groups under the keys ``'flag'`` and ``'sub-dir'``.
    Both values must offer an ``add_argument`` method (e.g. :py:mod:`argparse` argument groups).

  sub_module : str or None
    ``None`` registers only the KMeans and GMM options.
    Any other value additionally registers ``--projected-gmm-directory``.
    The value ``'ivector'`` further registers the IVector related sub-directories and ``--tv-start-iteration``.
  """

  # flags that control the amount of training data, restarting and cleaning up
  flag_group = parsers['flag']
  flag_group.add_argument('-l', '--limit-training-data', type=int,
      help = 'Limit the number of training examples used for KMeans initialization and the GMM initialization')

  flag_group.add_argument('-k', '--kmeans-start-iteration', type=int, default=0,
      help = 'Specify the first iteration for the KMeans training (i.e. to restart from there)')
  flag_group.add_argument('-m', '--gmm-start-iteration', type=int, default=0,
      help = 'Specify the first iteration for the GMM training (i.e. to restart from there)')
  flag_group.add_argument('-C', '--clean-intermediate', action='store_true',
      help = 'Clean up temporary files of older iterations?')

  # sub-directories that are always required
  sub_dir_group = parsers['sub-dir']
  sub_dir_group.add_argument('--kmeans-directory', default = 'kmeans_temp',
      help = 'The sub-directory (relative to --temp-directory), where intermediate kmeans files should be stored')
  sub_dir_group.add_argument('--gmm-directory',  default = 'gmm_temp',
      help = 'The sub-directory (relative to --temp-directory), where intermediate gmm files should be stored')

  # any sub-module needs a place for the GMM-projected training files
  if sub_module is not None:
    sub_dir_group.add_argument('--projected-gmm-directory', default = 'projected_gmm',
        help = 'The sub-directory (relative to --temp-directory), where projected gmm training files should be stored')

  # options that are specific to the IVector sub-module
  if sub_module == 'ivector':
    sub_dir_group.add_argument('--ivector-directory',  default = 'ivector_temp',
        help = 'The sub-directory (relative to --temp-directory), where intermediate ivector files should be stored')
    sub_dir_group.add_argument('--projected-ivector-directory',  default = 'projected_ivector_temp',
        help = 'The sub-directory (relative to --temp-directory), where intermediate projected ivector training files should be stored')
    sub_dir_group.add_argument('--whitened-directory',  default = 'whitened_temp',
        help = 'The sub-directory (relative to --temp-directory), where intermediate whitened ivector training files should be stored')
    sub_dir_group.add_argument('--lda-projected-directory',  default = 'lda_projected_temp',
        help = 'The sub-directory (relative to --temp-directory), where intermediate LDA projected ivector training files should be stored')
    sub_dir_group.add_argument('--wccn-projected-directory',  default = 'wccn_projected_temp',
        help = 'The sub-directory (relative to --temp-directory), where intermediate WCCN projected ivector training files should be stored')
    flag_group.add_argument('-i', '--tv-start-iteration', type=int, default=0,
        help = 'Specify the first iteration for the IVector training (i.e. to restart from there)')


# Functions that are bound as methods to the FileSelector instance (see initialize_parallel_gmm)
def _kmeans_intermediate_file(self, round):
  """Returns the path of the intermediate KMeans file ``kmeans.hdf5`` for the given training round."""
  kmeans_dir = self.directories['kmeans']
  round_dir = 'round_%05d' % round
  return os.path.join(kmeans_dir, round_dir, 'kmeans.hdf5')

def _kmeans_stats_file(self, round, start_index, end_index):
  """Returns the path of the KMeans statistics file for the given training round and index range."""
  kmeans_dir = self.directories['kmeans']
  round_dir = 'round_%05d' % round
  stats_name = 'stats-%05d-%05d.hdf5' % (start_index, end_index)
  return os.path.join(kmeans_dir, round_dir, stats_name)

def _gmm_intermediate_file(self, round):
  """Returns the path of the intermediate GMM file ``ubm.hdf5`` for the given training round."""
  gmm_dir = self.directories['gmm']
  round_dir = 'round_%05d' % round
  return os.path.join(gmm_dir, round_dir, 'ubm.hdf5')

def _gmm_stats_file(self, round, start_index, end_index):
  """Returns the path of the GMM statistics file for the given training round and index range."""
  gmm_dir = self.directories['gmm']
  round_dir = 'round_%05d' % round
  stats_name = 'stats-%05d-%05d.hdf5' % (start_index, end_index)
  return os.path.join(gmm_dir, round_dir, stats_name)


def _ivector_intermediate_file(self, round):
  """Returns the path of the intermediate IVector file ``tv.hdf5`` for the given training round."""
  ivector_dir = self.directories['ivector']
  round_dir = 'round_%05d' % round
  return os.path.join(ivector_dir, round_dir, 'tv.hdf5')

def _ivector_stats_file(self, round, start_index, end_index):
  """Returns the path of the IVector statistics file for the given training round and index range."""
  ivector_dir = self.directories['ivector']
  round_dir = 'round_%05d' % round
  stats_name = 'stats-%05d-%05d.hdf5' % (start_index, end_index)
  return os.path.join(ivector_dir, round_dir, stats_name)


def initialize_parallel_gmm(args, sub_module = None):
  """Registers the files, directories and helper methods for parallel training at the file selector.

  The object returned by ``FileSelector.instance()`` is extended in place; nothing is returned.

  **Parameters:**

  args : namespace
    The parsed command line arguments.
    This function reads ``args.database.protocol``, ``args.database.training_depends_on_protocol``, ``args.temp_directory``, ``args.kmeans_directory`` and ``args.gmm_directory``.
    When ``sub_module`` is not ``None``, ``args.projected_gmm_directory`` is read as well, and for ``'ivector'`` also ``args.ivector_directory``, ``args.projected_ivector_directory``, ``args.whitened_directory``, ``args.lda_projected_directory`` and ``args.wccn_projected_directory``.

  sub_module : str or None
    ``None``: the UBM file is the file selector's ``projector_file``.
    Any other value: the UBM is stored in a separate ``ubm.hdf5`` and a ``'projected_gmm'`` directory is registered.
    ``'ivector'``: additionally, the IVector related files, directories and helper methods are registered.
  """
  # get the relevant sub_directory, which depends on the database and the protocol
  protocol = 'None' if args.database.protocol is None else args.database.protocol
  sub_dir = protocol if args.database.training_depends_on_protocol else '.'

  def in_temp(name):
    # all files and directories live below <temp_directory>/<sub_dir>
    return os.path.join(args.temp_directory, sub_dir, name)

  fs = FileSelector.instance()

  # add relevant **functions** to file selector object
  fs.kmeans_intermediate_file = types.MethodType(_kmeans_intermediate_file, fs)
  fs.kmeans_stats_file = types.MethodType(_kmeans_stats_file, fs)
  fs.gmm_intermediate_file = types.MethodType(_gmm_intermediate_file, fs)
  fs.gmm_stats_file = types.MethodType(_gmm_stats_file, fs)

  # add relevant directories to file selector object
  fs.directories['kmeans'] = in_temp(args.kmeans_directory)
  fs.kmeans_file = in_temp("kmeans.hdf5")
  fs.directories['gmm'] = in_temp(args.gmm_directory)

  if sub_module is None:
    # plain GMM: the UBM itself is the projector
    fs.ubm_file = fs.projector_file
    return

  # the UBM is only an intermediate result of the sub-module; store it separately
  fs.ubm_file = in_temp("ubm.hdf5")
  fs.directories['projected_gmm'] = in_temp(args.projected_gmm_directory)

  if sub_module == 'ivector':
    fs.ivector_intermediate_file = types.MethodType(_ivector_intermediate_file, fs)
    fs.ivector_stats_file = types.MethodType(_ivector_stats_file, fs)

    fs.directories['ivector'] = in_temp(args.ivector_directory)
    fs.tv_file = in_temp("tv.hdf5")
    fs.whitener_file = in_temp("whitener.hdf5")
    fs.lda_file = in_temp("lda.hdf5")
    fs.wccn_file = in_temp("wccn.hdf5")
    fs.plda_file = in_temp("plda.hdf5")
    fs.directories['projected_ivector'] = in_temp(args.projected_ivector_directory)
    fs.directories['whitened'] = in_temp(args.whitened_directory)
    fs.directories['lda_projected'] = in_temp(args.lda_projected_directory)
    fs.directories['wccn_projected'] = in_temp(args.wccn_projected_directory)