#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <laurent.el-shafey@idiap.ch>
#
# Copyright (C) 2012-2014 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Table models and functionality for the NIST SRE 2012 database.
"""
import os, numpy
from sqlalchemy import Table, Column, Integer, String, Boolean, ForeignKey, or_, and_, not_
from bob.db.base.sqlalchemy_migration import Enum, relationship
from sqlalchemy.orm import backref
from sqlalchemy.ext.declarative import declarative_base
import scipy.io.wavfile
import tempfile
import re
import bob.db.base
import logging
logger = logging.getLogger("bob.db.nist_sre12")
def build_fileid(path, side):
    """Build the identifier of one channel (side) of a recording.

    The identifier starts with the base name of ``path`` (directory and
    extension removed) and ends with ``side``:

    * if the base name already contains ``_sre12``, the result is
      ``<basename>_<side>``;
    * otherwise, if ``path`` contains ``SRE`` followed by two characters
      (for instance an ``SRE08`` directory), that tag is lower-cased and
      inserted: ``<basename>_<sreXX>_<side>``. When ``path`` contains
      several such tags, the last one is used (the regular expression is
      greedy);
    * otherwise the result is ``<basename>_<side>``.

    Keyword Parameters:

    path
      Path of the recording; may include directories and an extension.

    side
      Channel label appended to the identifier (a string).

    Returns the identifier as a string.
    """
    basename = os.path.splitext(os.path.basename(path))[0]

    # Only look for a collection tag in the path when the base name does not
    # already carry the ``_sre12`` marker.
    if re.match(r'.*_sre12', basename) is None:
        tag = re.match(r'.*(SRE..).*', path)
        if tag is not None:
            return basename + '_' + tag.group(1).lower() + '_' + side

    return basename + '_' + side
# Declarative base class shared by every ORM class of this module; the
# association tables below are registered on its metadata.
Base = declarative_base()

# Association table between 'protocolPurpose' rows and 'file' rows. It is
# passed as ``secondary`` to the ``ProtocolPurpose.files`` relationship.
protocolPurpose_file_association = Table('protocolPurpose_file_association', Base.metadata,
    Column('protocolPurpose_id', Integer, ForeignKey('protocolPurpose.id')),
    Column('file_id', String(20), ForeignKey('file.id')))

# Association table between 'protocolPurpose' rows and 'model' rows. It is
# passed as ``secondary`` to the ``ProtocolPurpose.models`` relationship.
protocolPurpose_model_association = Table('protocolPurpose_model_association', Base.metadata,
    Column('protocolPurpose_id', Integer, ForeignKey('protocolPurpose.id')),
    Column('model_id', String(20), ForeignKey('model.id')))
class ModelProbeLink(Base):
    """Model/probe association, i.e. a trial: target speaker / test segment.

    A row links one model to one probe file within one protocol; the three
    columns together form the primary key of the table.
    """

    __tablename__ = 'model_probe_link'

    # Identifier of the target model
    model_id = Column(String(20), ForeignKey('model.id'), primary_key=True)
    # Identifier of the probe (test segment) file
    file_id = Column(String(20), ForeignKey('file.id'), primary_key=True)
    # Protocol in which this trial is defined.
    # NOTE(review): declared as String(20) although it references
    # 'protocol.id', which is an Integer column -- confirm this is intended.
    protocol_id = Column(String(20), ForeignKey('protocol.id'), primary_key=True)

    def __init__(self, model_id, file_id, protocol_id):
        """Store the identifiers of the linked model, file and protocol."""
        self.model_id, self.file_id, self.protocol_id = (
            model_id, file_id, protocol_id)

    def __repr__(self):
        """Return a short description (the protocol id is not included)."""
        linked = (self.model_id, self.file_id)
        return "ModelProbe(%s, %s)" % linked
class ModelEnrollLink(Base):
    """Model/enroll association, i.e. a file used for enrolling a model.

    A row links one model to one enrollment file within one protocol; the
    three columns together form the primary key of the table.
    """

    __tablename__ = 'model_enroll_link'

    # Identifier of the enrolled model
    model_id = Column(String(20), ForeignKey('model.id'), primary_key=True)
    # Identifier of the file used for enrollment
    file_id = Column(String(20), ForeignKey('file.id'), primary_key=True)
    # Protocol in which this enrollment is defined.
    # NOTE(review): declared as String(20) although it references
    # 'protocol.id', which is an Integer column -- confirm this is intended.
    protocol_id = Column(String(20), ForeignKey('protocol.id'), primary_key=True)

    def __init__(self, model_id, file_id, protocol_id):
        """Store the identifiers of the linked model, file and protocol."""
        self.model_id, self.file_id, self.protocol_id = (
            model_id, file_id, protocol_id)

    def __repr__(self):
        """Return a short description (the protocol id is not included)."""
        linked = (self.model_id, self.file_id)
        return "ModelEnroll(%s, %s)" % linked
class Model(Base):
    """Database models, identified by a string key.

    Besides its key, each model stores a gender (one of ``gender_choices``)
    and a client identifier.
    """

    __tablename__ = 'model'

    # Key identifier for the model (string of at most 20 characters)
    id = Column(String(20), primary_key=True)
    # Values accepted by the ``gender`` column
    gender_choices = ('male', 'female')
    gender = Column(Enum(*gender_choices))
    # Client identifier stored with the model (plain string column, not
    # declared as a foreign key)
    client_id = Column(String(20))

    def __init__(self, id, client_id, gender):
        """Create a model.

        Keyword Parameters:

        id
          Key identifier of the model.

        client_id
          Client identifier stored with the model.

        gender
          One of ``gender_choices``.
        """
        self.id = id
        self.client_id = client_id
        self.gender = gender

    def __repr__(self):
        """Return ``Model(<id>, <gender>, <client_id>)``."""
        return "Model(%s, %s, %s)" % (self.id, self.gender, self.client_id)
class File(Base, bob.db.base.File):
    """Generic file container: one channel (side) of a recording."""

    __tablename__ = 'file'

    # Key identifier for the file; computed by build_fileid() from the path
    # and the side
    id = Column(String(20), primary_key=True)
    # Unique path to this file inside the database
    path = Column(String(150))
    # Values accepted by the ``side`` column
    side_choices = ('a', 'b')
    side = Column(Enum(*side_choices))
    # Client identifier stored with the file (plain string column, not
    # declared as a foreign key)
    client_id = Column(String(20))

    def __init__(self, client_id, path, side):
        """Create a file entry; its ``id`` is derived from ``path`` and ``side``.

        Keyword Parameters:

        client_id
          Client identifier stored with the file.

        path
          Path of the recording inside the database.

        side
          One of ``side_choices``.
        """
        # NOTE: the bob.db.base.File constructor is not invoked here; all
        # attributes are assigned directly.
        self.id = build_fileid(path, side)
        self.path = path
        self.client_id = client_id
        self.side = side

    def __repr__(self):
        """This function describes how to convert a File object into a string."""
        return "<File('%s': '%s', '%s', '%s')>" % (str(self.id), str(self.path), str(self.side), str(self.client_id))

    def make_path(self, directory=None, extension='.sph', add_side=True):
        """Wraps the current path so that a complete path is formed

        Keyword Parameters:

        directory
          An optional directory name that will be prefixed to the returned
          result.

        extension
          An optional extension that will be suffixed to the returned
          filename. The extension normally includes the leading ``.``
          character as in ``.jpg`` or ``.hdf5``.

        add_side
          If true (the default), ``-<side>`` is appended to the path before
          the extension.

        Returns a string containing the newly generated file path.
        """
        stem = self.path
        if add_side:
            stem = stem + '-' + self.side
        # ``None`` for directory/extension is treated as an empty string
        return str(os.path.join(directory or '', stem + (extension or '')))

    def load(self, directory=None, extension='.sph'):
        """Loads the audio of this file's channel.

        The file is converted with the external ``sph2pipe`` program into a
        temporary ``.wav`` file, which is then read with
        :py:func:`scipy.io.wavfile.read`. Channel 1 is extracted for side
        ``'a'``, channel 2 for any other side.

        If ``sph2pipe`` cannot be started or exits with a non-zero status,
        the problem is logged and reading continues, so any error raised for
        an unreadable temporary file comes from :py:func:`scipy.io.wavfile.read`.

        Keyword Parameters:

        directory
          [optional] If not empty or None, this directory is prefixed to the
          path of the file to read.

        extension
          [optional] The extension of the file to read.

        Returns a tuple ``(rate, data)`` where ``rate`` is the sampling rate
        reported by the wav reader and ``data`` is a float
        :py:class:`numpy.ndarray` holding the samples.
        """
        # Local import: only this method needs subprocess.
        import subprocess

        # The file on disk holds both channels, so no side suffix is added.
        abspath = self.make_path(directory or '', extension or '', add_side=False)
        channel = '1' if self.side == 'a' else '2'

        with tempfile.NamedTemporaryFile(suffix='.wav') as ftmp:
            # Arguments are passed as a list (no shell), so paths containing
            # spaces or shell metacharacters are handled correctly.
            cmd = ['sph2pipe', '-c', channel, '-p', '-f', 'rif', abspath, ftmp.name]
            try:
                status = subprocess.call(cmd)
            except OSError as exc:
                logger.error("could not run '%s': %s", ' '.join(cmd), exc)
            else:
                if status != 0:
                    logger.warning("'%s' exited with status %d", ' '.join(cmd), status)

            # Read while the temporary file still exists.
            rate, audio = scipy.io.wavfile.read(ftmp.name)

        # Equivalent of the removed ``numpy.cast['float']`` helper.
        data = numpy.asarray(audio).astype(float)
        return rate, data
class Protocol(Base):
    """NIST SRE 2012 protocols"""

    __tablename__ = 'protocol'

    # Unique identifier for this protocol object
    id = Column(Integer, primary_key=True)
    # Name of the protocol associated with this object (unique in the table)
    name = Column(String(20), unique=True)

    def __init__(self, name):
        """Create a protocol called ``name``."""
        self.name = name

    def __repr__(self):
        """Return ``Protocol('<name>')``."""
        # One-element tuple so that the name is always formatted as a single
        # value, whatever its type.
        return "Protocol('%s')" % (self.name,)
class ProtocolPurpose(Base):
    """NIST SRE 2012 purposes

    A purpose combines a protocol with a group (``sgroup``) and a role
    (``purpose``), and gives access to the files and models attached to that
    combination.
    """

    __tablename__ = 'protocolPurpose'

    # Unique identifier for this protocol purpose object
    id = Column(Integer, primary_key=True)
    # Id of the protocol associated with this protocol purpose object
    protocol_id = Column(Integer, ForeignKey('protocol.id'))  # for SQL
    # Group associated with this protocol purpose object
    group_choices = ('eval',)
    sgroup = Column(Enum(*group_choices))
    # Purpose associated with this protocol purpose object
    purpose_choices = ('enroll', 'probe')
    purpose = Column(Enum(*purpose_choices))

    # NOTE: inside this class body ``id`` is the column defined above, not
    # the builtin, so every backref below is ordered by ProtocolPurpose.id.

    # For Python: A direct link to the Protocol object that this
    # ProtocolPurpose belongs to
    protocol = relationship("Protocol", backref=backref("purposes", order_by=id))
    # For Python: A direct link to the File objects associated with this
    # ProtocolPurpose
    files = relationship("File", secondary=protocolPurpose_file_association, backref=backref("protocolPurposes", order_by=id))
    # For Python: A direct link to the Model objects associated with this
    # ProtocolPurpose
    models = relationship("Model", secondary=protocolPurpose_model_association, backref=backref("protocolPurposes", order_by=id))

    def __init__(self, protocol_id, sgroup, purpose):
        """Create a protocol purpose.

        Keyword Parameters:

        protocol_id
          Id of the protocol this purpose belongs to.

        sgroup
          One of ``group_choices``.

        purpose
          One of ``purpose_choices``.
        """
        self.protocol_id = protocol_id
        self.sgroup = sgroup
        self.purpose = purpose

    def __repr__(self):
        """Return ``ProtocolPurpose('<protocol name>', '<sgroup>', '<purpose>')``.

        When no Protocol object is linked (``self.protocol`` is None), the
        protocol name is shown as ``None`` instead of raising.
        """
        linked_protocol = self.protocol
        protocol_name = linked_protocol.name if linked_protocol is not None else None
        return "ProtocolPurpose('%s', '%s', '%s')" % (protocol_name, self.sgroup, self.purpose)