#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <laurent.el-shafey@idiap.ch>
# Copyright (C) 2012-2014 Idiap Research Institute, Martigny, Switzerland
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Table models and functionality for the NIST SRE 2012 database.
import logging
import os
import re
import subprocess
import tempfile

import numpy
import scipy.io.wavfile
from sqlalchemy import Table, Column, Integer, String, Boolean, ForeignKey, or_, and_, not_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref

import bob.db.base
from bob.db.base.sqlalchemy_migration import Enum, relationship
logger = logging.getLogger("bob.db.nist_sre12")
def build_fileid(path, side):
    """Builds the identifier of a file from its path and its channel side.

    The identifier is the base name of ``path`` (directories and extension
    removed), optionally followed by a lower-cased corpus tag, followed by
    the side:

    * if the base name already contains ``_sre12``, no tag is added and the
      result is ``<basename>_<side>``;
    * otherwise, if ``path`` contains ``SRE`` followed by two characters
      (e.g. ``SRE10``), the result is ``<basename>_<tag>_<side>`` with the tag
      lower-cased (e.g. ``sre10``);
    * otherwise the result is ``<basename>_<side>``.

    Keyword Parameters:

    path
      The path of the file; may include directories and an extension.

    side
      The side (channel) string appended at the end of the identifier.

    Returns the identifier as a string.
    """
    basename = os.path.splitext(os.path.basename(path))[0]

    if '_sre12' not in basename:
        # The leading ``.*`` is greedy, so when several ``SRE..`` tags occur
        # in the path the LAST one is picked.
        match = re.match(r'.*(SRE..).*', path)
        if match is not None:
            return basename + '_' + match.group(1).lower() + '_' + side

    # Either the base name already carries the sre12 tag, or no tag was found.
    return basename + '_' + side
# Declarative base class shared by all the table models of this module
Base = declarative_base()

# Many-to-many association between protocol purposes and files
protocolPurpose_file_association = Table(
    'protocolPurpose_file_association',
    Base.metadata,
    Column('protocolPurpose_id', Integer, ForeignKey('protocolPurpose.id')),
    Column('file_id', String(20), ForeignKey('file.id')),
)

# Many-to-many association between protocol purposes and models
protocolPurpose_model_association = Table(
    'protocolPurpose_model_association',
    Base.metadata,
    Column('protocolPurpose_id', Integer, ForeignKey('protocolPurpose.id')),
    Column('model_id', String(20), ForeignKey('model.id')),
)
class ModelProbeLink(Base):
    """Association of a model with a probe file inside a protocol.

    Each row is one (model, file, protocol) triple, i.e. a trial made of a
    target speaker model and a test segment. The three columns together form
    the primary key.
    """

    __tablename__ = 'model_probe_link'

    # Identifier of the model (target speaker) of the trial
    model_id = Column(String(20), ForeignKey('model.id'), primary_key=True)
    # Identifier of the probe file (test segment) of the trial
    file_id = Column(String(20), ForeignKey('file.id'), primary_key=True)
    # Identifier of the protocol the trial belongs to
    # NOTE(review): declared as String(20) while the referenced protocol.id
    # column is an Integer -- confirm this mismatch is intended.
    protocol_id = Column(String(20), ForeignKey('protocol.id'), primary_key=True)

    def __init__(self, model_id, file_id, protocol_id):
        """Stores the three identifiers that make up the link."""
        self.model_id = model_id
        self.file_id = file_id
        self.protocol_id = protocol_id

    def __repr__(self):
        """Returns a short description; the protocol is not included."""
        shown = (self.model_id, self.file_id)
        return "ModelProbe(%s, %s)" % shown
class ModelEnrollLink(Base):
    """Association of a model with an enrollment file inside a protocol.

    Each row is one (model, file, protocol) triple, i.e. a file used for
    enrolling the given model. The three columns together form the primary
    key.
    """

    __tablename__ = 'model_enroll_link'

    # Identifier of the model being enrolled
    model_id = Column(String(20), ForeignKey('model.id'), primary_key=True)
    # Identifier of the file used for the enrollment
    file_id = Column(String(20), ForeignKey('file.id'), primary_key=True)
    # Identifier of the protocol the enrollment belongs to
    # NOTE(review): declared as String(20) while the referenced protocol.id
    # column is an Integer -- confirm this mismatch is intended.
    protocol_id = Column(String(20), ForeignKey('protocol.id'), primary_key=True)

    def __init__(self, model_id, file_id, protocol_id):
        """Stores the three identifiers that make up the link."""
        self.model_id = model_id
        self.file_id = file_id
        self.protocol_id = protocol_id

    def __repr__(self):
        """Returns a short description; the protocol is not included."""
        shown = (self.model_id, self.file_id)
        return "ModelEnroll(%s, %s)" % shown
class Model(Base):
    """Database model, described by a string identifier, a gender and the
    identifier of the client it belongs to."""

    __tablename__ = 'model'

    # Key identifier for the model (a string of up to 20 characters)
    id = Column(String(20), primary_key=True)
    # Gender of the model; restricted to the values of gender_choices
    gender_choices = ('male', 'female')
    gender = Column(Enum(*gender_choices))
    # Identifier of the client associated with this model
    client_id = Column(String(20))

    def __init__(self, id, client_id, gender):
        """Stores the model identifier, the client identifier and the gender."""
        self.id = id
        self.gender = gender
        self.client_id = client_id

    def __repr__(self):
        """Returns ``Model(<id>, <gender>, <client_id>)``."""
        shown = (self.id, self.gender, self.client_id)
        return "Model(%s, %s, %s)" % shown
class File(Base, bob.db.base.File):
    """File entry of the database: one side (channel) of a recording.

    The identifier is derived from the path and the side, see
    :py:func:`build_fileid`.
    """

    __tablename__ = 'file'

    # Key identifier for the file
    id = Column(String(20), primary_key=True)
    # Unique path to this file inside the database
    path = Column(String(150))
    # Side (channel) of the recording; restricted to the values of side_choices
    side_choices = ('a', 'b')
    side = Column(Enum(*side_choices))
    # Identifier of the client associated with this file
    client_id = Column(String(20))

    def __init__(self, client_id, path, side):
        """Stores the given attributes and derives the file identifier.

        Keyword Parameters:

        client_id
          The identifier of the client associated with this file.

        path
          The path of the file inside the database.

        side
          The side (channel) of the recording, ``'a'`` or ``'b'``.
        """
        # NOTE(review): the bob.db.base.File constructor is not invoked here;
        # the attributes are set directly on the instance.
        self.id = build_fileid(path, side)
        self.path = path
        self.client_id = client_id
        self.side = side

    def __repr__(self):
        """This function describes how to convert a File object into a string."""
        return "<File('%s': '%s', '%s', '%s')>" % (str(self.id), str(self.path), str(self.side), str(self.client_id))

    def make_path(self, directory=None, extension='.sph', add_side=True):
        """Wraps the current path so that a complete path is formed.

        Keyword Parameters:

        directory
          An optional directory name that will be prefixed to the returned
          result.

        extension
          An optional extension that will be suffixed to the returned
          filename. The extension normally includes the leading ``.``
          character as in ``.sph``.

        add_side
          If true (the default), ``-`` followed by the side of this file is
          inserted between the path and the extension.

        Returns a string containing the newly generated file path.
        """
        # ``None`` (or an empty value) means "no directory" / "no extension"
        side_suffix = ('-' + self.side) if add_side else ''
        filename = self.path + side_suffix + (extension or '')
        return str(os.path.join(directory or '', filename))

    def load(self, directory=None, extension='.sph'):
        """Loads the audio data of this file.

        The file found at ``make_path(directory, extension, add_side=False)``
        is converted with the external ``sph2pipe`` program into a temporary
        RIFF (``.wav``) file holding a single channel (channel 1 for side
        ``'a'``, channel 2 otherwise), which is then read back with
        :py:func:`scipy.io.wavfile.read`.

        Keyword Parameters:

        directory
          [optional] If not empty or None, this directory is prefixed to the
          path of the file to read.

        extension
          [optional] The extension of the file to read.

        Returns a tuple ``(rate, data)`` with the sampling rate and the
        samples converted to a floating point :py:class:`numpy.ndarray`.

        Raises ``OSError`` if ``sph2pipe`` cannot be started and
        :py:class:`subprocess.CalledProcessError` if it exits with a non-zero
        status.
        """
        # get the path; the side is selected through the channel option below
        abspath = self.make_path(directory or '', extension or '', add_side=False)
        channel = '1' if self.side == 'a' else '2'

        with tempfile.NamedTemporaryFile(suffix='.wav') as ftmp:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            cmd = ['sph2pipe', '-c', channel, '-f', 'rif', abspath, ftmp.name]
            subprocess.check_call(cmd)
            # read mono wav file while the temporary file still exists
            rate, audio = scipy.io.wavfile.read(ftmp.name)

        # ``numpy.cast`` was removed in NumPy 2.0; this is its equivalent
        data = numpy.asarray(audio).astype(float)
        return rate, data
class Protocol(Base):
    """A protocol of the NIST SRE 2012 database, identified by a unique name."""

    __tablename__ = 'protocol'

    # Unique identifier for this protocol object
    id = Column(Integer, primary_key=True)
    # Name of the protocol associated with this object (unique)
    name = Column(String(20), unique=True)

    def __init__(self, name):
        """Stores the name of the protocol."""
        self.name = name

    def __repr__(self):
        """Returns ``Protocol('<name>')``."""
        shown = (self.name,)
        return "Protocol('%s')" % shown
class ProtocolPurpose(Base):
    """A (protocol, group, purpose) combination of the NIST SRE 2012 database,
    linked to the files and models that belong to it."""

    __tablename__ = 'protocolPurpose'

    # Unique identifier for this protocol purpose object
    id = Column(Integer, primary_key=True)
    # Id of the protocol associated with this protocol purpose object (for SQL)
    protocol_id = Column(Integer, ForeignKey('protocol.id'))
    # Group associated with this protocol purpose object
    group_choices = ('eval',)
    sgroup = Column(Enum(*group_choices))
    # Purpose associated with this protocol purpose object
    purpose_choices = ('enroll', 'probe')
    purpose = Column(Enum(*purpose_choices))

    # For Python: a direct link to the Protocol object that this
    # ProtocolPurpose belongs to
    protocol = relationship(
        "Protocol",
        backref=backref("purposes", order_by=id),
    )
    # For Python: a direct link to the File objects associated with this
    # ProtocolPurpose
    files = relationship(
        "File",
        secondary=protocolPurpose_file_association,
        backref=backref("protocolPurposes", order_by=id),
    )
    # For Python: a direct link to the Model objects associated with this
    # ProtocolPurpose
    models = relationship(
        "Model",
        secondary=protocolPurpose_model_association,
        backref=backref("protocolPurposes", order_by=id),
    )

    def __init__(self, protocol_id, sgroup, purpose):
        """Stores the protocol identifier, the group and the purpose."""
        self.protocol_id = protocol_id
        self.sgroup = sgroup
        self.purpose = purpose

    def __repr__(self):
        """Returns ``ProtocolPurpose('<protocol name>', '<group>', '<purpose>')``."""
        shown = (self.protocol.name, self.sgroup, self.purpose)
        return "ProtocolPurpose('%s', '%s', '%s')" % shown