Source code for bob.pad.voice.extractor.glcms

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Pavel Korshunov <pavel.korshunov@idiap.ch>
# Tue 17 May 15:43:22 CEST 2016

import numpy
import bob.io.base
import bob.ip.base
import bob.sp

import math

from .ratios import Ratios

import logging

logger = logging.getLogger("bob.pad.voice")


class GLCMs(Ratios):
    """
    Extractor that computes Grey-Level Co-occurrence Matrix (GLCM) features from a textogram, which, in turn,
    is computed by a cepstral or spectral extractor passed as an argument.

    The textogram is split column-wise into ``n_glcms`` strips; for each strip a GLCM is computed with
    ``bob.ip.base.GLCM(levels=8)`` and either the named GLCM properties (``properties=True``) or the raw
    flattened GLCM (``properties=False``) are appended to the resulting feature vector.
    """

    def __init__(self,
                 features_processor,  # another extractor that provides features for GLCM computation
                 n_glcms=1,  # do not split the features on which we compute GLCM
                 offset_vector=None,  # GLCM offsets; None selects the default [[0, 1], [1, 0]]
                 properties=True,  # True: use the named GLCM properties; False: use the raw flattened GLCM
                 **kwargs
                 ):
        """
        :param features_processor: callable extractor ``(input_data, annotations) -> 2D features``; its
            ``n_filters`` attribute is read to determine the width of each textogram strip.
        :param n_glcms: number of strips the features are split into (passed to ``Ratios`` as ``n_bands``).
        :param offset_vector: list of offset pairs given to the GLCM operator. ``None`` (the default)
            means ``[[0, 1], [1, 0]]``.
        :param properties: if True, the properties named in ``self.properties_list`` are extracted from each
            GLCM; otherwise the flattened GLCM itself is used as the feature.
        :param kwargs: forwarded unchanged to ``Ratios.__init__``.
        """
        Ratios.__init__(self,
                        features_processor=features_processor,
                        n_bands=n_glcms,
                        **kwargs)
        # a fresh list per instance, so instances never share (and cannot corrupt) one default object
        if offset_vector is None:
            offset_vector = [[0, 1], [1, 0]]
        self.offset_vector = offset_vector
        self.properties = properties
        self.properties_list = ["angular_second_moment", "energy", "variance", "contrast",
                                "auto_correlation", "correlation", "correlation_matlab",
                                "inverse_difference_moment", "sum_average", "sum_variance",
                                "sum_entropy", "entropy", "difference_variance", "difference_entropy",
                                "dissimilarity", "homogeneity", "cluster_prominence", "cluster_shade",
                                "maximum_probability", "information_measure_of_correlation_1",
                                "information_measure_of_correlation_2", "inverse_difference",
                                "inverse_difference_normalized", "inverse_difference_moment_normalized"]
        self.offset = numpy.array(self.offset_vector, dtype='int32')

    def compute_glcms(self, data):
        """
        Computes GLCM features for each of the ``self.n_bands`` column strips of ``data``.

        :param data: 2D array of features; it is sliced as ``data[:, start:stop]`` and is not modified.
        :return: tuple ``(glcm_op, glcm_feats)`` with the ``bob.ip.base.GLCM`` operator that was used and a
            flat array holding the features of all strips, one strip after another.
        """
        # find the size of each textogram (a strip of features, for which we compute GLCM);
        # int() keeps the value usable as a slice index even where math.floor returns a float
        textogram_width = int(math.floor(self.features_processor.n_filters / self.n_bands))

        glcm_op = bob.ip.base.GLCM(levels=8)
        glcm_op.offset = self.offset

        glcm_feats = []
        for i in range(0, self.n_bands):
            textogram = data[:, i * textogram_width:(i + 1) * textogram_width]
            peak = textogram.max()
            if peak:
                # Scale into a NEW array: the slice above is a view, so an in-place multiplication
                # would silently rescale the caller's data as well.
                textogram = textogram * (255.0 / peak)
            textogram = numpy.asarray(textogram, dtype=numpy.uint8)

            glcm = glcm_op.extract(textogram)
            if self.properties:
                try:
                    glcm_prop = glcm_op.properties_by_name(glcm, self.properties_list)
                    # we get list of lists of features, which we turn into one flat vector
                    glcm_feat = numpy.asarray([x.tolist() for x in glcm_prop], dtype=numpy.float64).flatten()
                    glcm_feat[glcm_feat < -1024] = -1024  # temporary hack excluding extremely small values
                except ValueError as e:
                    logger.error("- Extraction: Exceptions with GLCM properties computation: %s", repr(e))
                    # fall back to an all-NaN placeholder, which is zeroed (and reported) just below
                    glcm_feat = numpy.ndarray((1, len(self.properties_list) * len(self.offset)), 'float64')
                    glcm_feat.fill(numpy.nan)
                naninfeat = numpy.isnan(glcm_feat)
                if naninfeat.any():
                    glcm_feat[naninfeat] = 0
                    logger.warning("- Extraction: GLCM features have NaNs!")
            else:
                glcm_feat = glcm.flatten()
            # numpy.append without an axis flattens both operands, so the result stays one flat vector
            glcm_feats = numpy.append(glcm_feats, glcm_feat)
        return glcm_op, glcm_feats

    def get_features(self, input_data, annotations):
        """
        Returns the features on which the GLCMs are computed.

        :param input_data: data handed over to ``self.features_processor``.
        :param annotations: annotations handed over to ``self.features_processor``.
        :return: the output of ``self.features_processor(input_data, annotations)``; when no processor is
            set, a zero array of shape ``(1, len(self.properties_list) * len(self.offset))``.
        """
        if self.features_processor is not None:
            return self.features_processor(input_data, annotations)
        # NOTE(review): compute_glcms reads self.features_processor.n_filters, so this fallback cannot be
        # processed further by __call__ when the processor is None - confirm whether it is still needed.
        logger.info("- Extraction: spectrogram is empty, returning zero vector...")
        return numpy.array([numpy.zeros(len(self.properties_list)*len(self.offset))])

    def __call__(self, input_data, annotations=None):
        """Computes GLCM-based features from cepstral or spectrogram features"""
        spectrogram = self.get_features(input_data, annotations)
        glcm, glcm_features = self.compute_glcms(spectrogram)
        ratios = []
        if self.band_ratios:
            ratios = self.compute_ratios(spectrogram)
        # compute_glcms already returns the features of all strips as one flat vector. Indexing it per
        # band (as was done before) picks single scalars and drops almost all features, so the whole
        # vector is concatenated after the ratios instead.
        glcm_features = numpy.asarray(glcm_features, dtype=numpy.float64).flatten()
        features = numpy.append(ratios, glcm_features)
        logger.info("- Extraction: size of the GLCM-based feature vector of size %s", str(features.shape))
        return numpy.asarray(features, dtype=numpy.float64)


from .spectrogram_extended import SpectrogramExtended

extractor = GLCMs(features_processor=SpectrogramExtended())