Source code for msaf.features

"""
Each feature must inherit from the base class :class:`msaf.base.Features` to be
included in the whole framework.

Here is a list of all the available features:

.. autosummary::
    :toctree: generated/

    CQT
    MFCC
    PCP
    Tonnetz
    Tempogram
    Features
"""

from builtins import super
import librosa
import numpy as np

# Local stuff
from msaf import config
from msaf.base import Features
from msaf.exceptions import FeatureParamsError


class CQT(Features):
    """This class contains the implementation of the Constant-Q Transform.

    These features contain both harmonic and timbral content of the given
    audio signal.
    """

    def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_bins=config.cqt.bins,
                 norm=config.cqt.norm, filter_scale=config.cqt.filter_scale,
                 ref_power=config.cqt.ref_power):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in frames for the analysis.
        n_bins: int > 0
            Number of frequency bins for the CQT.
        norm: float
            Type of norm to use for basis function normalization.
        filter_scale: float
            The scale of the filter for the CQT.
        ref_power: str
            The reference power for logarithmic scaling. Must be one of
            ``"max"``, ``"min"`` or ``"median"``; it is mapped to the
            corresponding numpy reduction (`np.max`, `np.min`, `np.median`).

        Raises
        ------
        FeatureParamsError
            If `ref_power` is not one of the accepted values.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr,
                         hop_length=hop_length, feat_type=feat_type)
        # Init the CQT parameters
        self.n_bins = n_bins
        self.norm = norm
        self.filter_scale = filter_scale
        # Translate the string identifier into the actual reduction function.
        # TypeError covers unhashable values, which are just as invalid as
        # unknown strings and should be reported the same way.
        ref_power_funcs = {"max": np.max, "min": np.min, "median": np.median}
        try:
            self.ref_power = ref_power_funcs[ref_power]
        except (KeyError, TypeError):
            raise FeatureParamsError("Wrong value for ref_power")

    @classmethod
    def get_id(cls):
        """Identifier of these features."""
        return "cqt"

    def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        cqt: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        # Squared magnitude of the CQT of the audio signal
        linear_cqt = np.abs(librosa.cqt(
            self._audio, sr=self.sr, hop_length=self.hop_length,
            n_bins=self.n_bins, norm=self.norm,
            filter_scale=self.filter_scale, real=False)) ** 2
        # Log-scale relative to the configured reference, then transpose so
        # that each row is one feature vector.
        cqt = librosa.logamplitude(linear_cqt, ref_power=self.ref_power).T
        return cqt
class MFCC(Features):
    """This class contains the implementation of the MFCC Features.

    The Mel-Frequency Cepstral Coefficients contain timbral content of a
    given audio signal.
    """

    def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_fft=config.n_fft,
                 n_mels=config.mfcc.n_mels, n_mfcc=config.mfcc.n_mfcc,
                 ref_power=config.mfcc.ref_power):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in frames for the analysis.
        n_fft: int > 0
            Number of frames for the FFT.
        n_mels: int > 0
            Number of mel filters.
        n_mfcc: int > 0
            Number of mel coefficients.
        ref_power: str
            The reference power for logarithmic scaling. Must be one of
            ``"max"``, ``"min"`` or ``"median"``; it is mapped to the
            corresponding numpy reduction (`np.max`, `np.min`, `np.median`).

        Raises
        ------
        FeatureParamsError
            If `ref_power` is not one of the accepted values.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr,
                         hop_length=hop_length, feat_type=feat_type)
        # Init the MFCC parameters
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.n_mfcc = n_mfcc
        # Translate the string identifier into the actual reduction function.
        # TypeError covers unhashable values, which are just as invalid as
        # unknown strings and should be reported the same way.
        ref_power_funcs = {"max": np.max, "min": np.min, "median": np.median}
        try:
            self.ref_power = ref_power_funcs[ref_power]
        except (KeyError, TypeError):
            raise FeatureParamsError("Wrong value for ref_power")

    @classmethod
    def get_id(cls):
        """Identifier of these features."""
        return "mfcc"

    def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        mfcc: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        # Mel spectrogram of the audio signal
        S = librosa.feature.melspectrogram(self._audio,
                                           sr=self.sr,
                                           n_fft=self.n_fft,
                                           hop_length=self.hop_length,
                                           n_mels=self.n_mels)
        # Log-scale relative to the configured reference
        log_S = librosa.logamplitude(S, ref_power=self.ref_power)
        # Keep the first n_mfcc coefficients; transpose so that each row is
        # one feature vector.
        mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=self.n_mfcc).T
        return mfcc
class PCP(Features):
    """This class contains the implementation of the Pitch Class Profiles.

    The PCPs contain harmonic content of a given audio signal.
    """

    def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_bins=config.pcp.bins,
                 norm=config.pcp.norm, f_min=config.pcp.f_min,
                 n_octaves=config.pcp.n_octaves):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in frames for the analysis.
        n_bins: int > 0
            Number of bins for the CQT computation.
        norm: int > 0
            Normalization parameter.
        f_min: float > 0
            Minimum frequency.
        n_octaves: int > 0
            Number of octaves.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr,
                         hop_length=hop_length, feat_type=feat_type)
        # Init the PCP parameters
        self.n_bins = n_bins
        self.norm = norm
        self.f_min = f_min
        self.n_octaves = n_octaves

    @classmethod
    def get_id(cls):
        """Identifier of these features."""
        return "pcp"

    def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        pcp: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        # Only the harmonic component is used; the second value returned by
        # compute_HPSS is discarded.
        audio_harmonic, _ = self.compute_HPSS()
        # Squared magnitude of the hybrid CQT of the harmonic signal
        pcp_cqt = np.abs(librosa.hybrid_cqt(audio_harmonic,
                                            sr=self.sr,
                                            hop_length=self.hop_length,
                                            n_bins=self.n_bins,
                                            norm=self.norm,
                                            fmin=self.f_min)) ** 2
        # Chroma computed from the CQT; transpose so that each row is one
        # feature vector.
        pcp = librosa.feature.chroma_cqt(C=pcp_cqt,
                                         sr=self.sr,
                                         hop_length=self.hop_length,
                                         n_octaves=self.n_octaves,
                                         fmin=self.f_min).T
        return pcp
class Tonnetz(Features):
    """This class contains the implementation of the Tonal Centroids.

    The Tonal Centroids (or Tonnetz) contain harmonic content of a given
    audio signal.
    """

    def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size, n_bins=config.tonnetz.bins,
                 norm=config.tonnetz.norm, f_min=config.tonnetz.f_min,
                 n_octaves=config.tonnetz.n_octaves):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in frames for the analysis.
        n_bins: int > 0
            Number of bins for the CQT computation.
        norm: int > 0
            Normalization parameter.
        f_min: float > 0
            Minimum frequency.
        n_octaves: int > 0
            Number of octaves.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr,
                         hop_length=hop_length, feat_type=feat_type)
        # Init the local parameters
        self.n_bins = n_bins
        self.norm = norm
        self.f_min = f_min
        self.n_octaves = n_octaves

    @classmethod
    def get_id(cls):
        """Identifier of these features."""
        return "tonnetz"

    def compute_features(self):
        """Actual implementation of the features.

        The tonnetz is derived from the PCP features obtained with this
        object's own parameters.

        Returns
        -------
        tonnetz: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        # Arguments are passed by keyword so this call cannot silently
        # misalign if the PCP constructor signature is ever reordered.
        pcp = PCP(file_struct=self.file_struct,
                  feat_type=self.feat_type,
                  sr=self.sr,
                  hop_length=self.hop_length,
                  n_bins=self.n_bins,
                  norm=self.norm,
                  f_min=self.f_min,
                  n_octaves=self.n_octaves).features
        # The PCP features are transposed before being handed to librosa,
        # and the result is transposed back so each row is a feature vector.
        tonnetz = librosa.feature.tonnetz(chroma=pcp.T).T
        return tonnetz
class Tempogram(Features):
    """This class contains the implementation of the Tempogram feature.

    The Tempogram contains rhythmic content of a given audio signal.
    """

    def __init__(self, file_struct, feat_type, sr=config.sample_rate,
                 hop_length=config.hop_size,
                 win_length=config.tempogram.win_length):
        """Constructor of the class.

        Parameters
        ----------
        file_struct: `msaf.input_output.FileStruct`
            Object containing the file paths from where to extract/read
            the features.
        feat_type: `FeatureTypes`
            Enum containing the type of features.
        sr: int > 0
            Sampling rate for the analysis.
        hop_length: int > 0
            Hop size in frames for the analysis.
        win_length: int > 0
            The size of the window for the tempogram.
        """
        # Init the parent
        super().__init__(file_struct=file_struct, sr=sr,
                         hop_length=hop_length, feat_type=feat_type)
        # Init the local parameters
        self.win_length = win_length

    @classmethod
    def get_id(cls):
        """Identifier of these features."""
        return "tempogram"

    def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        tempogram: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        # Transpose so that each row is one feature vector.
        return librosa.feature.tempogram(self._audio,
                                         sr=self.sr,
                                         hop_length=self.hop_length,
                                         win_length=self.win_length).T