#!/usr/bin/env python
# coding: utf-8
import librosa
import logging
import numpy as np
from scipy.spatial import distance
from scipy import signal
from scipy.ndimage import filters
import pylab as plt
import msaf
from msaf.algorithms.interface import SegmenterInterface
def median_filter(X, M=8):
    """Smooth each column of the feature matrix X with a sliding median.

    The median runs along the first axis, one column at a time. The
    filtered values are written back into X, so the caller's array is
    modified in place.

    Parameters
    ----------
    X : np.array
        Two-dimensional feature matrix; every column is filtered
        independently.
    M : int
        Size of the median window.

    Returns
    -------
    X : np.array
        The same array object that was passed in, now filtered.
    """
    # Each row of the transpose is a writable view onto one column of X.
    for column in X.T:
        column[:] = filters.median_filter(column, size=M)
    return X
def compute_gaussian_krnl(M):
    """Creates a gaussian checkerboard kernel following Foote's paper.

    Parameters
    ----------
    M : int
        Side length of the square kernel.

    Returns
    -------
    G : np.array(M, M)
        Outer product of a symmetric gaussian window of length M (standard
        deviation ``M // 3.``) with itself, with the sign of the two
        off-diagonal quadrants flipped.
    """
    # Window functions live in `scipy.signal.windows`; the top-level
    # `scipy.signal.gaussian` alias is no longer available in recent SciPy.
    g = signal.windows.gaussian(M, M // 3., sym=True)
    G = np.outer(g, g)

    # Negate the top-right and bottom-left quadrants (checkerboard pattern)
    half = M // 2
    G[half:, :half] *= -1
    G[:half, half:] *= -1
    return G
def compute_ssm(X, metric="seuclidean"):
    """Computes the self-similarity matrix of X.

    Parameters
    ----------
    X : np.array
        Observations, one per row, as expected by
        ``scipy.spatial.distance.pdist``.
    metric : str
        Distance metric forwarded to ``pdist``.

    Returns
    -------
    S : np.array
        Square matrix ``1 - D``, where D holds the pairwise distances
        scaled by their maximum. If every distance is zero the scaling is
        skipped, so the result is all ones rather than NaN.
    """
    D = distance.pdist(X, metric=metric)
    D = distance.squareform(D)

    # Scale to [0, 1]; skip when all rows are identical to avoid 0 / 0.
    max_dist = D.max()
    if max_dist > 0:
        D /= max_dist
    return 1 - D
def compute_nc(X, G):
    """Computes the novelty curve from the self-similarity matrix X and
    the gaussian kernel G.

    The kernel is slid along the main diagonal of X; each novelty value is
    the sum of the element-wise product between the kernel and the block
    of X centered on that diagonal position. Positions where the kernel
    would not fit are left at zero.

    Parameters
    ----------
    X : np.array(N, N)
        Self-similarity matrix.
    G : np.array(M, M)
        Kernel. M must be even: the block taken from X has side
        ``2 * (M // 2)``, which only matches G for even M.

    Returns
    -------
    nc : np.array(N)
        Novelty curve, min-max normalized to [0, 1]. A constant curve is
        returned as all zeros.
    """
    N = X.shape[0]
    M = G.shape[0]
    half = M // 2
    nc = np.zeros(N)

    for i in range(half, N - half + 1):
        nc[i] = np.sum(X[i - half:i + half, i - half:i + half] * G)

    # Normalize: shift the minimum to 0, then scale the maximum to 1.
    if nc.size:
        nc -= nc.min()
        peak = nc.max()
        # A flat curve has no peak to scale by; leave it at zero.
        if peak > 0:
            nc /= peak
    return nc
def pick_peaks(nc, L=16):
    """Obtain peaks from a novelty curve using an adaptive threshold.

    The curve is smoothed with a gaussian (sigma=4). A sample is reported
    as a peak when it is strictly greater than both neighbours and also
    exceeds the median-filtered smoothed curve plus a small offset.

    Parameters
    ----------
    nc : np.array
        Novelty curve.
    L : int
        Size of the median window used to build the threshold.

    Returns
    -------
    peaks : list
        Indices of the detected peaks, in increasing order. The first and
        last samples are never reported.
    """
    # The offset is derived from the raw curve, before any smoothing.
    offset = nc.mean() / 20.
    smoothed = filters.gaussian_filter1d(nc, sigma=4)
    threshold = filters.median_filter(smoothed, size=L) + offset

    # Compare every interior sample with its neighbours and the threshold.
    center = smoothed[1:-1]
    is_local_max = (smoothed[:-2] < center) & (center > smoothed[2:])
    above_threshold = center > threshold[1:-1]

    # +1 maps positions in the interior slice back to indices in the curve.
    return (np.flatnonzero(is_local_max & above_threshold) + 1).tolist()
class Segmenter(SegmenterInterface):
    """
    This script identifies the boundaries of a given track using the Foote
    method:

    Foote, J. (2000). Automatic Audio Segmentation Using a Measure Of Audio
    Novelty. In Proc. of the IEEE International Conference of Multimedia and
    Expo (pp. 452–455). New York City, NY, USA.
    """

    def processFlat(self):
        """Main process.

        Reads the following keys from ``self.config``: ``bound_norm_feats``,
        ``M_gaussian``, ``m_median`` and ``L_peaks``.

        Returns
        -------
        est_idxs : np.array(N)
            Estimated indices of the segment boundaries in frames.
        est_labels : np.array(N-1)
            Estimated labels for the segments.
        """
        # Preprocess to obtain features
        F = self._preprocess()

        # Normalize
        F = msaf.utils.normalize(F, norm_type=self.config["bound_norm_feats"])

        # Make sure that the M_gaussian is even: the novelty computation
        # takes blocks of side 2 * (M // 2), which must match the kernel.
        # Note that this updates the stored configuration in place.
        if self.config["M_gaussian"] % 2 == 1:
            self.config["M_gaussian"] += 1

        # Median filter
        F = median_filter(F, M=self.config["m_median"])

        # Self similarity matrix
        S = compute_ssm(F)

        # Compute gaussian kernel
        G = compute_gaussian_krnl(self.config["M_gaussian"])

        # Compute the novelty curve
        nc = compute_nc(S, G)

        # Find peaks in the novelty curve
        est_idxs = pick_peaks(nc, L=self.config["L_peaks"])

        # Add first and last frames
        est_idxs = np.concatenate(([0], est_idxs, [F.shape[0] - 1]))

        # Empty labels: this algorithm only finds boundaries, so every
        # segment gets the placeholder label -1.
        est_labels = np.ones(len(est_idxs) - 1) * -1

        # Post process estimations
        est_idxs, est_labels = self._postprocess(est_idxs, est_labels)

        return est_idxs, est_labels
# plt.figure(1)
# plt.plot(nc);
# [plt.axvline(p, color="m") for p in est_bounds]
# [plt.axvline(b, color="g") for b in ann_bounds]
# plt.figure(2)
# plt.imshow(S, interpolation="nearest", aspect="auto")
# [plt.axvline(b, color="g") for b in ann_bounds]
# plt.show()