Source code for audiolazy.lazy_auditory

# -*- coding: utf-8 -*-
# This file is part of AudioLazy, the signal processing Python package.
# Copyright (C) 2012-2016 Danilo de Jesus da Silva Bellini
#
# AudioLazy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Peripheral auditory modeling module
"""

import math

# Audiolazy internal imports
from .lazy_core import StrategyDict
from .lazy_misc import elementwise
from .lazy_filters import z, CascadeFilter, ZFilter, resonator
from .lazy_math import pi, exp, cos, sin, sqrt, factorial
from .lazy_stream import thub
from .lazy_compat import xzip
from .lazy_text import format_docstring

__all__ = ["erb", "gammatone", "gammatone_erb_constants", "phon2dB"]


erb = StrategyDict("erb")
erb._doc_template = """
  Equivalent Rectangular Model (ERB) from {authors} ({year}).

  This is a model for a single filter bandwidth for auditory filter modeling,
  taken from:
  {__doc__}
  Parameters
  ----------
  freq :
    Frequency, in rad/sample if second parameter is given, in Hz otherwise.
  Hz :
    Frequency conversion "Hz" from sHz function, i.e., ``sHz(rate)[1]``.
    If this value is not given, both input and output will be in Hz.

  Returns
  -------
  Frequency range size, in rad/sample if second parameter is given, in Hz
  otherwise.
"""

@erb.strategy("gm90", "glasberg_moore_90", "glasberg_moore")
@elementwise("freq", 0)
@format_docstring(erb._doc_template, authors="Glasberg and Moore", year=1990)
def erb(freq, Hz=None):
  """
    ``B. R. Glasberg and B. C. J. Moore, "Derivation of auditory filter
    shapes from notched-noise data". Hearing Research, vol. 47, 1990, pp.
    103-108.``
  """
  if Hz is None:
    if freq < 7: # Perhaps user tried something up to 2 * pi
      raise ValueError("Frequency out of range.")
    Hz = 1
  fHz = freq / Hz
  result = 24.7 * (4.37e-3 * fHz + 1.)
  return result * Hz


@erb.strategy("mg83", "moore_glasberg_83")
@elementwise("freq", 0)
@format_docstring(erb._doc_template, authors="Moore and Glasberg", year=1983)
def erb(freq, Hz=None):
  """
    ``B. C. J. Moore and B. R. Glasberg, "Suggested formulae for calculating
    auditory filter bandwidths and excitation patterns". J. Acoust. Soc.
    Am., 74, 1983, pp. 750-753.``
  """
  if Hz is None:
    if freq < 7: # Perhaps user tried something up to 2 * pi
      raise ValueError("Frequency out of range.")
    Hz = 1
  fHz = freq / Hz
  result = 6.23e-6 * fHz ** 2 + 93.39e-3 * fHz + 28.52
  return result * Hz


[docs]def gammatone_erb_constants(n): """ Constants for using the real bandwidth in the gammatone filter, given its order. Returns a pair :math:`(x, y) = (1/a_n, c_n)`. Based on equations from: ``Holdsworth, J.; Patterson, R.; Nimmo-Smith, I.; Rice, P. Implementing a GammaTone Filter Bank. In: SVOS Final Report, Annex C, Part A: The Auditory Filter Bank. 1988.`` First returned value is a bandwidth compensation for direct use in the gammatone formula: >>> x, y = gammatone_erb_constants(4) >>> central_frequency = 1000 >>> round(x, 3) 1.019 >>> bandwidth = x * erb["moore_glasberg_83"](central_frequency) >>> round(bandwidth, 2) 130.52 Second returned value helps us find the ``3 dB`` bandwidth as: >>> x, y = gammatone_erb_constants(4) >>> central_frequency = 1000 >>> bandwidth3dB = x * y * erb["moore_glasberg_83"](central_frequency) >>> round(bandwidth3dB, 2) 113.55 """ tnt = 2 * n - 2 return (factorial(n - 1) ** 2 / (pi * factorial(tnt) * 2 ** -tnt), 2 * (2 ** (1. / n) - 1) ** .5 )
gammatone = StrategyDict("gammatone") gammatone._doc_template = """ Gammatone filter based on {model}. Model is described in: {__doc__} Parameters ---------- freq : Frequency, in rad/sample. bandwidth : Frequency range size, in rad/sample. See ``gammatone_erb_constants`` for more information about how you can find this. {extra_params} Returns ------- A CascadeFilter object with ZFilter filters, each of them a pole-conjugated IIR filter model. Gain is normalized to have peak with 0 dB (1.0 amplitude). The total number of poles is twice the value of eta (conjugated pairs), one pair for each ZFilter. """ @gammatone.strategy("sampled") @format_docstring(gammatone._doc_template, model="a sampled impulse response", extra_params="\n ".join([ "phase :", " Phase, in radians. Defaults to zero (cosine)." "eta :", " Gammatone filter order. Defaults to 4.", "" # Skip a line ]), ) def gammatone(freq, bandwidth, phase=0, eta=4): """ ``Bellini, D. J. S. "AudioLazy: Processamento digital de sinais expressivo e em tempo real", IME-USP, Mastership Thesis, 2013.`` This implementation have the impulse response (for each sample ``n``, keeping the input parameter names): .. math:: n^{{eta - 1}} e^{{- bandwidth \cdot n}} \cos(freq \cdot n + phase) """ assert eta >= 1 A = exp(-bandwidth) numerator = cos(phase) - A * cos(freq - phase) * z ** -1 denominator = 1 - 2 * A * cos(freq) * z ** -1 + A ** 2 * z ** -2 filt = (numerator / denominator).diff(n=eta-1, mul_after=-z) # Filter is done, but the denominator might have some numeric loss f0 = ZFilter(filt.numpoly) / denominator f0 /= abs(f0.freq_response(freq)) # Max gain == 1.0 (0 dB) fn = 1 / denominator fn /= abs(fn.freq_response(freq)) return CascadeFilter([f0] + [fn] * (eta - 1)) @gammatone.strategy("slaney") @format_docstring(gammatone._doc_template, extra_params="", model="Malcolm Slaney's IIR cascading filter model") def gammatone(freq, bandwidth): """ ``Slaney, M. "An Efficient Implementation of the Patterson-Holdsworth Auditory Filter Bank", Apple Computer Technical Report #35, 1993.`` """ A = exp(-bandwidth) cosw = cos(freq) sinw = sin(freq) sig = [1., -1.] coeff = [cosw + s1 * (sqrt(2) + s2) * sinw for s1 in sig for s2 in sig] numerator = [1 - A * c * z ** -1 for c in coeff] denominator = 1 - 2 * A * cosw * z ** -1 + A ** 2 * z ** -2 filt = CascadeFilter(num / denominator for num in numerator) return CascadeFilter(f / abs(f.freq_response(freq)) for f in filt) @gammatone.strategy("klapuri") @format_docstring(gammatone._doc_template, extra_params="", model="Anssi Klapuri's IIR cascading filter model") def gammatone(freq, bandwidth): """ ``A. Klapuri, "Multipich Analysis of Polyphonic Music and Speech Signals Using an Auditory Model". IEEE Transactions on Audio, Speech and Language Processing, vol. 16, no. 2, 2008, pp. 255-266.`` """ bw = thub(bandwidth, 1) bw2 = thub(bw * 2, 4) freq = thub(freq, 4) resons = [resonator.z_exp, resonator.poles_exp] * 2 return CascadeFilter(reson(freq, bw2) for reson in resons) phon2dB = StrategyDict("phon2dB") @phon2dB.strategy("iso226", "iso226_2003", "iso_fdis_226_2003") def phon2dB(loudness=None): """ Loudness in phons to Sound Pressure Level (SPL) in dB using the ISO/FDIS 226:2003 model. This function needs Scipy, as ``scipy.interpolate.UnivariateSpline`` objects are used as interpolators. Parameters ---------- loudness : The loudness value in phons to be converted, or None (default) to get the threshold of hearing. Returns ------- A callable that returns the SPL dB value for each given frequency in hertz. Note ---- See ``phon2dB.iso226.schema`` and ``phon2dB.iso226.table`` to know the original frequency used for the result. The result for any other value is an interpolation (spline). Don't trust on values nor lower nor higher than the frequency limits there (20Hz and 12.5kHz) as they're not part of ISO226 and no value was collected to estimate them (they're just a spline interpolation to reach 1000dB at -30Hz and 32kHz). Likewise, the trustful loudness input range is from 20 to 90 phon, as written on ISO226, although other values aren't found by a spline interpolation but by using the formula on section 4.1 of ISO226. Hint ---- The ``phon2dB.iso226.table`` also have other useful information, such as the threshold values in SPL dB. """ from scipy.interpolate import UnivariateSpline table = phon2dB.iso226.table schema = phon2dB.iso226.schema freqs = [row[schema.index("freq")] for row in table] if loudness is None: # Threshold levels spl = [row[schema.index("threshold")] for row in table] else: # Curve for a specific phon value def get_pressure_level(freq, alpha, loudness_base, threshold): return 10 / alpha * math.log10( 4.47e-3 * (10 ** (.025 * loudness) - 1.14) + (.4 * 10 ** ((threshold + loudness_base) / 10 - 9)) ** alpha ) - loudness_base + 94 spl = [get_pressure_level(**dict(xzip(schema, args))) for args in table] interpolator = UnivariateSpline(freqs, spl, s=0) interpolator_low = UnivariateSpline([-30] + freqs, [1e3] + spl, s=0) interpolator_high = UnivariateSpline(freqs + [32000], spl + [1e3], s=0) @elementwise("freq", 0) def freq2dB_spl(freq): if freq < 20: return interpolator_low(freq).tolist() if freq > 12500: return interpolator_high(freq).tolist() return interpolator(freq).tolist() return freq2dB_spl # ISO226 Table 1 phon2dB.iso226.schema = ("freq", "alpha", "loudness_base", "threshold") phon2dB.iso226.table = ( ( 20, 0.532, -31.6, 78.5), ( 25, 0.506, -27.2, 68.7), ( 31.5, 0.480, -23.0, 59.5), ( 40, 0.455, -19.1, 51.1), ( 50, 0.432, -15.9, 44.0), ( 63, 0.409, -13.0, 37.5), ( 80, 0.387, -10.3, 31.5), ( 100, 0.367, -8.1, 26.5), ( 125, 0.349, -6.2, 22.1), ( 160, 0.330, -4.5, 17.9), ( 200, 0.315, -3.1, 14.4), ( 250, 0.301, -2.0, 11.4), ( 315, 0.288, -1.1, 8.6), ( 400, 0.276, -0.4, 6.2), ( 500, 0.267, 0.0, 4.4), ( 630, 0.259, 0.3, 3.0), ( 800, 0.253, 0.5, 2.2), ( 1000, 0.250, 0.0, 2.4), ( 1250, 0.246, -2.7, 3.5), ( 1600, 0.244, -4.1, 1.7), ( 2000, 0.243, -1.0, -1.3), ( 2500, 0.243, 1.7, -4.2), ( 3150, 0.243, 2.5, -6.0), ( 4000, 0.242, 1.2, -5.4), ( 5000, 0.242, -2.1, -1.5), ( 6300, 0.245, -7.1, 6.0), ( 8000, 0.254, -11.2, 12.6), (10000, 0.271, -10.7, 13.9), (12500, 0.301, -3.1, 12.3), )