Source code for motif.contour_extractors.hll

"""HLL method for extracting contours.
"""
import csv
import librosa
import numpy as np
import os
import subprocess
from subprocess import CalledProcessError
import tempfile as tmp

from motif.core import ContourExtractor
from motif.core import Contours


def _check_binary():
    '''Check if the binary is available.

    Returns
    -------
    True if available, False otherwise
    '''
    hll_exists = True
    try:
        subprocess.check_output(['which', 'hll'])
    except CalledProcessError:
        hll_exists = False

    return hll_exists


BINARY_AVAILABLE = _check_binary()


[docs]class HLL(ContourExtractor):
    '''HLL method for extracting contours.

    Parameters
    ----------
    hop_size : int, default=8192
        Seed detection CQT hop size.
    bins_per_octave : int, default=12
        Number of seed detection CQT bins per octave.
    min_note : str, default='E1'
        Minimum seed detection CQT note.
    peak_thresh : float, default=0.4
        Seed detection peak picking threshold.
    filter_scale : float, defualt=2.0
        CQT filter scale.
    wait : int >= 0, default=2
        Peak-picking number of samples to wait after picking a peak.
    avg_filt_len : int >= 0, default=12
        Peak-picking average filter length.
    pre_max : int >= 0, default=3
        Peak-picking num samples before `n` over which max is computed
    post_max : int >= 1, default=3
        Peak-picking num samples after `n` over which max is computed
    pre_avg : int >= 0, default=3
        Peak-picking num samples before `n` over which mean is computed
    post_avg : int >= 1, default=3
        Peak-picking num samples after `n` over which mean is computed
    delta : float >= 0, default=0.02
        Peak-picking threshold offset for mean
    n_harmonics : int, default=5
        Number of HLL harmonics.
    f_cutoff : float, default=30
        HLL cutoff frequency in Hz.
    tracking_gain : float, default=0.0005
        HLL tracking gain.
    min_contour_len_samples : int, default=11025
        HLL minimum number of samples in a single contour.
    amplitude_threshold : float, default=0.001
        HLL minimum amplitude threshold.
    tracking_update_threshold : float, default=70.0
        HLL tracking update threshold.

    Attributes
    ----------
    hop_size : int
        Seed detection CQT hop size.
    n_cqt_bins : int
        Number of seed detection CQT bins.
    bins_per_octave : int
        Number of seed detection CQT bins per octave.
    min_note : str
        Minimum seed detection CQT note.
    med_filt_len : int
        Seed detection frequency band median filter length.
    peak_thresh : float
        Seed detection peak picking threshold.
    pre_max : int >= 0
        Peak-picking number of samples before `n` over which max is computed
    post_max : int >= 1
        Peak-picking number of samples after `n` over which max is computed
    pre_avg : int >= 0
        Peak-picking number of samples before `n` over which mean is computed
    post_avg : int >= 1
        Peak-picking number of samples after `n` over which mean is computed
    delta : float >= 0
        Peak-picking threshold offset for mean
    wait : int >= 0
        Peak-picking number of samples to wait after picking a peak
    n_harmonics : int
        Number of HLL harmonics.
    f_cutoff : float
        HLL cutoff frequency in Hz.
    tracking_gain : float
        HLL tracking gain.
    min_contour_len_samples : int
        HLL minimum number of samples in a single contour.
    amplitude_threshold : float
        HLL minimum amplitude threshold.
    tracking_update_threshold : float
        HLL tracking update threshold.

    '''
    def __init__(self, hop_size=8192, n_octaves=6, bins_per_octave=12,
                 min_note='E1', peak_thresh=0.4, filter_scale=2.0, wait=2,
                 avg_filt_len=12, pre_max=3, post_max=3, pre_avg=3, post_avg=3,
                 delta=0.02, n_harmonics=5, f_cutoff=30, tracking_gain=0.0005,
                 min_contour_len_samples=11025, amplitude_threshold=0.001,
                 tracking_update_threshold=70.0):
        ''' Init method.
        '''
        # seed detection parameters
        self.hop_size = hop_size
        self.n_octaves = n_octaves
        self.bins_per_octave = bins_per_octave
        self.min_note = min_note
        self.peak_thresh = peak_thresh
        self.filter_scale = filter_scale
        self.avg_filt_len = avg_filt_len

        # librosa peak pick params for seed detection
        self.pre_max = pre_max
        self.post_max = post_max
        self.pre_avg = pre_avg
        self.post_avg = post_avg
        self.delta = delta
        self.wait = wait

        # HLL paramters
        self.n_harmonics = n_harmonics
        self.f_cutoff = f_cutoff  # Hz
        self.tracking_gain = tracking_gain
        self.min_contour_len_samples = min_contour_len_samples
        self.amplitude_threshold = amplitude_threshold
        self.tracking_update_threshold = tracking_update_threshold

        ContourExtractor.__init__(self)

    @property
    def audio_samplerate(self):
        """Sample rate of preprocessed audio.

        Returns
        -------
        audio_samplerate : float
            Number of samples per second.

        """
        return 44100.0

    @property
    def sample_rate(self):
        """Sample rate of output contours

        Returns
        -------
        sample_rate : float
            Number of samples per second.

        """
        return self.audio_samplerate / 256.0

    @property
    def min_contour_len(self):
        """Minimum allowed contour length.

        Returns
        -------
        min_contour_len : float
            Minimum allowed contour length in seconds.

        """
        return self.min_contour_len_samples / self.audio_samplerate

    @classmethod
[docs]    def get_id(cls):
        """Identifier of this extractor.

        Returns
        -------
        id : str
            Identifier of this extractor.

        """
        return "hll"

[docs]    def compute_contours(self, audio_filepath):
        """Compute contours using Harmonic Locked Loops.
        This calls a binary in the background, which creates a csv file.
        The csv file is loaded into memory and the file is deleted.

        Parameters
        ----------
        audio_filepath : str
            Path to audio file.

        Returns
        -------
        Instance of Contours object

        """
        if not BINARY_AVAILABLE:
            raise EnvironmentError(
                "The binary {} needed to compute these contours is "
                "not available. To fix this, copy the hll binary from "
                "``motif/extract/resources/`` into ``/usr/local/bin``."
            )

        if not os.path.exists(audio_filepath):
            raise IOError(
                "The audio file {} does not exist".format(audio_filepath)
            )

        tmp_audio = self._preprocess_audio(
            audio_filepath, normalize_format=True, normalize_volume=True
        )

        seed_fpath = self.get_seeds(tmp_audio)
        contours_fpath = tmp.mktemp('.csv')

        args = [
            "hll",
            "{}".format(tmp_audio),
            "{}".format(seed_fpath),
            "{}".format(contours_fpath),
            "{}".format(self.n_harmonics),
            "{}".format(self.f_cutoff),
            "{}".format(self.tracking_gain),
            "{}".format(self.min_contour_len_samples),
            "{}".format(self.amplitude_threshold),
            "{}".format(self.tracking_update_threshold)
        ]
        os.system(' '.join(args))

        if not os.path.exists(contours_fpath):
            raise IOError(
                "Unable to find HLL output file {}".format(contours_fpath)
            )

        c_numbers, c_times, c_freqs, c_sal = self._load_contours(
            contours_fpath
        )

        c_numbers, c_times, c_freqs, c_sal = self._sort_contours(
            c_numbers, c_times, c_freqs, c_sal
        )

        os.remove(contours_fpath)
        os.remove(tmp_audio)
        os.remove(seed_fpath)

        (c_numbers, c_times, c_freqs, c_sal) = self._postprocess_contours(
            c_numbers, c_times, c_freqs, c_sal
        )

        return Contours(
            c_numbers, c_times, c_freqs, c_sal, self.sample_rate,
            audio_filepath
        )

[docs]    def get_seeds(self, audio_filepath):
        """Get the seeds file to pass to the HLL tracker.

        Parameters
        ----------
        audio_filepath : str
            Path to audio file.

        Returns
        -------
        seeds_fpath : str
            Path to the seeds output file.

        """
        y, sr = librosa.load(audio_filepath, sr=44100)
        y_harmonic = librosa.effects.harmonic(y)
        cqt, samples, freqs = self._compute_cqt(y_harmonic, sr)
        seeds = self._pick_seeds_cqt(cqt, freqs, samples)

        seeds_fpath = tmp.mktemp('.csv')
        with open(seeds_fpath, 'w') as fhandle:
            writer = csv.writer(fhandle, delimiter=',')
            writer.writerows(seeds)
        return seeds_fpath

    def _moving_average(self, a):
        """Compute the moving average of a signal.

        Parameters
        ----------
        a : np.array
            Signal

        Returns
        -------
        a_avg : np.array
            Moving average of signal.

        """
        n = self.avg_filt_len
        ret = np.cumsum(a, dtype=float)
        ret[n:] = ret[n:] - ret[:-n]
        return ret[n - 1:] / n

    def _norm_matrix(self, mat, overall=True, time=True, freq=True):
        """Normalize a matrix overall, in time, or in frequency.

        Parameters
        ----------
        mat : np.array
            Matrix.
        overall : bool, default=True
            If True, normalizes first by the overall amplitudes.
        time : bool, default=True
            If True, normalizes in time.
        freq : bool, default=True
            If True, normalizes in frequency.

        Returns
        -------
        mat_norm : np.array
            Normalized matrix

        """
        if overall:
            mat = mat - np.min(mat)
            m = np.max(mat)
            if m == 0:
                m = 1
            mat = mat / m

        if time:
            mat = (mat.T - np.min(mat, axis=1)).T
            m = np.max(mat, axis=1)
            m[m == 0] = 1
            mat = (mat.T / m).T

        if freq:
            mat = mat - np.min(mat, axis=0)
            m = np.max(mat, axis=0)
            m[m == 0] = 1
            mat = mat / m

        return mat

    def _compute_cqt(self, y, sr):
        """Compute a CQT.

        Parameters
        ----------
        y : np.array
            Audio signal
        sr : float
            Audio singal sample rate

        Returns
        -------
        cqt_log : np.array [n_samples, n_freqs]
            Log amplitude CQT.
        samples : np.array [n_samples]
            CQT time stamps.
        freqs : np.array [n_freqs]
            CQT frequencies.

        """
        fmin = librosa.note_to_hz(self.min_note)
        bins_per_octave = 12
        n_cqt_bins = bins_per_octave * self.n_octaves
        cqt = np.abs(librosa.cqt(
            y, sr=sr, hop_length=self.hop_size, fmin=fmin,
            filter_scale=self.filter_scale,
            bins_per_octave=bins_per_octave, n_bins=n_cqt_bins
        ))

        cqt = self._norm_matrix(cqt)

        n_time_frames = cqt.shape[1]

        freqs = librosa.cqt_frequencies(
            fmin=fmin, bins_per_octave=bins_per_octave,
            n_bins=n_cqt_bins
        )
        samples = librosa.frames_to_samples(
            range(n_time_frames), hop_length=self.hop_size
        )

        return cqt, samples, freqs

    def _pick_seeds_cqt(self, cqt, cqt_freqs, samples):
        """Compute a CQT.

        Parameters
        ----------
        cqt : np.array [n_samples, n_freqs]
            Log amplitude CQT.
        freqs : np.array [n_freqs]
            CQT frequencies.
        samples : np.array [n_samples]
            CQT time stamps.

        Returns
        -------
        seeds : np.array [n_seeds, 2]
            Array of time, frequency seeds

        """
        seeds = []
        for i, freq in enumerate(cqt_freqs):
            freq_band = cqt[i, :]

            freq_band_smooth = self._moving_average(freq_band)
            peak_locs = librosa.util.peak_pick(
                freq_band_smooth, self.pre_max, self.post_max, self.pre_avg,
                self.post_avg, self.delta, self.wait
            )
            if len(peak_locs) > 0:
                peak_locs = peak_locs[
                    (freq_band[peak_locs] > self.peak_thresh)
                ]
                for peak_loc in peak_locs:
                    sample = samples[peak_loc]
                    seeds.append([sample, freq])

        seeds = np.array(seeds)
        return seeds

    def _load_contours(self, fpath):
        """ Load contour data from an HLL csv file.

        Parameters
        ----------
        fpath : str
            Path to output csv file.

        Returns
        -------
        index : np.array
            Array of contour numbers
        times : np.array
            Array of contour times
        freqs : np.array
            Array of contour frequencies
        contour_sal : np.array
            Array of contour saliences

        """
        index = []
        times = []
        freqs = []
        contour_sal = []
        with open(fpath, 'r') as fhandle:
            reader = csv.reader(fhandle, delimiter=',')
            for row in reader:
                index.append(row[0])
                times.append(row[1])
                freqs.append(row[2])
                contour_sal.append(row[3])  # TODO: was 3: - generalize later!

        # Add column with annotation values in cents
        index = np.array(index, dtype=int)
        times = np.array(times, dtype=float) / self.audio_samplerate
        freqs = np.array(freqs, dtype=float)
        contour_sal = np.array(contour_sal, dtype=float)

        return (index, times, freqs, contour_sal)