# Source code for motif.contour_extractors.hll

"""HLL method for extracting contours.
"""
import csv
import os
import shutil
import subprocess
from subprocess import CalledProcessError
import tempfile as tmp

import librosa
import numpy as np

from motif.core import ContourExtractor
from motif.core import Contours


def _check_binary():
    '''Check if the ``hll`` binary is available.

    The lookup is delegated to the external ``which`` command.

    Returns
    -------
    True if available, False otherwise
    '''
    try:
        subprocess.check_output(['which', 'hll'])
    except (CalledProcessError, OSError):
        # CalledProcessError: ``which`` ran but exited non-zero.
        # OSError: ``which`` itself could not be executed (e.g. it is not
        # installed); report "unavailable" instead of crashing at import.
        return False

    return True


# Evaluated once at import time; the binary is not re-checked afterwards.
BINARY_AVAILABLE = _check_binary()


class HLL(ContourExtractor):
    '''HLL method for extracting contours.

    Parameters
    ----------
    hop_size : int, default=8192
        Seed detection CQT hop size.
    n_octaves : int, default=6
        Number of octaves covered by the seed detection CQT.
    bins_per_octave : int, default=12
        Number of seed detection CQT bins per octave.
    min_note : str, default='E1'
        Minimum seed detection CQT note.
    peak_thresh : float, default=0.4
        Seed detection peak picking threshold.
    filter_scale : float, default=2.0
        CQT filter scale.
    wait : int >= 0, default=2
        Peak-picking number of samples to wait after picking a peak.
    avg_filt_len : int >= 0, default=12
        Peak-picking average filter length.
    pre_max : int >= 0, default=3
        Peak-picking num samples before `n` over which max is computed
    post_max : int >= 1, default=3
        Peak-picking num samples after `n` over which max is computed
    pre_avg : int >= 0, default=3
        Peak-picking num samples before `n` over which mean is computed
    post_avg : int >= 1, default=3
        Peak-picking num samples after `n` over which mean is computed
    delta : float >= 0, default=0.02
        Peak-picking threshold offset for mean
    n_harmonics : int, default=5
        Number of HLL harmonics.
    f_cutoff : float, default=30
        HLL cutoff frequency in Hz.
    tracking_gain : float, default=0.0005
        HLL tracking gain.
    min_contour_len_samples : int, default=11025
        HLL minimum number of samples in a single contour.
    amplitude_threshold : float, default=0.001
        HLL minimum amplitude threshold.
    tracking_update_threshold : float, default=70.0
        HLL tracking update threshold.

    Attributes
    ----------
    hop_size : int
        Seed detection CQT hop size.
    n_octaves : int
        Number of octaves covered by the seed detection CQT.
    bins_per_octave : int
        Number of seed detection CQT bins per octave.
    min_note : str
        Minimum seed detection CQT note.
    peak_thresh : float
        Seed detection peak picking threshold.
    filter_scale : float
        CQT filter scale.
    avg_filt_len : int
        Length of the moving average applied to each frequency band before
        peak picking.
    pre_max : int >= 0
        Peak-picking number of samples before `n` over which max is computed
    post_max : int >= 1
        Peak-picking number of samples after `n` over which max is computed
    pre_avg : int >= 0
        Peak-picking number of samples before `n` over which mean is computed
    post_avg : int >= 1
        Peak-picking number of samples after `n` over which mean is computed
    delta : float >= 0
        Peak-picking threshold offset for mean
    wait : int >= 0
        Peak-picking number of samples to wait after picking a peak
    n_harmonics : int
        Number of HLL harmonics.
    f_cutoff : float
        HLL cutoff frequency in Hz.
    tracking_gain : float
        HLL tracking gain.
    min_contour_len_samples : int
        HLL minimum number of samples in a single contour.
    amplitude_threshold : float
        HLL minimum amplitude threshold.
    tracking_update_threshold : float
        HLL tracking update threshold.

    '''
    def __init__(self, hop_size=8192, n_octaves=6, bins_per_octave=12,
                 min_note='E1', peak_thresh=0.4, filter_scale=2.0, wait=2,
                 avg_filt_len=12, pre_max=3, post_max=3, pre_avg=3,
                 post_avg=3, delta=0.02, n_harmonics=5, f_cutoff=30,
                 tracking_gain=0.0005, min_contour_len_samples=11025,
                 amplitude_threshold=0.001, tracking_update_threshold=70.0):
        '''Init method.

        Stores every argument as an attribute of the same name, then runs
        the ``ContourExtractor`` initializer.
        '''
        # seed detection parameters
        self.hop_size = hop_size
        self.n_octaves = n_octaves
        self.bins_per_octave = bins_per_octave
        self.min_note = min_note
        self.peak_thresh = peak_thresh
        self.filter_scale = filter_scale
        self.avg_filt_len = avg_filt_len

        # librosa peak pick params for seed detection
        self.pre_max = pre_max
        self.post_max = post_max
        self.pre_avg = pre_avg
        self.post_avg = post_avg
        self.delta = delta
        self.wait = wait

        # HLL parameters
        self.n_harmonics = n_harmonics
        self.f_cutoff = f_cutoff  # Hz
        self.tracking_gain = tracking_gain
        self.min_contour_len_samples = min_contour_len_samples
        self.amplitude_threshold = amplitude_threshold
        self.tracking_update_threshold = tracking_update_threshold

        # Called last, after all attributes above are in place.
        ContourExtractor.__init__(self)

    @property
    def audio_samplerate(self):
        """Sample rate of preprocessed audio.

        Returns
        -------
        audio_samplerate : float
            Number of samples per second.

        """
        return 44100.0

    @property
    def sample_rate(self):
        """Sample rate of output contours

        Returns
        -------
        sample_rate : float
            Number of samples per second.

        """
        return self.audio_samplerate / 256.0

    @property
    def min_contour_len(self):
        """Minimum allowed contour length.

        Returns
        -------
        min_contour_len : float
            Minimum allowed contour length in seconds.

        """
        return self.min_contour_len_samples / self.audio_samplerate

    @classmethod
    def get_id(cls):
        """Identifier of this extractor.

        Returns
        -------
        id : str
            Identifier of this extractor.

        """
        return "hll"

    def compute_contours(self, audio_filepath):
        """Compute contours using Harmonic Locked Loops.
        This calls a binary in the background, which creates a csv file.
        The csv file is loaded into memory and the file is deleted.

        Parameters
        ----------
        audio_filepath : str
            Path to audio file.

        Returns
        -------
        Instance of Contours object

        Raises
        ------
        EnvironmentError
            If the ``hll`` binary was not found when the module was imported.
        IOError
            If ``audio_filepath`` does not exist, or if the binary did not
            produce its output file.

        """
        if not BINARY_AVAILABLE:
            raise EnvironmentError(
                "The binary {} needed to compute these contours is "
                "not available. To fix this, copy the hll binary from "
                "``motif/extract/resources/`` into ``/usr/local/bin``."
                .format("hll")
            )

        if not os.path.exists(audio_filepath):
            raise IOError(
                "The audio file {} does not exist".format(audio_filepath)
            )

        tmp_audio = None
        seed_fpath = None
        # The output path lives in a private directory and does not exist
        # until the binary writes it, so the existence check below still
        # detects a failed run (unlike a pre-created mkstemp file).
        work_dir = tmp.mkdtemp()
        contours_fpath = os.path.join(work_dir, 'contours.csv')

        try:
            tmp_audio = self._preprocess_audio(
                audio_filepath, normalize_format=True, normalize_volume=True
            )
            seed_fpath = self.get_seeds(tmp_audio)

            args = [
                "hll",
                "{}".format(tmp_audio),
                "{}".format(seed_fpath),
                "{}".format(contours_fpath),
                "{}".format(self.n_harmonics),
                "{}".format(self.f_cutoff),
                "{}".format(self.tracking_gain),
                "{}".format(self.min_contour_len_samples),
                "{}".format(self.amplitude_threshold),
                "{}".format(self.tracking_update_threshold)
            ]
            # Argument list, no shell: paths containing spaces or shell
            # metacharacters are passed through verbatim. The exit status is
            # not inspected; success is judged by the output file.
            subprocess.call(args)

            if not os.path.exists(contours_fpath):
                raise IOError(
                    "Unable to find HLL output file {}".format(contours_fpath)
                )

            c_numbers, c_times, c_freqs, c_sal = self._load_contours(
                contours_fpath
            )
            c_numbers, c_times, c_freqs, c_sal = self._sort_contours(
                c_numbers, c_times, c_freqs, c_sal
            )
        finally:
            # Clean up on failure as well as on success.
            shutil.rmtree(work_dir, ignore_errors=True)
            for fpath in (tmp_audio, seed_fpath):
                if fpath is not None and os.path.exists(fpath):
                    os.remove(fpath)

        (c_numbers, c_times, c_freqs, c_sal) = self._postprocess_contours(
            c_numbers, c_times, c_freqs, c_sal
        )

        return Contours(
            c_numbers, c_times, c_freqs, c_sal, self.sample_rate,
            audio_filepath
        )

    def get_seeds(self, audio_filepath):
        """Get the seeds file to pass to the HLL tracker.

        The audio is loaded at 44100 Hz, its harmonic component is
        extracted, a CQT is computed and peaks are picked per frequency
        band. The resulting seeds are written to a temporary csv file which
        the caller is responsible for deleting.

        Parameters
        ----------
        audio_filepath : str
            Path to audio file.

        Returns
        -------
        seeds_fpath : str
            Path to the seeds output file.

        """
        y, sr = librosa.load(audio_filepath, sr=44100)
        y_harmonic = librosa.effects.harmonic(y)
        cqt, samples, freqs = self._compute_cqt(y_harmonic, sr)
        seeds = self._pick_seeds_cqt(cqt, freqs, samples)

        # mkstemp creates the file atomically (mktemp only returns a name).
        file_descriptor, seeds_fpath = tmp.mkstemp(suffix='.csv')
        with os.fdopen(file_descriptor, 'w') as fhandle:
            writer = csv.writer(fhandle, delimiter=',')
            writer.writerows(seeds)

        return seeds_fpath

    def _moving_average(self, a):
        """Compute the moving average of a signal.

        The window length is ``self.avg_filt_len``. Only full windows are
        kept, so the output is ``avg_filt_len - 1`` elements shorter than
        the input.

        Parameters
        ----------
        a : np.array
            Signal

        Returns
        -------
        a_avg : np.array
            Moving average of signal.

        """
        n = self.avg_filt_len
        ret = np.cumsum(a, dtype=float)
        # Difference of cumulative sums gives the sum over each window.
        ret[n:] = ret[n:] - ret[:-n]
        return ret[n - 1:] / n

    def _norm_matrix(self, mat, overall=True, time=True, freq=True):
        """Normalize a matrix overall, in time, or in frequency.

        Each enabled step subtracts the minimum and divides by the maximum
        (a zero maximum is replaced by 1 to avoid dividing by zero). The
        steps are applied in the order overall, time, freq.

        Parameters
        ----------
        mat : np.array
            Matrix.
        overall : bool, default=True
            If True, normalizes first by the overall amplitudes.
        time : bool, default=True
            If True, normalizes in time (each row, i.e. along axis 1).
        freq : bool, default=True
            If True, normalizes in frequency (each column, i.e. along
            axis 0).

        Returns
        -------
        mat_norm : np.array
            Normalized matrix

        """
        if overall:
            mat = mat - np.min(mat)
            m = np.max(mat)
            if m == 0:
                m = 1
            mat = mat / m

        if time:
            mat = (mat.T - np.min(mat, axis=1)).T
            m = np.max(mat, axis=1)
            m[m == 0] = 1
            mat = (mat.T / m).T

        if freq:
            mat = mat - np.min(mat, axis=0)
            m = np.max(mat, axis=0)
            m[m == 0] = 1
            mat = mat / m

        return mat

    def _compute_cqt(self, y, sr):
        """Compute a CQT.

        Parameters
        ----------
        y : np.array
            Audio signal
        sr : float
            Audio signal sample rate

        Returns
        -------
        cqt : np.array [n_freqs, n_frames]
            Normalized magnitude CQT.
        samples : np.array [n_frames]
            Sample index of each CQT frame.
        freqs : np.array [n_freqs]
            CQT frequencies.

        """
        fmin = librosa.note_to_hz(self.min_note)
        # Honour the constructor argument (previously hard-coded to 12).
        bins_per_octave = self.bins_per_octave
        n_cqt_bins = bins_per_octave * self.n_octaves
        cqt = np.abs(librosa.cqt(
            y, sr=sr, hop_length=self.hop_size, fmin=fmin,
            filter_scale=self.filter_scale,
            bins_per_octave=bins_per_octave, n_bins=n_cqt_bins
        ))

        cqt = self._norm_matrix(cqt)

        n_time_frames = cqt.shape[1]

        freqs = librosa.cqt_frequencies(
            fmin=fmin, bins_per_octave=bins_per_octave, n_bins=n_cqt_bins
        )
        samples = librosa.frames_to_samples(
            np.arange(n_time_frames), hop_length=self.hop_size
        )

        return cqt, samples, freqs

    def _pick_seeds_cqt(self, cqt, cqt_freqs, samples):
        """Pick (sample, frequency) seeds from the peaks of a CQT.

        Each frequency band is smoothed with a moving average and
        peak-picked; peaks whose unsmoothed value does not exceed
        ``self.peak_thresh`` are discarded.

        Parameters
        ----------
        cqt : np.array [n_freqs, n_frames]
            CQT magnitudes, one row per frequency.
        cqt_freqs : np.array [n_freqs]
            CQT frequencies.
        samples : np.array [n_frames]
            Sample index of each CQT frame.

        Returns
        -------
        seeds : np.array [n_seeds, 2]
            Array of sample, frequency seeds

        """
        seeds = []
        for i, freq in enumerate(cqt_freqs):
            freq_band = cqt[i, :]
            freq_band_smooth = self._moving_average(freq_band)
            # Keyword arguments work on both old and new librosa releases
            # (newer ones make these parameters keyword-only).
            peak_locs = librosa.util.peak_pick(
                freq_band_smooth, pre_max=self.pre_max,
                post_max=self.post_max, pre_avg=self.pre_avg,
                post_avg=self.post_avg, delta=self.delta, wait=self.wait
            )

            # An empty result cannot be used as an index array.
            if len(peak_locs) == 0:
                continue

            # NOTE(review): peak_locs index the smoothed band, which is
            # avg_filt_len - 1 elements shorter than freq_band / samples, so
            # the lookups below are not offset-corrected. Kept as-is to
            # preserve existing results - confirm whether this is intended.
            peak_locs = peak_locs[freq_band[peak_locs] > self.peak_thresh]

            for peak_loc in peak_locs:
                seeds.append([samples[peak_loc], freq])

        return np.array(seeds)

    def _load_contours(self, fpath):
        """Load contour data from an HLL csv file.

        Only the first four columns of each row are read: contour number,
        time in samples, frequency and salience.

        Parameters
        ----------
        fpath : str
            Path to output csv file.

        Returns
        -------
        index : np.array
            Array of contour numbers
        times : np.array
            Array of contour times
        freqs : np.array
            Array of contour frequencies
        contour_sal : np.array
            Array of contour saliences

        """
        index = []
        times = []
        freqs = []
        contour_sal = []
        with open(fpath, 'r') as fhandle:
            reader = csv.reader(fhandle, delimiter=',')
            for row in reader:
                index.append(row[0])
                times.append(row[1])
                freqs.append(row[2])
                contour_sal.append(row[3])  # TODO: was 3: - generalize later!

        index = np.array(index, dtype=int)
        # Times are divided by the audio sample rate (samples -> seconds).
        times = np.array(times, dtype=float) / self.audio_samplerate
        freqs = np.array(freqs, dtype=float)
        contour_sal = np.array(contour_sal, dtype=float)

        return (index, times, freqs, contour_sal)