Source code for motif.feature_extractors.utils

"""Utility functions for computing contour features.
Each of these functions computes single sets of contour features using
information such as the times, frequencies, salience, sample rate, etc.

Each function returns a flattened numpy array for easy concatenation.

@author: mariapanteli, rabitt
"""
from __future__ import print_function
import librosa
import numpy as np
import numpy.polynomial.polynomial as Poly
import scipy


[docs]def hz_to_cents(freq_hz, ref_hz=32.0): '''Convert frequency values from Hz to cents Parameters ---------- freq_hz : np.array Array of contour frequencies in Hz ref_hz : float Reference frequency in Hz Returns ------- freq_cents : np.array Array of contour frequencies in cents ''' freq_cents = 1200.0 * np.log2(freq_hz / ref_hz) return freq_cents
[docs]def get_contour_onset(times): '''Get the first time stamp of a contour Parameters ---------- times : np.array Array of contour times Returns ------- onset : float The contour onset in seconds ''' return np.array([times[0]])
[docs]def get_contour_offset(times): '''Get the last time stamp of a contour Parameters ---------- times : np.array Array of contour times Returns ------- offset : float The contour offset in seconds ''' return np.array([times[-1]])
[docs]def get_contour_duration(times): '''Get contour duration in seconds Parameters ---------- times : np.array Array of contour times Returns ------- duration : float Duration in seconds ''' return np.array([times[-1] - times[0]])
[docs]def get_mean(signal): '''Get the mean of a signal. Parameters ---------- signal : np.array Array of values Returns ------- mean : float The mean of the signal ''' return np.array([np.mean(signal)])
[docs]def get_std(signal): '''Get the standard deviation of a signal. Parameters ---------- signal : np.array Array of values Returns ------- std : float The standard deviation of the signal ''' return np.array([np.std(signal)])
[docs]def get_sum(signal): '''Get the sum of a signal. Parameters ---------- signal : np.array Array of values Returns ------- sum : sum of the signal ''' return np.array([np.sum(signal)])
[docs]def get_range(signal): '''Get the range of a signal. Parameters ---------- signal : np.array Array of values Returns ------- range : float The range of the signal ''' return np.array([np.max(signal) - np.min(signal)])
[docs]def get_total_variation(signal): '''Get the total variation of a signal. Parameters ---------- signal : np.array Array of values Returns ------- total_variation : float The total variation of the signal ''' return np.array([np.sum(np.abs(signal[1:] - signal[:-1]))])
[docs]def get_polynomial_fit_features(times, signal, n_deg=5, norm=False): '''Fit a signal to a polynomial, return coefficients of polynomial and residual error. Parameters ---------- times : np.array Array of contour times. signal : np.array Array of values to fit. n_deg : int, default=5 Number of polynomial degrees to fit. norm : bool, default=False If True, scales the signal to be between 0 and 1 If False, the signal is not altered. Returns ------- poly_coeff : np.array The coefficients of the polynomial. poly_approx : np.array The polynomial approximation of the signal. residual : np.array The pointwise difference between the signal and the polynomial. ''' poly_coeff, _, residual = _fit_poly( n_deg, signal, grid=times, norm=norm ) return np.concatenate([poly_coeff.flatten(), [np.linalg.norm(residual)]])
def _fit_poly(n_poly_degrees, signal, grid=None, norm=False): '''Fit a signal to a polynomial. If grid is not given, assumes a uniform grid between 0 and 1 of length len(signal). Parameters ---------- n_poly_degrees : int Number of polynomial degrees to fit. signal : np.array Array of values to fit. grid : np.array or None, default=None Array of x-values, or None. If None, uses a uniform time grid between 0 and 1. norm : bool, default=False If True, scales the signal to be between 0 and 1 If False, the signal is not altered. Returns ------- poly_coeff : np.array The coefficients of the polynomial. poly_approx : np.array The polynomial approximation of the signal. residual : np.array The pointwise difference between the signal and the polynomial. ''' n_points = len(signal) if n_points < n_poly_degrees + 1: raise ValueError('signal must be at least as long as n_poly_degrees') if norm: signal = signal - np.min(signal) max_val = np.max(signal) if max_val > 0: signal = signal / max_val if grid is None: grid = np.linspace(-1, 1, num=n_points) else: grid = grid - np.mean(grid) poly_coeff = Poly.polyfit(grid, signal, n_poly_degrees) poly_approx = Poly.polyval(grid, poly_coeff) residual = signal - poly_approx return poly_coeff, poly_approx, residual def _fit_normalized_cosine(x, y, min_freq, max_freq, step): '''Assuming the amplitude is 1, find the optimal frequency and phase of a to fit a cosine. Searches within the frequency range (min_freq, max_freq). Fits to a cosine of the form: y = cos((2pi * freq * x) - phase) Parameters ---------- x : np.array Array of evenly spaced x-values. y : np.array Array of y-values. min_freq : float The minimum allowed vibrato frequency. max_freq : float The maximum allowed vibrato frequency. step : float The step size between vibrato search frequencies. Returns ------- freq : float The estimated optimal frequency. phase : float The estimated optimal phase (in radians) ''' freqs = np.arange(min_freq, max_freq, step) dot_prod = np.dot( np.exp(2.0 * np.pi * 1j * np.multiply.outer(freqs, x)), y ) dot_prod_mag = np.abs(dot_prod) peak_idx = list(scipy.signal.argrelmax(dot_prod_mag)[0]) if len(peak_idx) == 0: return 0, 0 idx = peak_idx[np.argmax(dot_prod_mag[peak_idx])] freq = freqs[idx] phase = np.angle(dot_prod[idx]) return freq, phase def _compute_coverage_array(y_sinfit_diff, cycle_length, vibrato_threshold): '''Given an array of residual differences, compute the vibrato coverage over time by splitting the interval up chunks of size cycle_length. Parameters ---------- y_sinfit_diff : np.array Array of residual differences between 0 and 1. cycle_length : float Optimal number of intervals for the estimated vibrato frequency vibrato_threshold : float Value between 0 and 1 to determine whether the fit is good enough. Returns ------- coverage : np.array Array of booleans indicating if the current frame contains vibrato. ''' n_points = len(y_sinfit_diff) half_period_idx = list( np.round( cycle_length * np.arange( 0, int(np.ceil(float(n_points) / float(cycle_length)) + 1) ) ).astype(int) ) if half_period_idx[-1] > n_points: half_period_idx = half_period_idx[:-1] if half_period_idx[-1] < n_points: half_period_idx.append(n_points) # compute the goodness of fit for each half period diff_thresh = np.zeros(y_sinfit_diff.shape) diffs = np.zeros((len(half_period_idx) - 1, )) for k, (i, j) in enumerate(zip(half_period_idx[:-1], half_period_idx[1:])): diffs[k] = np.mean(y_sinfit_diff[i:j]) diff_thresh[i:j] = diffs[k] # vibrato is active when the fit diff is below a threshold coverage = np.less_equal(diff_thresh, vibrato_threshold) diff_coverage = np.less_equal(diffs, vibrato_threshold) # If vibrato is active for less than 2 full periods, set coverage to None if sum(diff_coverage) <= 3: coverage[:] = False return coverage
[docs]def get_contour_shape_features(times, freqs, sample_rate, poly_degree=5, min_freq=3, max_freq=30, freq_step=0.1, vibrato_threshold=0.25): '''Fit contour to a low order polynomial plus sinusoidal vibrato. Parameters ---------- times : np.array Sequence of contour times freqs : np.array Sequence of contour frequencies sample_rate : float Contour sample rate poly_degree : float, default=5 Low order polynomial degree min_freq : float, default=3 The minimum allowed vibrato frequency max_freq : float, default=30 The maximum allowed vibrato frequency freq_step : float, default=0.1 The step size between vibrato search frequencies vibrato_threshold : float, default=0.25 The fitness threshold for a half period to be considered vibrato. Regions with normalized fitness differences below vibrato_threshold are considered to have vibrato. Returns ------- features : np.array Array of feautres. Elements (in order) are: - vibrato rate (in Hz) - vibrato extent (in the same units as freqs) - vibrato coverage (between 0 and 1) - vibrato coverage beginning (between 0 and 1) - vibrato coverage middle (between 0 and 1) - vibrato coverage end (between 0 and 1) - 0th polynomial coefficient - 1st polynomial coefficient - ... - Kth polynomial coefficient (K = poly_degree) - polynomial fit residual - overall model fit residual ''' n_points = len(freqs) times_shifted = times - np.mean(times) # fit contour to a low order polynomial poly_coeffs, y_poly, y_diff = _fit_poly( poly_degree, freqs, grid=times_shifted ) # remove amplitude envelope using hilbert transform y_hilbert = np.abs(scipy.signal.hilbert(y_diff)) y_hilbert[y_hilbert == 0] = 1.0 y_sin = y_diff / y_hilbert # get ideal vibrato parameters from resulting signal vib_freq, vib_phase = _fit_normalized_cosine( times_shifted, y_sin, min_freq=min_freq, max_freq=max_freq, step=freq_step ) y_sinfit = np.cos(2. * np.pi * vib_freq * times_shifted - vib_phase) # get residual of sinusoidal fit y_sinfit_diff = np.abs(y_sin - y_sinfit) # compute vibrato coverage if vib_freq > 0: cycle_length = 0.5 * ((sample_rate) / vib_freq) coverage = _compute_coverage_array( y_sinfit_diff, cycle_length, vibrato_threshold ) else: coverage = np.zeros((n_points, )).astype(bool) # compute percentage of coverage vib_coverage = coverage.mean() # if vibrato is present, set extent and rate. Otherwise they are zero. if vib_coverage > 0: vib_extent = np.mean(y_hilbert[coverage]) vib_rate = vib_freq else: vib_extent = 0.0 vib_rate = 0.0 # compute the overall model fit y_vib = vib_extent * y_sinfit y_vib[~coverage] = 0 y_modelfit = y_vib + y_poly # compute residuals polyfit_residual = np.linalg.norm(y_diff) / float(n_points) modelfit_residual = np.linalg.norm(freqs - y_modelfit) / float(n_points) # aggregate features thirds = int(np.round(n_points / 3.0)) return np.concatenate([ np.array([vib_rate, vib_extent, vib_coverage]), np.array([coverage[:thirds].mean(), coverage[thirds:2 * thirds].mean(), coverage[2 * thirds:].mean()]), poly_coeffs, np.array([polyfit_residual, modelfit_residual]) ])
[docs]def vibrato_essentia(freqs, sample_rate, hop_size=1): """Estimate vibrato parameters as in essentia. Warning: These features work but aren't very precise (e.g a perfect 12 Hz sine wav estimates a rate of 9.8). Parameters ---------- freqs : np.array Sequence of contour frequencies sample_rate : float Contour sample rate hop_size : int, default=1 Number of samples to advance each frame Returns ------- features : np.array Array of feautres. Elements (in order) are: - vibrato active (1 if active, 0 if not) - vibrato rate (in Hz, 0 if inactive) - vibrato extent (in the same units as freqs, 0 if inactive) - vibrato coverage (between 0 and 1, 0 if inactive) """ contour = freqs - np.mean(freqs) frame_size = int(np.round(0.35 * sample_rate)) fft_size = 4 * frame_size n_frames = len(contour) - frame_size freqs = np.fft.fftfreq(fft_size, 1. / sample_rate)[0:int(fft_size / 2.)] vib_inds = np.where((freqs >= 2) & (freqs <= 20))[0] # vibrato 2-20 Hz rate = [] extent = [] coverage = [] for frame in np.arange(0, n_frames, hop_size): contour_segment = ( contour[frame:frame + frame_size] - np.mean(contour[frame:frame + frame_size]) ) spec = np.abs(np.fft.fft(contour_segment, n=fft_size)) peak_inds = librosa.util.peak_pick(spec, 3, 3, 3, 5, 0.5, 10) # top 3 peaks if len(peak_inds) > 0: top_peak_idx = np.argsort(spec[peak_inds])[::-1][:3] peak_inds = peak_inds[top_peak_idx] vib_peaks = list( np.intersect1d(vib_inds, peak_inds, assume_unique=True) ) if len(vib_peaks) > 0: rate.append(np.mean(freqs[vib_peaks])) extent.append( np.max(contour[frame:frame + frame_size]) - np.min(contour[frame:frame + frame_size]) ) # append '1' if current frame has vibrato coverage.append(1.0) rate = np.mean(rate) if len(rate) > 0 else 0.0 extent = np.mean(extent) if len(extent) > 0 else 0.0 coverage = sum(coverage) / n_frames if len(coverage) > 0 else 0.0 vib_params = [rate, extent, coverage] if vib_params == [0.0, 0.0, 0.0]: feats = np.array([0] + vib_params) else: feats = np.array([1] + vib_params) return feats