# Source code for motif.core
# -*- coding: utf-8 -*-
""" Core methods and base class definitions
"""
import csv
from mir_eval import melody, multipitch
import numpy as np
import os
import six
from sklearn import metrics
import sox
import tempfile as tmp
from .utils import validate_contours, format_contour_data, format_annotation
from .utils import get_snippet_idx, load_annotation
###############################################################################
class Contours(object):
    '''Class containing information about all contours in a single audio
    file.

    Attributes
    ----------
    index : array
        array of contour numbers
    times : array
        array of contour times
    freqs : array
        array of contour frequencies
    salience : array
        array of contour salience values, divided by their maximum
    sample_rate : float
        Contour sample rate.
    audio_filepath : str or None
        Path to the audio file the contours were extracted from
    audio_duration : float or None
        Duration passed to the constructor, if any
    nums : list
        Ordered list of contour numbers
    index_mapping : dict
        Mapping from contour number to the indices into times/freqs/salience
        where the contour is active
    duration : float
        Duration of the audio file
    uniform_times : array
        Uniform time stamps at `sample_rate` covering `duration`

    '''

    def __init__(self, index, times, freqs, salience, sample_rate,
                 audio_filepath=None, audio_duration=None):
        '''
        Parameters
        ----------
        index : np.array
            Array of contour numbers
        times : np.array
            Array of contour times
        freqs : np.array
            Array of contour frequencies
        salience : np.array
            Array of contour saliences
        sample_rate : float
            Contour sample rate.
        audio_filepath : str or None
            Path to audio file contours were extracted from
        audio_duration : float or None
            Duration of the audio. When given it is used as-is; otherwise
            the duration is read from `audio_filepath`.

        Raises
        ------
        IOError
            If `audio_filepath` is given but does not exist.
        ValueError
            If neither `audio_filepath` nor `audio_duration` is given.

        '''
        validate_contours(index, times, freqs, salience)

        if audio_filepath is not None and not os.path.exists(audio_filepath):
            raise IOError("audio_filepath does not exist.")
        elif audio_filepath is None and audio_duration is None:
            raise ValueError(
                "one of audio_filepath or audio_duration must be set.")

        # contour attributes
        self.index = index
        self.times = times
        self.freqs = freqs
        self.salience = self._set_salience(salience)
        self.sample_rate = sample_rate
        self.audio_filepath = audio_filepath
        self.audio_duration = audio_duration

        # derived attributes; each one depends on the attributes set above
        self.nums = self._compute_nums()
        self.index_mapping = self._compute_index_mapping()
        self.duration = self._compute_duration()
        self.uniform_times = self._compute_uniform_times()

    def _set_salience(self, salience):
        '''Normalize the salience values by their maximum.

        Parameters
        ----------
        salience : np.array
            Array of contour saliences

        Returns
        -------
        salience : np.array
            Normalized salience. Returned unchanged when it is empty or
            its maximum is zero (nothing to divide by).

        '''
        if len(salience) == 0:
            return salience

        max_salience = np.max(salience)
        if max_salience == 0:
            return salience
        return salience / max_salience

    def _compute_nums(self):
        '''Compute the list of contour index numbers

        Returns
        -------
        nums : list
            Sorted list of contour index numbers

        '''
        return sorted(set(self.index))

    def _compute_index_mapping(self):
        '''Computes the mapping from contour numbers to indices.

        Returns
        -------
        index_mapping : dict
            Mapping from contour numbers to the array of positions in
            `index` holding that contour number.

        '''
        # Store the exact positions rather than range(first, last + 1): the
        # range form silently includes samples of other contours whenever a
        # contour's samples are not stored contiguously.
        return dict(
            (num, np.where(self.index == num)[0]) for num in self.nums
        )

    def _compute_duration(self):
        '''Compute the duration of the audio file.

        Returns
        -------
        duration : float
            Audio file duration

        '''
        if self.audio_duration is not None:
            return self.audio_duration
        return sox.file_info.duration(self.audio_filepath)

    def _compute_uniform_times(self):
        '''Compute array of uniform time stamps at the sample rate

        Returns
        -------
        uniform_times : np.array
            Array of uniform time stamps at the sample rate

        '''
        # +1 so that the final stamp is at or beyond the duration
        n_stamps = int(np.ceil(self.duration * self.sample_rate)) + 1
        return np.arange(0, n_stamps) / float(self.sample_rate)

    def contour_times(self, index):
        '''Get the time stamps for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_times : array
            array of contour times

        '''
        return self.times[self.index_mapping[index]]

    def contour_freqs(self, index):
        '''Get the frequency values for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_frequencies : array
            array of contour frequency values

        '''
        return self.freqs[self.index_mapping[index]]

    def contour_salience(self, index):
        '''Get the salience values for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_salience : array
            array of contour salience values

        '''
        return self.salience[self.index_mapping[index]]

    def compute_labels(self, annotation_fpath, overlap_threshold=0.5,
                       single_f0=True):
        '''Compute overlaps with an annotation and labels for each contour.

        Parameters
        ----------
        annotation_fpath : str
            Path to annotation file.
        overlap_threshold : float, default=0.5
            The minimum amount of overlap with the annotation for a contour to
            be labeled as a positive example; between 0 and 1.
        single_f0 : bool, default=True
            Only True is supported.

        Returns
        -------
        labels : np.array
            1 where the contour's overlap exceeds `overlap_threshold`, else
            0; ordered like `nums`.
        overlaps : np.array
            Overlap value of each contour; ordered like `nums`.

        Raises
        ------
        NotImplementedError
            If `single_f0` is False.

        '''
        if not single_f0:
            raise NotImplementedError

        annot_times, annot_freqs = load_annotation(annotation_fpath)
        ref_cent, ref_voicing = format_annotation(
            self.uniform_times, annot_times, annot_freqs
        )
        est_cents, est_voicing = format_contour_data(self.freqs)

        labels = []
        overlaps = []
        for num in self.nums:
            contour_idx = self.index_mapping[num]
            contour_times = self.times[contour_idx]
            gt_idx = get_snippet_idx(contour_times, self.uniform_times)

            this_est_cent, this_est_voicing = melody.resample_melody_series(
                contour_times, est_cents[contour_idx],
                est_voicing[contour_idx], self.uniform_times[gt_idx]
            )

            overlap = melody.overall_accuracy(
                ref_voicing[gt_idx], ref_cent[gt_idx],
                this_est_voicing, this_est_cent
            )
            overlaps.append(overlap)
            labels.append(1 * (overlap > overlap_threshold))

        return np.array(labels), np.array(overlaps)

    def to_multif0_format(self):
        '''Convert contours to multi-f0 format.

        Returns
        -------
        times : np.array
            uniform time stamps
        freqs : list of np.array
            One array per uniform time stamp holding the frequencies of all
            contour samples assigned to that stamp.
            Each array may have any number of frequencies, including none.

        '''
        n_uniform_times = len(self.uniform_times)
        freqs = [[] for _ in range(n_uniform_times)]

        # nearest uniform time stamp for each sample, clipped to the last one
        time_idx = np.round(self.times * self.sample_rate).astype(int)
        time_idx[time_idx >= n_uniform_times] = n_uniform_times - 1

        for i, freq in zip(time_idx, self.freqs):
            freqs[i].append(freq)

        freqs = [np.array(f).astype(float) for f in freqs]
        return self.uniform_times, freqs

    def coverage(self, annotation_fpath, single_f0=True):
        """ Compute how much the set of contours covers the annotation

        Parameters
        ----------
        annotation_fpath : str
            Path to annotation file.
        single_f0 : bool
            True for a file containing a single pitch per time stamp
            False for a file containing possibly multiple pitches / time stamp

        Returns
        -------
        scores : dict
            Dictionary of multipitch scores.

        """
        est_times, est_freqs = self.to_multif0_format()

        n_freqs = 1 if single_f0 else None
        ref_times, ref_freqs = load_annotation(
            annotation_fpath, n_freqs=n_freqs, to_array=False, rm_zeros=True
        )

        return multipitch.evaluate(
            ref_times, ref_freqs, est_times, est_freqs
        )

    def save_contours_subset(self, output_fpath, output_nums):
        '''Save the contours listed in `output_nums` to a csv file.

        Rows are written as (index, time, freq, salience), in the order the
        contour numbers appear in `output_nums`.

        Parameters
        ----------
        output_fpath : str
            Path to save output csv file.
        output_nums : list
            List of contour numbers to save

        '''
        target_indices = []
        for num in output_nums:
            target_indices.extend(self.index_mapping[num])

        with open(output_fpath, 'w') as fhandle:
            writer = csv.writer(fhandle, delimiter=',')
            writer.writerows(zip(
                self.index[target_indices],
                self.times[target_indices],
                self.freqs[target_indices],
                self.salience[target_indices]
            ))

    def save(self, output_fpath):
        '''Save extracted contours to a csv file.

        Rows are written as (index, time, freq, salience).

        Parameters
        ----------
        output_fpath : str
            Path to save output csv file.

        '''
        with open(output_fpath, 'w') as fhandle:
            writer = csv.writer(fhandle, delimiter=',')
            writer.writerows(zip(
                self.index,
                self.times,
                self.freqs,
                self.salience
            ))
###############################################################################
CONTOUR_EXTRACTOR_REGISTRY = {}  # All available extractors


class MetaContourExtractor(type):
    """Meta-class to register the available extractors.

    Every class created with this meta-class that lists a class named
    ``ContourExtractor`` among its direct bases is stored in
    ``CONTOUR_EXTRACTOR_REGISTRY`` under the key returned by its ``get_id``.
    """

    def __new__(mcs, name, bases, class_dict):
        cls = type.__new__(mcs, name, bases, class_dict)
        # Register classes that inherit from the base class ContourExtractor.
        # Only direct bases are inspected, and they are matched by name.
        base_names = [base.__name__ for base in bases]
        if "ContourExtractor" in base_names:
            CONTOUR_EXTRACTOR_REGISTRY[cls.get_id()] = cls
        return cls
class ContourExtractor(six.with_metaclass(MetaContourExtractor)):
    """This class is an interface for all the contour extraction algorithms
    included in motif. Each extractor must inherit from it and implement the
    following:

        - ``compute_contours``
        - ``get_id``
        - the ``audio_samplerate``, ``sample_rate`` and ``min_contour_len``
          properties

    Additionally, private helper functions are provided:

        - ``_preprocess_audio``
        - ``_postprocess_contours``
        - ``_sort_contours``

    These are meant to do common tasks for all the extractors and they should
    be called inside ``compute_contours`` if needed.
    """

    def __init__(self):
        # target format used by _preprocess_audio
        self.audio_channels = 1
        self.audio_bitdepth = 32
        self.audio_db_level = -3.0

    @property
    def audio_samplerate(self):
        """Property to get the sample rate the input audio is converted to"""
        raise NotImplementedError("This property must return the sample rate "
                                  "the input audio is converted to.")

    @property
    def sample_rate(self):
        """Property to get the sample rate of the output contours"""
        raise NotImplementedError("This property must return the sample rate "
                                  "of the output contours.")

    @property
    def min_contour_len(self):
        """Property to get the minimum length of a contour in seconds"""
        raise NotImplementedError("This property must return the minimum "
                                  "contour length in seconds.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the extractor type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour extraction type")

    def compute_contours(self, input_filepath):
        """Method for computing contours for a given file"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the contour extraction")

    def _preprocess_audio(self, audio_filepath, normalize_format=True,
                          normalize_volume=True, hpss=False,
                          equal_loudness_filter=False):
        '''Preprocess audio before computing contours

        Parameters
        ----------
        audio_filepath : str
            Path to the input audio file
        normalize_format : bool
            If True, convert the audio to ``audio_samplerate``,
            ``audio_channels`` and ``audio_bitdepth``
        normalize_volume : bool
            If True, normalize the audio to ``audio_db_level``
        hpss : bool
            Not implemented; must be False
        equal_loudness_filter : bool
            Not implemented; must be False

        Returns
        -------
        output_path : str
            Path to a temporary wav file holding the processed audio. The
            file is not deleted by this method.

        Raises
        ------
        NotImplementedError
            If `hpss` or `equal_loudness_filter` is True.

        '''
        # Reject unsupported options up front so that no temporary file is
        # created (and left behind) before the error is raised.
        if hpss:
            raise NotImplementedError
        if equal_loudness_filter:
            raise NotImplementedError

        tfm = sox.Transformer()
        if normalize_format:
            tfm.convert(
                samplerate=self.audio_samplerate,
                n_channels=self.audio_channels,
                bitdepth=self.audio_bitdepth
            )
        if normalize_volume:
            tfm.norm(db_level=self.audio_db_level)

        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim before it is used.
        file_descriptor, output_path = tmp.mkstemp(suffix='.wav')
        os.close(file_descriptor)
        try:
            tfm.build(audio_filepath, output_path)
        except Exception:
            # do not leave an empty temporary file behind on failure
            os.remove(output_path)
            raise

        return output_path

    def _postprocess_contours(self, index, times, freqs, salience):
        """Remove contours that are too short.

        A contour is removed when the span of its time stamps is less than
        or equal to ``min_contour_len``.

        Parameters
        ----------
        index : np.array
            array of contour numbers
        times : np.array
            array of contour times
        freqs : np.array
            array of contour frequencies
        salience : np.array
            array of contour salience values

        Returns
        -------
        index_pruned : np.array
            Pruned array of contour numbers
        times_pruned : np.array
            Pruned array of contour times
        freqs_pruned : np.array
            Pruned array of contour frequencies
        salience_pruned : np.array
            Pruned array of contour salience values

        """
        keep_index = np.ones(times.shape).astype(bool)

        for num in set(index):
            this_idx = (index == num)
            if np.ptp(times[this_idx]) <= self.min_contour_len:
                keep_index[this_idx] = False

        return (index[keep_index], times[keep_index],
                freqs[keep_index], salience[keep_index])

    def _sort_contours(self, index, times, freqs, salience):
        """Sort contours by index and time.

        Parameters
        ----------
        index : np.array
            array of contour numbers
        times : np.array
            array of contour times
        freqs : np.array
            array of contour frequencies
        salience : np.array
            array of contour salience values

        Returns
        -------
        index_sorted : np.array
            Sorted array of contour numbers
        times_sorted : np.array
            Sorted array of contour times
        freqs_sorted : np.array
            Sorted array of contour frequencies
        salience_sorted : np.array
            Sorted array of contour salience values

        """
        # lexsort uses the last key as the primary key: index, then time
        sort_idx = np.lexsort((times, index))
        return (
            index[sort_idx], times[sort_idx], freqs[sort_idx],
            salience[sort_idx]
        )
###############################################################################
FEATURE_EXTRACTOR_REGISTRY = {}  # All available feature extractors


class MetaFeatureExtractor(type):
    """Meta-class to register the available contour features.

    Every class created with this meta-class that lists a class named
    ``FeatureExtractor`` among its direct bases is stored in
    ``FEATURE_EXTRACTOR_REGISTRY`` under the key returned by its ``get_id``.
    """

    def __new__(mcs, name, bases, class_dict):
        cls = type.__new__(mcs, name, bases, class_dict)
        # Register classes that inherit from the base class FeatureExtractor.
        # Only direct bases are inspected, and they are matched by name.
        base_names = [base.__name__ for base in bases]
        if "FeatureExtractor" in base_names:
            FEATURE_EXTRACTOR_REGISTRY[cls.get_id()] = cls
        return cls
class FeatureExtractor(six.with_metaclass(MetaFeatureExtractor)):
    """This class is an interface for all the feature extraction combinations
    included in motif. Each feature set must inherit from it and implement the
    following methods:

        - ``get_feature_vector``
            This should return a flat numpy array
        - ``feature_names``
            This should return a list of the same length as the above
            numpy array of what each dimension is. Can be as simple as an
            index, can be identifiers such as
            ['vibrato rate', 'vibrato extent']
        - ``get_id``
            This should return a string identifier of the feature type
    """

    def __init__(self):
        pass

    def get_feature_vector(self, times, freqs, salience, sample_rate):
        """Method for computing features for a given contour"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the contour features")

    @property
    def feature_names(self):
        """Get the list of feature names."""
        raise NotImplementedError("This method must create and return a list "
                                  "of feature names, the same length as the "
                                  "feature vector.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the feature type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the feature type")

    def compute_all(self, ctr):
        """ Compute features for all contours.

        Parameters
        ----------
        ctr : Contours
            Instance of Contours object

        Returns
        -------
        features : np.array [n_contours, n_features]
            Feature matrix, ordered by contour number. Contours whose index
            mapping is empty are skipped.

        """
        features = [
            self.get_feature_vector(
                ctr.contour_times(num),
                ctr.contour_freqs(num),
                ctr.contour_salience(num),
                ctr.sample_rate
            )
            for num in ctr.nums
            if len(ctr.index_mapping[num]) > 0
        ]
        return np.array(features)
###############################################################################
CONTOUR_CLASSIFIER_REGISTRY = {}  # All available classifiers


class MetaContourClassifier(type):
    """Meta-class to register the available classifiers.

    Every class created with this meta-class that lists a class named
    ``ContourClassifier`` among its direct bases is stored in
    ``CONTOUR_CLASSIFIER_REGISTRY`` under the key returned by its ``get_id``.
    """

    def __new__(mcs, name, bases, class_dict):
        cls = type.__new__(mcs, name, bases, class_dict)
        # Register classes that inherit from the base class ContourClassifier.
        # Only direct bases are inspected, and they are matched by name.
        base_names = [base.__name__ for base in bases]
        if "ContourClassifier" in base_names:
            CONTOUR_CLASSIFIER_REGISTRY[cls.get_id()] = cls
        return cls
class ContourClassifier(six.with_metaclass(MetaContourClassifier)):
    """This class is an interface for all the contour classifier algorithms
    included in motif. Each classifier must inherit from it and implement the
    following methods:

        - ``predict``
        - ``fit``
        - ``threshold``
        - ``get_id``

    ``threshold`` should return a float which determines the positive class
    threshold (e.g. ``score >= threshold`` : positive class,
    ``score < threshold`` : negative class)
    """

    def __init__(self):
        pass

    @property
    def threshold(self):
        """Property for getting the threshold between classes"""
        raise NotImplementedError("This method must return a float that "
                                  "indicates the score cutoff between the "
                                  "positive and negative class.")

    def predict(self, X):
        """Method for predicting labels from input"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the prediction")

    def fit(self, X, Y):
        """Method for fitting the model"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the model fitting")

    @classmethod
    def get_id(cls):
        """Method to get the id of the classifier type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour classifier type")

    def score(self, y_predicted, y_target, y_prob=None):
        """ Compute metrics on classifier predictions

        Parameters
        ----------
        y_predicted : np.array [n_samples]
            Predicted class labels
        y_target : np.array [n_samples]
            Target class labels
        y_prob : np.array [n_samples] or None, default=None
            predicted probabilities. If None, auc is not computed

        Returns
        -------
        scores : dict
            dictionary of scores with the keys:
            'accuracy', 'mcc' (matthews correlation coefficient; None when
            more than two labels are present), 'precision', 'recall', 'f1',
            'support', 'confusion matrix' (rows/columns in sorted label
            order), 'auc score' (None when `y_prob` is None)

        """
        # Sort the union of observed labels so that the confusion matrix has
        # a deterministic row/column order (set iteration order is not).
        labels = sorted(set(y_target).union(y_predicted))
        is_binary = len(labels) <= 2

        scores = {}
        scores['accuracy'] = metrics.accuracy_score(y_target, y_predicted)

        if is_binary:
            scores['mcc'] = metrics.matthews_corrcoef(y_target, y_predicted)
        else:
            scores['mcc'] = None

        (scores['precision'],
         scores['recall'],
         scores['f1'],
         scores['support']) = metrics.precision_recall_fscore_support(
             y_target, y_predicted
         )

        scores['confusion matrix'] = metrics.confusion_matrix(
            y_target, y_predicted, labels=labels
        )

        if y_prob is not None:
            # np.asarray lets plain lists through the arithmetic below.
            # NOTE(review): the +1 offset is kept from the original code; a
            # constant shift should not change a rank-based AUC -- confirm
            # whether it can be dropped.
            scores['auc score'] = metrics.roc_auc_score(
                y_target, np.asarray(y_prob) + 1, average='weighted'
            )
        else:
            scores['auc score'] = None

        return scores
###############################################################################
CONTOUR_DECODER_REGISTRY = {}  # All available decoders


class MetaContourDecoder(type):
    """Meta-class to register the available decoders.

    Every class created with this meta-class that lists a class named
    ``ContourDecoder`` among its direct bases is stored in
    ``CONTOUR_DECODER_REGISTRY`` under the key returned by its ``get_id``.
    """

    def __new__(mcs, name, bases, class_dict):
        cls = type.__new__(mcs, name, bases, class_dict)
        # Register classes that inherit from the base class ContourDecoder.
        # Only direct bases are inspected, and they are matched by name.
        base_names = [base.__name__ for base in bases]
        if "ContourDecoder" in base_names:
            CONTOUR_DECODER_REGISTRY[cls.get_id()] = cls
        return cls
class ContourDecoder(six.with_metaclass(MetaContourDecoder)):
    """This class is an interface for all the contour decoder algorithms
    included in motif. Each decoder must inherit from it and implement the
    following methods:

        - ``decode``
        - ``get_id``
    """

    def __init__(self):
        pass

    def decode(self, ctr, Y):
        """ Decode the output of the contour classifier.

        Parameters
        ----------
        ctr : Contours
            An instance of a Contours object
        Y : np.array [n_contours]
            Predicted contour scores.

        Returns
        -------
        times : np.ndarray
            Array of time stamps
        freqs : np.ndarray
            Array of f0 values in Hz

        """
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the decoder.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the decoder type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour decoder type")