# Source code for motif.core

# -*- coding: utf-8 -*-
""" Core methods and base class definitions
"""
import csv
from mir_eval import melody, multipitch
import numpy as np
import os
import six
from sklearn import metrics
import sox
import tempfile as tmp

from .utils import validate_contours, format_contour_data, format_annotation
from .utils import get_snippet_idx, load_annotation


###############################################################################
class Contours(object):
    '''Class containing information about all contours in a single audio
    file.

    Attributes
    ----------
    index : array
        array of contour numbers
    times : array
        array of contour times
    freqs : array
        array of contour frequencies
    salience : array
        array of contour salience values, divided by the maximum salience
        (left untouched when empty or when the maximum is 0)
    sample_rate : float
        Contour sample rate.
    audio_filepath : str or None
        Path to the audio file the contours were extracted from
    audio_duration : float or None
        Audio duration as passed to the constructor
    nums : list
        Ordered list of contour numbers
    index_mapping : dict
        Mapping from contour number to the indices into times/freqs/salience
        where the contour is active
    duration : float
        Duration of the audio file
    uniform_times : array
        Uniform time stamps at ``sample_rate`` covering ``duration``

    '''
    def __init__(self, index, times, freqs, salience, sample_rate,
                 audio_filepath=None, audio_duration=None):
        '''
        Parameters
        ----------
        index : np.array
            Array of contour numbers
        times : np.array
            Array of contour times
        freqs : np.array
            Array of contour frequencies
        salience : np.array
            Array of contour saliences
        sample_rate : float
            Contour sample rate.
        audio_filepath : str or None
            Path to audio file contours were extracted from
        audio_duration : float or None
            Duration of the audio file. When given it is used as the
            duration directly; otherwise the duration is read from
            `audio_filepath`.

        Raises
        ------
        IOError
            If `audio_filepath` is given but does not exist.
        ValueError
            If both `audio_filepath` and `audio_duration` are None.

        '''
        validate_contours(index, times, freqs, salience)
        if audio_filepath is not None and not os.path.exists(audio_filepath):
            raise IOError("audio_filepath does not exist.")
        elif audio_filepath is None and audio_duration is None:
            raise ValueError(
                "one of audio_filepath or audio_duration must be set.")

        # contour attributes
        self.index = index
        self.times = times
        self.freqs = freqs
        self.salience = self._set_salience(salience)
        self.sample_rate = sample_rate
        self.audio_filepath = audio_filepath
        self.audio_duration = audio_duration

        # derived attributes; order matters (each one reads the previous)
        self.nums = self._compute_nums()
        self.index_mapping = self._compute_index_mapping()
        self.duration = self._compute_duration()
        self.uniform_times = self._compute_uniform_times()

    def _set_salience(self, salience):
        '''Normalize the salience values by their maximum.

        Parameters
        ----------
        salience : np.array
            Raw salience values.

        Returns
        -------
        salience : np.array
            Salience divided by its maximum. Returned unchanged when it is
            empty or when its maximum is 0 (avoids dividing by zero).

        '''
        if len(salience) == 0:
            return salience

        max_salience = np.max(salience)
        if max_salience == 0:
            return salience
        return salience / max_salience

    def _compute_nums(self):
        '''Compute the list of contour index numbers

        Returns
        -------
        nums : list
            Sorted list of the unique contour numbers found in `index`

        '''
        return sorted(set(self.index))

    def _compute_index_mapping(self):
        '''Computes the mapping from contour numbers to indices.

        Returns
        -------
        index_mapping : dict
            Mapping from each contour number to the list of positions in
            `index` that hold that number.

        '''
        # Use the exact positions rather than range(first, last + 1): the
        # range form silently includes samples of other contours whenever a
        # contour's rows are not stored contiguously.
        return {
            num: np.where(self.index == num)[0].tolist()
            for num in self.nums
        }

    def _compute_duration(self):
        '''Compute the duration of the audio file.

        Returns
        -------
        duration : float
            `audio_duration` when it was provided, otherwise the duration
            reported by sox for `audio_filepath`.
        '''
        if self.audio_duration is not None:
            return self.audio_duration
        return sox.file_info.duration(self.audio_filepath)

    def _compute_uniform_times(self):
        '''Compute array of uniform time stamps at the sample rate

        Returns
        -------
        uniform_times : np.array
            Time stamps ``k / sample_rate`` for
            ``k = 0 .. ceil(duration * sample_rate)`` (inclusive).
        '''
        n_stamps = int(np.ceil(self.duration * self.sample_rate)) + 1
        return np.arange(0, n_stamps) / float(self.sample_rate)

    def contour_times(self, index):
        '''Get the time stamps for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_times : array
            array of contour times
        '''
        return self.times[self.index_mapping[index]]

    def contour_freqs(self, index):
        '''Get the frequency values for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_frequencies : array
            array of contour frequency values
        '''
        return self.freqs[self.index_mapping[index]]

    def contour_salience(self, index):
        '''Get the salience values for a particular contour number.

        Parameters
        ----------
        index : int
            contour number

        Returns
        -------
        contour_salience : array
            array of contour salience values
        '''
        return self.salience[self.index_mapping[index]]

    def compute_labels(self, annotation_fpath, overlap_threshold=0.5,
                       single_f0=True):
        '''Compute overlaps with an annotation and labels for each contour.

        The overlap of a contour is mir_eval's melody overall accuracy
        between the annotation and the contour, evaluated on the uniform
        time stamps selected for that contour by ``get_snippet_idx``.

        Parameters
        ----------
        annotation_fpath : str
            Path to annotation file.
        overlap_threshold : float, default=0.5
            The minimum amount of overlap with the annotation for a contour to
            be labeled as a positive example; between 0 and 1.
        single_f0 : bool, default=True
            Only True is supported.

        Returns
        -------
        labels : np.array [n_contours]
            1 where the overlap is strictly greater than
            `overlap_threshold`, else 0; ordered like `nums`.
        overlaps : np.array [n_contours]
            Overlap of each contour with the annotation; ordered like `nums`.

        Raises
        ------
        NotImplementedError
            If `single_f0` is False.

        '''
        if not single_f0:
            raise NotImplementedError

        annot_times, annot_freqs = load_annotation(annotation_fpath)
        ref_cent, ref_voicing = format_annotation(
            self.uniform_times, annot_times, annot_freqs
        )
        est_cents, est_voicing = format_contour_data(self.freqs)

        labels = []
        overlaps = []
        for num in self.nums:
            contour_idx = self.index_mapping[num]
            this_times = self.contour_times(num)
            gt_idx = get_snippet_idx(this_times, self.uniform_times)

            # Put the contour on the same time grid as the reference
            this_est_cent, this_est_voicing = melody.resample_melody_series(
                this_times, est_cents[contour_idx],
                est_voicing[contour_idx], self.uniform_times[gt_idx]
            )

            overlap = melody.overall_accuracy(
                ref_voicing[gt_idx], ref_cent[gt_idx],
                this_est_voicing,
                this_est_cent
            )
            overlaps.append(overlap)
            labels.append(1 * (overlap > overlap_threshold))

        return np.array(labels), np.array(overlaps)

    def to_multif0_format(self):
        '''Convert contours to multi-f0 format.

        Returns
        -------
        times : np.array
            uniform time stamps
        freqs : list of np.array
            One float array per uniform time stamp holding the frequencies
            of every contour sample that rounds to that time stamp. Arrays
            may be empty and may hold any number of frequencies.
        '''
        n_uniform_times = len(self.uniform_times)
        freqs = [[] for _ in range(n_uniform_times)]

        time_idx = np.round(self.times * self.sample_rate).astype(int)
        # Samples that fall past the last stamp are folded into the last one
        time_idx[time_idx >= n_uniform_times] = n_uniform_times - 1
        for i, freq in zip(time_idx, self.freqs):
            freqs[i].append(freq)
        freqs = [np.array(f).astype(float) for f in freqs]

        return self.uniform_times, freqs

    def coverage(self, annotation_fpath, single_f0=True):
        """ Compute how much the set of contours covers the annotation

        Parameters
        ----------
        annotation_fpath : str
            Path to annotation file.
        single_f0 : bool
            True for a file containing a single pitch per time stamp
            False for a file containing possibly multiple pitches / time stamp

        Returns
        -------
        scores : dict
            Dictionary of multipitch scores, as returned by
            ``mir_eval.multipitch.evaluate``.

        """
        est_times, est_freqs = self.to_multif0_format()

        # The two modes only differ in how many frequency columns are read
        n_freqs = 1 if single_f0 else None
        ref_times, ref_freqs = load_annotation(
            annotation_fpath, n_freqs=n_freqs, to_array=False, rm_zeros=True
        )

        return multipitch.evaluate(
            ref_times, ref_freqs, est_times, est_freqs
        )

    @staticmethod
    def _write_csv(output_fpath, rows):
        '''Write rows to a comma-delimited csv file.

        Parameters
        ----------
        output_fpath : str
            Path to save output csv file.
        rows : iterable
            Iterable of rows to write.

        '''
        # The csv module requires binary mode on Python 2 and newline='' on
        # Python 3; otherwise rows end in "\r\r\n" on Windows.
        if six.PY2:
            open_kwargs = {'mode': 'wb'}
        else:
            open_kwargs = {'mode': 'w', 'newline': ''}

        with open(output_fpath, **open_kwargs) as fhandle:
            writer = csv.writer(fhandle, delimiter=',')
            writer.writerows(rows)

    def save_contours_subset(self, output_fpath, output_nums):
        '''Save the contours with the given contour numbers to a csv file.

        Each row is ``index, time, freq, salience``.

        Parameters
        ----------
        output_fpath : str
            Path to save output csv file.
        output_nums : list
            List of contour numbers to save

        '''
        target_indices = []
        for num in output_nums:
            target_indices.extend(self.index_mapping[num])

        self._write_csv(output_fpath, zip(
            self.index[target_indices],
            self.times[target_indices],
            self.freqs[target_indices],
            self.salience[target_indices]
        ))

    def save(self, output_fpath):
        '''Save extracted contours to a csv file.

        Each row is ``index, time, freq, salience``.

        Parameters
        ----------
        output_fpath : str
            Path to save output csv file.

        '''
        self._write_csv(output_fpath, zip(
            self.index,
            self.times,
            self.freqs,
            self.salience
        ))


###############################################################################
# Maps ``cls.get_id()`` -> class; filled in by MetaContourExtractor.__new__
# whenever a class with a base named "ContourExtractor" is created.
CONTOUR_EXTRACTOR_REGISTRY = {}  # All available extractors


class MetaContourExtractor(type):
    """Meta-class to register the available extractors.

    Every class created with this meta-class that lists a base named
    ``ContourExtractor`` is stored in ``CONTOUR_EXTRACTOR_REGISTRY`` under
    the key returned by its ``get_id()`` class method.
    """
    def __new__(meta, name, bases, class_dict):
        cls = type.__new__(meta, name, bases, class_dict)
        # Register classes that inherit from the base class ContourExtractor.
        # The match is by base-class name and only looks at direct bases, so
        # the base class itself and indirect descendants are not registered.
        if any(base.__name__ == "ContourExtractor" for base in bases):
            CONTOUR_EXTRACTOR_REGISTRY[cls.get_id()] = cls
        return cls


class ContourExtractor(six.with_metaclass(MetaContourExtractor)):
    """This class is an interface for all the contour extraction algorithms
    included in motif. Each extractor must inherit from it and implement the
    following members:
        - ``compute_contours``
        - ``get_id``
        - the properties ``audio_samplerate``, ``sample_rate`` and
          ``min_contour_len``
    Additionally, private helper functions are provided:
        - ``_preprocess_audio``
        - ``_postprocess_contours``
        - ``_sort_contours``
    These are meant to do common tasks for all the extractors and they should
    be called inside ``compute_contours`` if needed.

    Some methods may call a binary in the background, which creates a csv file.
    The csv file is loaded into memory and the file is deleted, unless
    ``clean=False``. When ``recompute=False``, this will first look for an
    existing precomputed contour file and if successful will load it directly.
    """
    def __init__(self):
        # Target audio format used by ``_preprocess_audio``
        self.audio_channels = 1
        self.audio_bitdepth = 32
        self.audio_db_level = -3.0

    @property
    def audio_samplerate(self):
        """Property to get the sample rate the audio is converted to during
        preprocessing"""
        raise NotImplementedError("This property must return the sample rate "
                                  "the audio is converted to during "
                                  "preprocessing.")

    @property
    def sample_rate(self):
        """Property to get the sample rate of the output contours"""
        raise NotImplementedError("This property must return the sample rate "
                                  "of the output contours.")

    @property
    def min_contour_len(self):
        """Property to get the minimum length of a contour in seconds"""
        raise NotImplementedError("This property must return the minimum "
                                  "contour length in seconds.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the extractor type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour extraction type")

    def compute_contours(self, input_filepath):
        """Method for computing contours for given file"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the contour extraction")

    def _preprocess_audio(self, audio_filepath, normalize_format=True,
                          normalize_volume=True, hpss=False,
                          equal_loudness_filter=False):
        '''Preprocess audio before computing contours

        Parameters
        ----------
        audio_filepath : str
            Path to the input audio file.
        normalize_format : bool
            If True, convert the audio to ``audio_samplerate``,
            ``audio_channels`` and ``audio_bitdepth``
        normalize_volume : bool
            If True, normalize the audio to ``audio_db_level``
        hpss : bool
            If True, applies HPSS & computes contours on the harmonic
            component. Not implemented.
        equal_loudness_filter : bool
            If True, applies an equal loudness filter to the audio.
            Not implemented.

        Returns
        -------
        output_path : str
            Path to a temporary wav file holding the processed audio. The
            caller is responsible for deleting it.

        Raises
        ------
        NotImplementedError
            If `hpss` or `equal_loudness_filter` is True.

        '''
        # Fail before doing any work so that unsupported options neither
        # waste a sox run nor leave a temporary file behind.
        if hpss or equal_loudness_filter:
            raise NotImplementedError

        tfm = sox.Transformer()
        if normalize_format:
            tfm.convert(
                samplerate=self.audio_samplerate,
                n_channels=self.audio_channels,
                bitdepth=self.audio_bitdepth
            )

        if normalize_volume:
            tfm.norm(db_level=self.audio_db_level)

        # mkstemp creates the file atomically (mktemp is deprecated and
        # race-prone); sox only needs the path, so release the descriptor.
        file_descriptor, output_path = tmp.mkstemp(suffix='.wav')
        os.close(file_descriptor)
        try:
            tfm.build(audio_filepath, output_path)
        except Exception:
            os.remove(output_path)
            raise

        return output_path

    def _postprocess_contours(self, index, times, freqs, salience):
        """Remove contours that are too short.

        A contour is dropped when the span of its time stamps (max - min) is
        less than or equal to ``min_contour_len``.

        Parameters
        ----------
        index : np.array
            array of contour numbers
        times : np.array
            array of contour times
        freqs : np.array
            array of contour frequencies
        salience : np.array
            array of contour salience values

        Returns
        -------
        index_pruned : np.array
            Pruned array of contour numbers
        times_pruned : np.array
            Pruned array of contour times
        freqs_pruned : np.array
            Pruned array of contour frequencies
        salience_pruned : np.array
            Pruned array of contour salience values

        """
        keep_index = np.ones(times.shape, dtype=bool)
        for num in set(index):
            this_idx = (index == num)
            if np.ptp(times[this_idx]) <= self.min_contour_len:
                keep_index[this_idx] = False

        return (index[keep_index], times[keep_index],
                freqs[keep_index], salience[keep_index])

    def _sort_contours(self, index, times, freqs, salience):
        """Sort contours by index and time.

        Rows are ordered by contour number first and by time within each
        contour.

        Parameters
        ----------
        index : np.array
            array of contour numbers
        times : np.array
            array of contour times
        freqs : np.array
            array of contour frequencies
        salience : np.array
            array of contour salience values

        Returns
        -------
        index_sorted : np.array
            Sorted array of contour numbers
        times_sorted : np.array
            Sorted array of contour times
        freqs_sorted : np.array
            Sorted array of contour frequencies
        salience_sorted : np.array
            Sorted array of contour salience values

        """
        # lexsort uses the LAST key as the primary key
        sort_idx = np.lexsort((times, index))

        return (
            index[sort_idx], times[sort_idx], freqs[sort_idx],
            salience[sort_idx]
        )


###############################################################################
# Maps ``cls.get_id()`` -> class; filled in by MetaFeatureExtractor.__new__
# whenever a class with a base named "FeatureExtractor" is created.
FEATURE_EXTRACTOR_REGISTRY = {}  # All available feature extractors


class MetaFeatureExtractor(type):
    """Meta-class to register the available contour features.

    Every class created with this meta-class that lists a base named
    ``FeatureExtractor`` is stored in ``FEATURE_EXTRACTOR_REGISTRY`` under
    the key returned by its ``get_id()`` class method.
    """
    def __new__(meta, name, bases, class_dict):
        cls = type.__new__(meta, name, bases, class_dict)
        # Register classes that inherit from the base class FeatureExtractor.
        # The match is by base-class name and only looks at direct bases, so
        # the base class itself and indirect descendants are not registered.
        if any(base.__name__ == "FeatureExtractor" for base in bases):
            FEATURE_EXTRACTOR_REGISTRY[cls.get_id()] = cls
        return cls


class FeatureExtractor(six.with_metaclass(MetaFeatureExtractor)):
    """This class is an interface for all the feature extraction combinations
    included in motif. Each feature set must inherit from it and implement the
    following methods:
        - ``get_feature_vector``
            This should return a flat numpy array
        - ``feature_names``
            This should return a list of the same length as the above
            numpy array of what each dimension is. Can be as simple as an
            index, can be identifiers such as
            ['vibrato rate', 'vibrato extent']
        - ``get_id``
            This should return a string identifier of the feature type
    """
    def __init__(self):
        pass

    def get_feature_vector(self, times, freqs, salience, sample_rate):
        """Method for computing features for a given contour"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the contour features")

    @property
    def feature_names(self):
        """Set the array of features names."""
        raise NotImplementedError("This method must create and return a list "
                                  "of feature names, the same length as the "
                                  "feature vector.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the feature type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the feature type")

    def compute_all(self, ctr):
        """ Compute features for all contours.

        Contours whose index mapping is empty are skipped.

        Parameters
        ----------
        ctr : Contours
            Instance of Contours object

        Returns
        -------
        features : np.array [n_contours, n_features]
            Feature matrix, ordered by contour number

        """
        features = [
            self.get_feature_vector(
                ctr.contour_times(num),
                ctr.contour_freqs(num),
                ctr.contour_salience(num),
                ctr.sample_rate
            )
            for num in ctr.nums
            if len(ctr.index_mapping[num]) > 0
        ]

        return np.array(features)


###############################################################################
# Maps ``cls.get_id()`` -> class; filled in by MetaContourClassifier.__new__
# whenever a class with a base named "ContourClassifier" is created.
CONTOUR_CLASSIFIER_REGISTRY = {}  # All available classifiers


class MetaContourClassifier(type):
    """Meta-class to register the available classifiers.

    Every class created with this meta-class that lists a base named
    ``ContourClassifier`` is stored in ``CONTOUR_CLASSIFIER_REGISTRY`` under
    the key returned by its ``get_id()`` class method.
    """
    def __new__(meta, name, bases, class_dict):
        cls = type.__new__(meta, name, bases, class_dict)
        # Register classes that inherit from the base class ContourClassifier.
        # The match is by base-class name and only looks at direct bases, so
        # the base class itself and indirect descendants are not registered.
        if any(base.__name__ == "ContourClassifier" for base in bases):
            CONTOUR_CLASSIFIER_REGISTRY[cls.get_id()] = cls
        return cls


class ContourClassifier(six.with_metaclass(MetaContourClassifier)):
    """This class is an interface for all the contour classifier algorithms
    included in motif. Each classifier must inherit from it and implement the
    following methods:
        - ``predict``
        - ``fit``
        - ``threshold``
        - ``get_id``
    ``threshold`` should return a float that determines the positive class
    threshold (e.g. ``score >= threshold`` : positive class,
    ``score < threshold`` : negative class)
    """
    def __init__(self):
        pass

    @property
    def threshold(self):
        """Property for setting threshold between classes"""
        raise NotImplementedError("This method must return a float that "
                                  "indicates the score cutoff between the "
                                  "positive and negative class.")

    def predict(self, X):
        """Method for predicting labels from input"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the prediction")

    def fit(self, X, Y):
        """Method for fitting the model"""
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the model fitting")

    @classmethod
    def get_id(cls):
        """Method to get the id of the classifier type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour classifier type")

    def score(self, y_predicted, y_target, y_prob=None):
        """ Compute metrics on classifier predictions

        Parameters
        ----------
        y_predicted : np.array [n_samples]
            Predicted class labels
        y_target : np.array [n_samples]
            Target class labels
        y_prob : np.array [n_samples] or None, default=None
            predicted probabilities. If None, auc is not computed

        Returns
        -------
        scores : dict
            dictionary of scores with the keys:
            'accuracy', 'mcc' (matthews correlation coefficient; None when
            more than two labels are present), 'precision', 'recall', 'f1',
            'support', 'confusion matrix' (rows/columns in sorted label
            order), 'auc score' (None when `y_prob` is None)
        """
        # Sorted so the confusion matrix layout does not depend on set
        # iteration order.
        labels = sorted(set(y_target).union(y_predicted))
        is_binary = len(labels) <= 2

        scores = {}
        scores['accuracy'] = metrics.accuracy_score(y_target, y_predicted)

        if is_binary:
            scores['mcc'] = metrics.matthews_corrcoef(y_target, y_predicted)
        else:
            scores['mcc'] = None

        (scores['precision'],
         scores['recall'],
         scores['f1'],
         scores['support']) = metrics.precision_recall_fscore_support(
             y_target, y_predicted
         )

        scores['confusion matrix'] = metrics.confusion_matrix(
            y_target, y_predicted, labels=labels
        )

        if y_prob is not None:
            # AUC only depends on the ranking of the scores, so they are
            # passed through unshifted; asarray also accepts plain lists.
            scores['auc score'] = metrics.roc_auc_score(
                y_target, np.asarray(y_prob), average='weighted'
            )
        else:
            scores['auc score'] = None

        return scores


###############################################################################
# Maps ``cls.get_id()`` -> class; filled in by MetaContourDecoder.__new__
# whenever a class with a base named "ContourDecoder" is created.
CONTOUR_DECODER_REGISTRY = {}  # All available decoders


class MetaContourDecoder(type):
    """Meta-class to register the available decoders.

    Every class created with this meta-class that lists a base named
    ``ContourDecoder`` is stored in ``CONTOUR_DECODER_REGISTRY`` under the
    key returned by its ``get_id()`` class method.
    """
    def __new__(meta, name, bases, class_dict):
        cls = type.__new__(meta, name, bases, class_dict)
        # Register classes that inherit from the base class ContourDecoder.
        # The match is by base-class name and only looks at direct bases, so
        # the base class itself and indirect descendants are not registered.
        if any(base.__name__ == "ContourDecoder" for base in bases):
            CONTOUR_DECODER_REGISTRY[cls.get_id()] = cls
        return cls


class ContourDecoder(six.with_metaclass(MetaContourDecoder)):
    """This class is an interface for all the contour decoder algorithms
    included in motif. Each decoder must inherit from it and implement the
    following methods:
        - ``decode``
        - ``get_id``

    Both methods raise ``NotImplementedError`` here.
    """
    def __init__(self):
        pass

    def decode(self, ctr, Y):
        """ Decode the output of the contour classifier.

        Parameters
        ----------
        ctr : Contours
            An instance of a Contours object
        Y : np.array [n_contours]
            Predicted contour scores.

        Returns
        -------
        times : np.ndarray
            Array of time stamps
        freqs : np.ndarray
            Array of f0 values in Hz

        """
        raise NotImplementedError("This method must contain the actual "
                                  "implementation of the decoder.")

    @classmethod
    def get_id(cls):
        """Method to get the id of the decoder type"""
        raise NotImplementedError("This method must return a string identifier"
                                  " of the contour decoder type")