Source code for motif.run

"""Code for running the full pipeline
"""
import numpy as np

from .core import CONTOUR_EXTRACTOR_REGISTRY
from .core import FEATURE_EXTRACTOR_REGISTRY
from .core import CONTOUR_CLASSIFIER_REGISTRY


def process(audio_files=None, training_pairs=None, testing_pairs=None,
            extract_id='salamon', feature_id='bitteli',
            classifier_id='random_forest'):
    """Run the contour extraction / feature / classification pipeline.

    Each of the three inputs is optional. A stage whose input is None is
    skipped and its outputs are returned as None.

    Parameters
    ----------
    audio_files : iterable or None
        Audio file paths to run through the pipeline without labels.
    training_pairs : iterable of (audio_filepath, annotation) or None
        Labeled pairs used to fit the classifier and compute training
        scores.
    testing_pairs : iterable of (audio_filepath, annotation) or None
        Labeled pairs used to compute testing scores.
    extract_id : str
        Key of the contour extractor in CONTOUR_EXTRACTOR_REGISTRY.
    feature_id : str
        Key of the feature extractor in FEATURE_EXTRACTOR_REGISTRY.
    classifier_id : str
        Key of the classifier in CONTOUR_CLASSIFIER_REGISTRY.

    Returns
    -------
    train_scores : object or None
        Result of the classifier's ``score`` on the training data, or
        None if `training_pairs` is None.
    test_scores : object or None
        Result of the classifier's ``score`` on the testing data, an
        empty dict if scoring raised ValueError, or None if
        `testing_pairs` is None.
    train_contours : list or None
        Contour objects extracted from the training files.
    test_contours : list or None
        Contour objects extracted from the testing files.
    contour_list : list or None
        Output of `process_audio_only` for `audio_files`.

    Raises
    ------
    RuntimeError
        If one of the identifiers is not present in its registry.
    """
    contour_extractor = get_extract_module(extract_id)
    feature_extractor = get_features_module(feature_id)
    contour_classifier = get_classify_module(classifier_id)

    # Give every output a default so that skipped stages do not leave
    # names unbound at the final return statement.
    train_scores = None
    test_scores = None
    train_contours = None
    test_contours = None
    contour_list = None

    if training_pairs is not None:
        X_train, Y_train, train_contours = process_with_labels(
            contour_extractor, feature_extractor, training_pairs
        )
        contour_classifier.fit(X_train, Y_train)

        # get training score: binarize the predictions with the
        # classifier's own threshold before scoring
        Y_prob = contour_classifier.predict(X_train)
        Y_pred = (np.array(Y_prob >= contour_classifier.threshold)).astype(int)
        train_scores = contour_classifier.score(Y_pred, Y_train, y_prob=Y_prob)

    if testing_pairs is not None:
        X_test, Y_test, test_contours = process_with_labels(
            contour_extractor, feature_extractor, testing_pairs
        )

        # get testing score
        # NOTE(review): unlike the training branch, the predictions are
        # passed to score() without thresholding -- confirm this is intended.
        Y_pred = contour_classifier.predict(X_test)
        try:
            test_scores = contour_classifier.score(Y_pred, Y_test)
        except ValueError:
            test_scores = {}

    if audio_files is not None:
        contour_list = process_audio_only(
            contour_extractor, feature_extractor, contour_classifier,
            audio_files
        )

    return (
        train_scores, test_scores, train_contours, test_contours, contour_list
    )


def process_audio_only(contour_extractor, feature_extractor,
                       contour_classifier, audio_files):
    """Extract, featurize and classify contours for unlabeled audio files.

    Parameters
    ----------
    contour_extractor : object
        Object providing ``compute_contours(audio_filepath)``.
    feature_extractor : object
        Object providing ``compute_all(contours)``.
    contour_classifier : object
        Object providing ``predict(features)``.
    audio_files : iterable
        Audio file paths to process.

    Returns
    -------
    contour_list : list of tuple
        One ``(contours, features, predictions)`` tuple per audio file,
        in input order.
    """
    def _analyze(path):
        # Run the three pipeline stages for a single file.
        contours = contour_extractor.compute_contours(path)
        features = feature_extractor.compute_all(contours)
        predictions = contour_classifier.predict(features)
        return (contours, features, predictions)

    return [_analyze(path) for path in audio_files]


def process_with_labels(contour_extractor, feature_extractor, file_pairs):
    """Compute contours, features and labels for annotated audio files.

    Parameters
    ----------
    contour_extractor : object
        Object providing ``compute_contours(audio_filepath)``; the returned
        contours object must provide ``compute_labels(annotation)``.
    feature_extractor : object
        Object providing ``compute_all(contours)``.
    file_pairs : iterable of (audio_filepath, annotation)
        Audio files paired with their annotations.

    Returns
    -------
    X : np.ndarray
        Features of all files, concatenated in input order.
    Y : np.ndarray
        Labels of all files, concatenated in input order.
    contour_list : list
        The contours object computed for each file.

    Raises
    ------
    ValueError
        If `file_pairs` is empty, because ``np.concatenate`` needs at least
        one array.
    """
    contour_list = []
    features_list = []
    labels_list = []

    for audio_filepath, annotation in file_pairs:
        contours = contour_extractor.compute_contours(audio_filepath)
        # compute_labels returns a pair; only its first element is used.
        labels, _ = contours.compute_labels(annotation)
        features = feature_extractor.compute_all(contours)

        features_list.append(features)
        labels_list.append(labels)
        contour_list.append(contours)

    X = np.concatenate(features_list)
    Y = np.concatenate(labels_list)
    return X, Y, contour_list
def get_module(module_id, module_registry):
    """Instantiate the module registered under an identifier.

    Parameters
    ----------
    module_id : str or None
        Module identifier (e.g., bitteli, melodia).
    module_registry : dict
        Dictionary mapping module identifiers to callables (classes) that
        build the module when called without arguments.

    Returns
    -------
    module : object
        A new instance of the selected module, or None if `module_id`
        is None.

    Raises
    ------
    RuntimeError
        If `module_id` is not a key of `module_registry`.
    """
    if module_id is None:
        return None

    # Only the lookup is guarded: a KeyError raised inside the module's
    # constructor must not be misreported as an unknown algorithm.
    try:
        module_class = module_registry[module_id]
    except KeyError:
        raise RuntimeError("Algorithm %s can not be found in motif!" %
                           module_id)

    return module_class()
def get_extract_module(extract_id):
    """Build the contour extractor registered under `extract_id`.

    Parameters
    ----------
    extract_id : str or None
        Extract algorithm identifier (e.g., salamon, hll).

    Returns
    -------
    module : object
        Instance looked up in CONTOUR_EXTRACTOR_REGISTRY, or None if
        `extract_id` is None.
    """
    return get_module(module_id=extract_id,
                      module_registry=CONTOUR_EXTRACTOR_REGISTRY)
def get_features_module(feature_id):
    """Build the feature extractor registered under `feature_id`.

    Parameters
    ----------
    feature_id : str or None
        Feature algorithm identifier (e.g., bitteli, melodia).

    Returns
    -------
    module : object
        Instance looked up in FEATURE_EXTRACTOR_REGISTRY, or None if
        `feature_id` is None.
    """
    return get_module(module_id=feature_id,
                      module_registry=FEATURE_EXTRACTOR_REGISTRY)
def get_classify_module(classifier_id):
    """Build the contour classifier registered under `classifier_id`.

    Parameters
    ----------
    classifier_id : str or None
        Classifier algorithm identifier (e.g., random_forest, mv_gaussian).

    Returns
    -------
    module : object
        Instance looked up in CONTOUR_CLASSIFIER_REGISTRY, or None if
        `classifier_id` is None.
    """
    return get_module(module_id=classifier_id,
                      module_registry=CONTOUR_CLASSIFIER_REGISTRY)