"""Code for running the full pipeline
"""
import numpy as np
from .core import CONTOUR_EXTRACTOR_REGISTRY
from .core import FEATURE_EXTRACTOR_REGISTRY
from .core import CONTOUR_CLASSIFIER_REGISTRY
def process(audio_files=None, training_pairs=None, testing_pairs=None,
            extract_id='salamon', feature_id='bitteli',
            classifier_id='random_forest'):
    """Run the contour pipeline: extract, featurize, train, score, predict.

    Each of the three inputs is optional. The stage it drives is skipped
    when the input is None, and the outputs of a skipped stage are returned
    as None.

    Parameters
    ----------
    audio_files : iterable of str or None
        Audio file paths to classify without labels.
    training_pairs : iterable of (audio_filepath, annotation) or None
        Labeled pairs used to fit the classifier and compute training scores.
    testing_pairs : iterable of (audio_filepath, annotation) or None
        Labeled pairs used to compute testing scores.
    extract_id : str
        Contour extraction algorithm identifier.
    feature_id : str
        Contour feature algorithm identifier.
    classifier_id : str
        Contour classifier algorithm identifier.

    Returns
    -------
    train_scores : object or None
        Value returned by the classifier's ``score`` on the training data.
    test_scores : object or None
        Value returned by the classifier's ``score`` on the testing data,
        or an empty dict if scoring raised ValueError.
    train_contours : list or None
        Contour objects computed for the training files.
    test_contours : list or None
        Contour objects computed for the testing files.
    contour_list : list of tuple or None
        One (contours, features, predictions) tuple per entry of
        ``audio_files``.
    """
    contour_extractor = get_extract_module(extract_id)
    feature_extractor = get_features_module(feature_id)
    contour_classifier = get_classify_module(classifier_id)

    # Every output defaults to None so that skipping a stage cannot leave
    # a name unbound when the return tuple is built.
    train_scores = None
    test_scores = None
    train_contours = None
    test_contours = None
    contour_list = None

    if training_pairs is not None:
        X_train, Y_train, train_contours = process_with_labels(
            contour_extractor, feature_extractor, training_pairs
        )
        contour_classifier.fit(X_train, Y_train)

        # get training score
        Y_prob = contour_classifier.predict(X_train)
        Y_pred = np.asarray(Y_prob >= contour_classifier.threshold).astype(int)
        train_scores = contour_classifier.score(
            Y_pred, Y_train, y_prob=Y_prob
        )

    if testing_pairs is not None:
        X_test, Y_test, test_contours = process_with_labels(
            contour_extractor, feature_extractor, testing_pairs
        )

        # get testing score; threshold the predictions exactly as in the
        # training branch so both scores are computed the same way
        Y_prob = contour_classifier.predict(X_test)
        Y_pred = np.asarray(Y_prob >= contour_classifier.threshold).astype(int)
        try:
            test_scores = contour_classifier.score(
                Y_pred, Y_test, y_prob=Y_prob
            )
        except ValueError:
            # Best effort: scoring failures yield empty scores, not a crash.
            test_scores = {}

    if audio_files is not None:
        contour_list = process_audio_only(
            contour_extractor, feature_extractor, contour_classifier,
            audio_files
        )

    return (
        train_scores, test_scores, train_contours, test_contours, contour_list
    )
def process_audio_only(contour_extractor, feature_extractor,
                       contour_classifier, audio_files):
    """Extract, featurize and classify contours for unlabeled audio files.

    Parameters
    ----------
    contour_extractor : object
        Provides ``compute_contours(audio_filepath)``.
    feature_extractor : object
        Provides ``compute_all(contours)``.
    contour_classifier : object
        Provides ``predict(features)``.
    audio_files : iterable of str
        Audio file paths to process, in order.

    Returns
    -------
    contour_list : list of tuple
        One (contours, features, predictions) tuple per audio file.
    """
    def _run_one(path):
        # The three stages run strictly in sequence for a single file.
        contours = contour_extractor.compute_contours(path)
        features = feature_extractor.compute_all(contours)
        predictions = contour_classifier.predict(features)
        return (contours, features, predictions)

    return [_run_one(path) for path in audio_files]
def process_with_labels(contour_extractor, feature_extractor, file_pairs):
    """Compute contours, features and labels for annotated audio files.

    Parameters
    ----------
    contour_extractor : object
        Provides ``compute_contours(audio_filepath)``; the returned contour
        object must provide ``compute_labels(annotation)``.
    feature_extractor : object
        Provides ``compute_all(contours)``.
    file_pairs : iterable of (audio_filepath, annotation)
        Audio files paired with the annotation used to label their contours.

    Returns
    -------
    X : np.ndarray
        Features of all files, concatenated in file order.
    Y : np.ndarray
        Labels of all files, concatenated in the same order.
    contour_list : list
        The contour object computed for each file.

    Raises
    ------
    ValueError
        If ``file_pairs`` is empty, since ``np.concatenate`` needs at least
        one array.
    """
    contour_list = []
    features_list = []
    labels_list = []

    for audio_filepath, annotation in file_pairs:
        ctr = contour_extractor.compute_contours(audio_filepath)
        # Labels are computed before features, as in the original order.
        labels, _ = ctr.compute_labels(annotation)
        features = feature_extractor.compute_all(ctr)

        features_list.append(features)
        labels_list.append(labels)
        contour_list.append(ctr)

    X = np.concatenate(features_list)
    Y = np.concatenate(labels_list)
    return X, Y, contour_list
def get_module(module_id, module_registry):
    """Instantiate the module registered under an algorithm identifier.

    Parameters
    ----------
    module_id : str or None
        Module identifier (e.g., bitteli, melodia).
    module_registry : dict
        Dictionary mapping module identifiers to callables (classes) that
        are invoked with no arguments to build the module.

    Returns
    -------
    module : object
        Newly constructed instance of the selected module.
        None if ``module_id`` is None.

    Raises
    ------
    RuntimeError
        If ``module_id`` is not a key of ``module_registry``.
    """
    if module_id is None:
        return None

    # Only the lookup is guarded: a KeyError raised inside the module's
    # own constructor must not be misreported as a missing algorithm.
    try:
        module_class = module_registry[module_id]
    except KeyError:
        raise RuntimeError("Algorithm %s can not be found in motif!" %
                           module_id)

    return module_class()
def get_features_module(feature_id):
    """Obtain a configured ContourFeatures given an algorithm identifier.

    Parameters
    ----------
    feature_id : str or None
        Feature algorithm identifier (e.g., bitteli, melodia).

    Returns
    -------
    module : object
        Object containing the selected ContourFeatures module, looked up
        in ``FEATURE_EXTRACTOR_REGISTRY``.
        None if ``feature_id`` is None.
    """
    return get_module(feature_id, FEATURE_EXTRACTOR_REGISTRY)
def get_classify_module(classifier_id):
    """Obtain a configured Classifier given an algorithm identifier.

    Parameters
    ----------
    classifier_id : str or None
        Classifier algorithm identifier (e.g., random_forest, mv_gaussian).

    Returns
    -------
    module : object
        Object containing the selected Classifier module, looked up in
        ``CONTOUR_CLASSIFIER_REGISTRY``.
        None if ``classifier_id`` is None.
    """
    return get_module(classifier_id, CONTOUR_CLASSIFIER_REGISTRY)