# Source code for motif.contour_classifiers.random_forest
"""Random Forest contour classifier.
"""
from __future__ import print_function
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils import shuffle
from scipy.stats import randint as sp_randint
from motif.core import ContourClassifier
class RandomForest(ContourClassifier):
    """Random Forest contour classifier.

    The forest's hyper-parameters are chosen by a randomized,
    cross-validated search each time `fit` is called.

    Attributes
    ----------
    n_estimators : int
        Number of trees in the forest
    n_jobs : int
        Number of cores to use. -1 uses maximum available
    class_weight : str
        How to set class weights.
    n_iter_search : int
        Number of parameter settings sampled by the randomized search
    random_state : int or None
        Optional random seed to reproduce results
    clf : sklearn.ensemble.RandomForestClassifier or None
        Best estimator found by the search; None until `fit` is called.

    """

    def __init__(self, n_estimators=50, n_jobs=-1, class_weight='balanced',
                 n_iter_search=100, random_state=None):
        """Store the configuration; no model is trained here.

        Parameters
        ----------
        n_estimators : int
            Number of trees in the forest
        n_jobs : int
            Number of cores to use. -1 uses maximum available
        class_weight : str
            How to set class weights.
        n_iter_search : int
            Number of iterations to search
        random_state : int or None
            Optional random seed to reproduce results

        """
        ContourClassifier.__init__(self)

        self.n_estimators = n_estimators
        self.n_jobs = n_jobs
        self.class_weight = class_weight
        self.n_iter_search = n_iter_search
        self.random_state = random_state
        # Populated by fit(); the predict methods refuse to run while None.
        self.clf = None

    def predict(self, X):
        """Compute probability predictions.

        Parameters
        ----------
        X : np.array [n_samples, n_features]
            Features.

        Returns
        -------
        p : np.array [n_samples]
            Predicted probabilities, taken from the second column of
            ``clf.predict_proba`` (the class at index 1 of
            ``clf.classes_``).

        Raises
        ------
        ReferenceError
            If `fit` has not been called yet.

        """
        if self.clf is None:
            raise ReferenceError(
                "fit must be called before predict can be called"
            )
        p = self.clf.predict_proba(X)[:, 1]
        return p

    def predict_discrete_label(self, X):
        """Compute discrete class predictions.

        Parameters
        ----------
        X : np.array [n_samples, n_features]
            Features.

        Returns
        -------
        Y_pred : np.array [n_samples]
            predicted classes

        Raises
        ------
        ReferenceError
            If `fit` has not been called yet.

        """
        if self.clf is None:
            raise ReferenceError(
                "fit must be called before predict can be called"
            )
        Y_pred = self.clf.predict(X)
        return Y_pred

    def fit(self, X, Y):
        """Train classifier.

        Shuffles the data, runs a randomized hyper-parameter search
        scored by weighted F1, and stores the best estimator in
        ``self.clf``.

        Parameters
        ----------
        X : np.array [n_samples, n_features]
            Training features.
        Y : np.array [n_samples]
            Training labels

        """
        x_shuffle, y_shuffle = shuffle(X, Y, random_state=self.random_state)

        clf_cv = RFC(n_estimators=self.n_estimators, n_jobs=self.n_jobs,
                     class_weight=self.class_weight,
                     random_state=self.random_state)

        # scipy's randint excludes the upper bound, so e.g. max_depth is
        # drawn from 1..100 inclusive.
        param_dist = {
            "max_depth": sp_randint(1, 101),
            # 'auto' was an alias of 'sqrt' for classifiers; it was
            # deprecated in scikit-learn 1.1 and removed in 1.3, where it
            # fails parameter validation. 'sqrt' is listed twice on purpose
            # so the list length, and therefore the sampling weight of each
            # option, stays what it was with 'auto' present.
            "max_features": [None, 'sqrt', 'sqrt', 'log2'],
            "min_samples_split": sp_randint(2, 11),
            "min_samples_leaf": sp_randint(1, 11),
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }

        # refit=True retrains the best configuration on all of the
        # (shuffled) data, so best_estimator_ is ready for prediction.
        random_search = RandomizedSearchCV(
            clf_cv, param_distributions=param_dist, refit=True,
            n_iter=self.n_iter_search, scoring='f1_weighted',
            random_state=self.random_state
        )
        random_search.fit(x_shuffle, y_shuffle)
        self.clf = random_search.best_estimator_

    @property
    def threshold(self):
        """The threshold determining the positive class.

        Returns
        -------
        threshold : float
            Fixed at 0.5; presumably compared against the scores
            returned by `predict` (confirm against the base class).

        """
        return 0.5

    @classmethod
    def get_id(cls):
        """The ContourClassifier identifier

        Returns
        -------
        id : string
            class identifier

        """
        return 'random_forest'