Source code for traveltimes_prediction.models.algorithms.dbscan_wrapper
from sklearn.cluster import DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.exceptions import NotFittedError
from ..base_model import BaseModel
import numpy as np
class DBScanWrapper(BaseModel, DBSCAN):
    """
    Class-wrapper for DBSCAN - allowing to predict the cluster of given sample.

    ``fit`` runs the clustering and then trains a k-nearest-neighbours classifier
    (5 neighbours) on the core samples and the noise samples; ``predict`` uses that
    classifier to assign a cluster label (``-1`` = noise) to arbitrary samples.
    """

    name = 'DBSCAN'

    def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
                 algorithm='auto', leaf_size=30, p=None, n_jobs=1):
        """
        Create the clusterizer together with its (not yet fitted) kNN label predictor.

        All arguments are forwarded unchanged, as keyword arguments, to the
        superclass initializer.

        :param float eps:
        :param int min_samples:
        :param metric:
        :param str algorithm:
        :param int leaf_size:
        :param p:
        :param n_jobs:
        """
        super().__init__(eps=eps, min_samples=min_samples, metric=metric,
                         algorithm=algorithm, leaf_size=leaf_size, p=p, n_jobs=n_jobs)
        self._knn_clf = KNeighborsClassifier(n_neighbors=5)
        # Training data of the kNN predictor; kept so that it can be dumped and
        # so that the predictor can be re-fitted lazily after ``load``.
        self._knn_X = None
        self._knn_y = None

    def fit(self, X, y=None, sample_weight=None):
        """
        Fit method for retrieving the cluster labels. The predictor of clusters' labels is fitted here too.

        :param np.ndarray X: features matrix -> SxF
        :param np.ndarray y: true values vector -> S
        :param sample_weight:
        :return: self
        """
        super().fit(X=X, y=y, sample_weight=sample_weight)

        core_labels = self.labels_[self.core_sample_indices_]
        # np.asarray lets the boolean mask work for list-like input as well.
        noise = np.asarray(X)[self.labels_ == -1]
        # Build the noise labels with the dtype of the cluster labels: stacking an
        # empty Python list would silently promote the integer labels to float.
        noise_labels = np.full(len(noise), -1, dtype=core_labels.dtype)

        self._knn_X = np.vstack((self.components_, noise))
        self._knn_y = np.hstack((core_labels, noise_labels))
        self._knn_clf.fit(X=self._knn_X, y=self._knn_y)
        return self

    def predict(self, X):
        """
        Method for prediction of the cluster for given samples. Cluster is predicted using kNN algorithm.

        If the kNN predictor is not fitted yet (e.g. the instance was created by
        ``load``), it is fitted first from the stored training data.

        :param np.ndarray X: samples to predict(feature) -> SxF
        :return: np.ndarray predicted labels -> S
        :raises NotFittedError: if there is no stored training data to fit the predictor from
        """
        try:
            return self._knn_clf.predict(X=X)
        except NotFittedError:
            if self._knn_X is None or self._knn_y is None:
                # Neither fit() nor load() has provided training data.
                raise NotFittedError(
                    "DBScanWrapper is not fitted; call fit() or load() before predict()."
                )
            self._knn_clf.fit(X=self._knn_X, y=self._knn_y)
            return self._knn_clf.predict(X=X)

    def dump(self):
        """
        Method for dumping of the important features of algorithm allowing to reconstruct it.

        Only the fitted arrays are stored (as plain lists); the constructor
        arguments are not part of the dump.

        :return: dict - important features are values in dict with corresponding keys
        """
        return {
            'model': {
                'labels_': self.labels_.tolist(),
                'core_sample_indices_': self.core_sample_indices_.tolist(),
                'components_': self.components_.tolist(),
                '_knn_X': self._knn_X.tolist(),
                '_knn_y': self._knn_y.tolist(),
            },
            'model_type': self.name,
        }

    @staticmethod
    def load(dumped_model):
        """
        Method for loading (setting) the clusterizer from dumped dict.

        Accepts either the complete dict returned by ``dump`` (arrays nested under
        the ``'model'`` key) or that inner dict itself. The instance is created
        with default constructor arguments; its kNN predictor is fitted lazily on
        the first ``predict`` call.

        :param dict dumped_model:
        :return: DBScanWrapper - new instance with the dumped attributes restored
        """
        # ``dump`` nests the arrays under 'model'; fall back to the flat layout.
        payload = dumped_model.get('model', dumped_model)

        inst = DBScanWrapper()
        inst.labels_ = np.array(payload['labels_'])
        inst.core_sample_indices_ = np.array(payload['core_sample_indices_'])
        inst.components_ = np.array(payload['components_'])
        inst._knn_X = np.array(payload['_knn_X'])
        inst._knn_y = np.array(payload['_knn_y'])
        return inst