Source code for traveltimes_prediction.models.algorithms.dbscan_wrapper

from sklearn.cluster import DBSCAN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.exceptions import NotFittedError

from ..base_model import BaseModel
import numpy as np


class DBScanWrapper(BaseModel, DBSCAN):
    """
    Class-wrapper for DBSCAN - allowing to predict the cluster of given sample.

    DBSCAN only labels the samples it was fitted on. This wrapper additionally
    trains a kNN classifier on the core samples and the noise samples found by
    DBSCAN, and uses that classifier to assign a cluster label to new samples.
    """

    name = 'DBSCAN'

    def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
                 algorithm='auto', leaf_size=30, p=None, n_jobs=1):
        """
        Set up the DBSCAN clusterizer and the (not yet fitted) kNN label predictor.

        All parameters are forwarded unchanged to the parent constructor.
        """
        super().__init__(eps=eps, min_samples=min_samples, metric=metric,
                         algorithm=algorithm, leaf_size=leaf_size, p=p,
                         n_jobs=n_jobs)
        self._knn_clf = KNeighborsClassifier(n_neighbors=5)
        # Training set of the kNN predictor; filled by fit() or load().
        self._knn_X = None
        self._knn_y = None

    def fit(self, X, y=None, sample_weight=None):
        """
        Fit method for retrieving the cluster labels.
        The predictor of clusters' labels is fitted here too.

        :param np.ndarray X: features matrix -> SxF
        :param np.ndarray y: true values vector -> S
        :param sample_weight: forwarded to the parent ``fit``
        :return: self
        """
        super(DBScanWrapper, self).fit(X=X, y=y, sample_weight=sample_weight)

        # Boolean-mask indexing below needs an array; plain lists would fail.
        X = np.asarray(X)

        core_labels = self.labels_[self.core_sample_indices_]
        noise = X[self.labels_ == -1]
        # Build the noise labels with the dtype of the cluster labels. A plain
        # ``[-1] * 0`` list turns into an empty float array and would promote
        # every stacked label to float when the data contains no noise.
        noise_labels = np.full(len(noise), -1, dtype=core_labels.dtype)

        self._knn_X = np.vstack((self.components_, noise))
        self._knn_y = np.hstack((core_labels, noise_labels))
        self._knn_clf.fit(X=self._knn_X, y=self._knn_y)
        return self

    def predict(self, X):
        """
        Method for prediction of the cluster for given samples.
        Cluster is predicted using kNN algorithm.

        :param np.ndarray X: samples to predict(feature) -> SxF
        :return: np.ndarray predicted labels -> S
        :raises NotFittedError: if the wrapper was neither fitted nor loaded
        """
        try:
            return self._knn_clf.predict(X=X)
        except NotFittedError:
            # An instance restored by load() carries the kNN training data but
            # an unfitted classifier - fit it lazily on first use.
            if self._knn_X is None or self._knn_y is None:
                # No training data at all: nothing to fit on, report it as is.
                raise
            self._knn_clf.fit(X=self._knn_X, y=self._knn_y)
            return self._knn_clf.predict(X=X)

    def dump(self):
        """
        Method for dumping of the important features of algorithm allowing to reconstruct it.

        :return: dict - fitted attributes as plain lists under key ``'model'``,
                 the algorithm name under key ``'model_type'``
        """
        d = dict()
        d['model'] = dict()
        d['model']['labels_'] = self.labels_.tolist()
        d['model']['core_sample_indices_'] = self.core_sample_indices_.tolist()
        d['model']['components_'] = self.components_.tolist()
        d['model']['_knn_X'] = self._knn_X.tolist()
        d['model']['_knn_y'] = self._knn_y.tolist()
        d['model_type'] = self.name
        return d

    @staticmethod
    def load(dumped_model):
        """
        Method for loading (setting) the clusterizer from dumped dict.

        :param dict dumped_model: either the whole dict produced by ``dump()``
                                  or just its inner ``'model'`` dict
        :return: DBScanWrapper - new instance with the dumped attributes set
        """
        # dump() nests the attributes under 'model'; accept that layout as well
        # as the bare attribute dict so that load(dump()) round-trips.
        attrs = dumped_model.get('model', dumped_model)

        inst = DBScanWrapper()
        inst.labels_ = np.array(attrs['labels_'])
        inst.core_sample_indices_ = np.array(attrs['core_sample_indices_'])
        inst.components_ = np.array(attrs['components_'])
        inst._knn_X = np.array(attrs['_knn_X'])
        inst._knn_y = np.array(attrs['_knn_y'])
        return inst