Source code for traveltimes_prediction.models.algorithms.mbkmeans_wrapper

from sklearn.cluster import MiniBatchKMeans
import numpy as np
from copy import  deepcopy


class MBKMeansWrapper(MiniBatchKMeans):
    """
    Class wrapper for MiniBatchKMeans.

    On top of the wrapped estimator it stores the per-feature medians of the
    training data and uses them to replace NaN values in samples passed to
    :meth:`predict`. It also provides :meth:`dump` / :meth:`load` to convert
    the fitted state to and from a plain dictionary.
    """

    name = 'MBKMeans'

    def __init__(self, **kwargs):
        """
        Constructor.

        :param dict kwargs: Keyword arguments forwarded unchanged to
            ``MiniBatchKMeans.__init__``.
        """
        super().__init__(**kwargs)
        # Per-feature medians of the training data; set by fit() or load().
        self._median_imputer = None

    def fit(self, X, y=None):
        """
        Method for fitting of the model.

        The per-feature medians of ``X`` are stored before the wrapped
        estimator is fitted, so they can be used for imputing in predict().

        :param numpy.ndarray X: Training data.
        :param numpy.ndarray y: Passed through to ``MiniBatchKMeans.fit``.
        :return: object - self
        """
        self._get_descriptors(X=X)
        return super().fit(X=X, y=y)

    def predict(self, X):
        """
        Method for prediction of the output cluster.

        NaN values in ``X`` are replaced by the stored training medians first.
        A one-dimensional ``X`` is treated as a single sample and reshaped to
        ``(1, n_features)`` before it is handed to the wrapped estimator.

        :param numpy.ndarray X: One sample (1-D) or a batch of samples (2-D).
        :return: np.ndarray
        """
        X = self._impute_prediction_sample(X=X)
        if X.ndim == 1:
            # The imputer supports a single 1-D sample, but the wrapped
            # predict() works on 2-D input, so promote it to a one-row batch.
            X = X.reshape(1, -1)
        return super().predict(X=X)

    def dump(self):
        """
        Method for dumping of the existing model.

        :return: dict - ``{'model': {...}, 'model_type': name}`` where the
            inner dict holds ``cluster_centers_``, ``labels_``, ``inertia_``
            and ``_median_imputer`` as plain Python lists / float.
        """
        return {
            'model': {
                'cluster_centers_': np.asarray(self.cluster_centers_).tolist(),
                'labels_': np.asarray(self.labels_).tolist(),
                # Coerce a possible NumPy scalar to a built-in float so the
                # dump consists of plain Python types only.
                'inertia_': float(self.inertia_),
                '_median_imputer': np.asarray(self._median_imputer).tolist(),
            },
            'model_type': self.name,
        }

    @staticmethod
    def load(model):
        """
        Method for the loading - recreating of the dumped model.

        Accepts either the complete dictionary returned by :meth:`dump` or
        only its ``'model'`` sub-dictionary.

        NOTE(review): only the attributes written by dump() are restored;
        whether that is enough for predict() may depend on the installed
        scikit-learn version - confirm.

        :param dict model: Dumped model state.
        :return: MBKMeans
        """
        # dump() nests the state under 'model'; unwrap it when present so
        # that load(dump()) round-trips, while bare state dicts still work.
        state = model.get('model', model)

        inst = MBKMeansWrapper()
        inst.cluster_centers_ = np.array(state['cluster_centers_'])
        inst.labels_ = np.array(state['labels_'])
        inst.inertia_ = state['inertia_']
        inst._median_imputer = np.array(state['_median_imputer'])
        return inst

    def _get_descriptors(self, X):
        """
        Method for retrieving the descriptors of the training data.
        Can be used for imputing during prediction phase.

        :param np.array X: Training data
        """
        self._median_imputer = np.median(X, axis=0)

    def _impute_prediction_sample(self, X):
        """
        Method for imputing of the data. Uses descriptors retrieved during training.

        Works on a copy, the caller's data is never modified. Both a single
        1-D sample and a 2-D batch are supported.

        :param np.array X: Sample(s) possibly containing NaN values.
        :return: np.array - copy of ``X`` with NaNs replaced by the medians.
        :raises ValueError: If ``X`` contains NaN but no medians are stored.
        """
        # np.array always copies here and also accepts lists / array-likes.
        imputed = np.array(X)
        nan_mask = np.isnan(imputed)
        if not nan_mask.any():
            return imputed

        if self._median_imputer is None:
            raise ValueError(
                'Input contains NaN but no training medians are available; '
                'fit or load the model first.'
            )

        # Broadcasting the median vector to the input shape lets one masked
        # assignment handle the 1-D and the 2-D case alike.
        medians = np.broadcast_to(self._median_imputer, imputed.shape)
        imputed[nan_mask] = medians[nan_mask]
        return imputed