# Source code for traveltimes_prediction.models.algorithms.mbkmeans_wrapper
from sklearn.cluster import MiniBatchKMeans
import numpy as np
from copy import deepcopy
class MBKMeansWrapper(MiniBatchKMeans):
    """
    Class wrapper for MiniBatchKMeans.

    On top of the parent estimator this class:

    * remembers the per-feature medians of the training data during ``fit``
      and uses them to replace NaN values in samples given to ``predict``;
    * can export its fitted state to a plain dictionary (``dump``) and rebuild
      an instance from such a dictionary (``load``).
    """

    name = 'MBKMeans'

    def __init__(self, **kwargs):
        """
        Constructor.

        :param dict kwargs: keyword arguments forwarded unchanged to the parent constructor
        """
        super().__init__(**kwargs)
        # Per-feature medians of the training data; filled in by fit() or load().
        self._median_imputer = None

    def fit(self, X, y=None):
        """
        Method for fitting of the model.

        The medians of ``X`` are stored first so that they are available for
        imputing during prediction, then the parent ``fit`` is called.

        :param numpy.ndarray X: training data
        :param numpy.ndarray y: passed through to the parent ``fit``
        :return: object - whatever the parent ``fit`` returns
        """
        self._get_descriptors(X=X)
        return super().fit(X=X, y=y)

    def predict(self, X):
        """
        Method for prediction of the output cluster.

        NaN values in ``X`` are replaced by the stored training medians before
        the parent ``predict`` is called. The caller's array is not modified.

        :param numpy.ndarray X: sample(s) to assign to clusters
        :return: np.ndarray
        """
        # NOTE(review): a 1-D sample is handed to the parent with its shape
        # unchanged; whether the parent accepts 1-D input presumably depends on
        # the installed scikit-learn version - confirm.
        X = self._impute_prediction_sample(X=X)
        return super().predict(X=X)

    def dump(self):
        """
        Method for dumping of the existing model.

        :return: dict - ``{'model': {...fitted state as lists/scalars...}, 'model_type': name}``
        """
        state = {
            'cluster_centers_': self.cluster_centers_.tolist(),
            'labels_': self.labels_.tolist(),
            'inertia_': self.inertia_,
            '_median_imputer': self._median_imputer.tolist(),
        }
        return {'model': state, 'model_type': self.name}

    @classmethod
    def load(cls, model):
        """
        Method for the loading - recreating of the dumped model.

        Accepts either the inner state dictionary (the value stored under
        ``'model'`` by ``dump``) or the complete dictionary returned by
        ``dump``, so that ``load(wrapper.dump())`` works directly.

        :param dict model: dumped state
        :return: MBKMeans - new instance built with default constructor arguments
        :raises KeyError: if a required state key is missing
        """
        # dump() nests the state under 'model'; unwrap it when the state keys
        # are not present at the top level.
        if 'cluster_centers_' not in model and 'model' in model:
            model = model['model']

        # cls() instead of a hard-coded class name so subclasses get their own type back.
        inst = cls()
        inst.cluster_centers_ = np.array(model['cluster_centers_'])
        inst.labels_ = np.array(model['labels_'])
        inst.inertia_ = model['inertia_']
        inst._median_imputer = np.array(model['_median_imputer'])
        return inst

    def _get_descriptors(self, X):
        """
        Method for retrieving the descriptors of the training data. Can be used for imputing during prediction phase.

        Stores the median taken along axis 0 of ``X`` in ``self._median_imputer``.

        :param np.array X: Training data
        """
        self._median_imputer = np.median(X, axis=0)

    def _impute_prediction_sample(self, X):
        """
        Method for imputing of the data. Uses descriptors retrieved during training.

        Every NaN entry is replaced by the stored median at the same position
        along the last axis. Works on a copy; ``X`` itself is left untouched
        and the copy keeps the dtype of ``X``.

        :param np.array X: one sample (1-D) or several samples (2-D, one per row)
        :return: np.array - copy of ``X`` with NaN entries filled in
        """
        filled = np.array(X, copy=True)
        missing = np.isnan(filled)
        # Only touch the stored medians when there is something to impute.
        if missing.any():
            # Broadcasting lines the median vector up with every row, so the
            # same boolean mask selects the matching median for each NaN.
            medians = np.broadcast_to(self._median_imputer, filled.shape)
            filled[missing] = medians[missing]
        return filled