# Source code for traveltimes_prediction.models.time_domain_model

from sklearn import base
import numpy as np
import pandas as pd
import collections

from .base_model import BaseModel
from ..support_files import ColumnNames
from ..support_files.helpers import  check_params


class TimeDomainModel(base.BaseEstimator, BaseModel):
    """
    Regression model that trains one regressor per time window.

    Data from each day are split into X-minute windows (according to
    X[ColumnNames.FEAT_TIME_BIN]) and the windows are grouped by time.
    For each group of windows, an individual regressor is trained on that
    group together with its adjacent groups.
    """

    name = 'TimeDomainModel'

    def __init__(self, regressor=None, regressor_params=None):
        """
        Constructor.

        :param class regressor: Regressor class instantiated for each time window.
        :param dict regressor_params: Keyword arguments passed to the regressor
            constructor; ``None`` is treated as "no arguments" in :meth:`fit`.
        """
        super(TimeDomainModel, self).__init__()
        # Constructor arguments are stored untouched; defaults are resolved in fit().
        self.regressor = regressor
        self.regressor_params = regressor_params
        # Maps int(time window) -> fitted regressor; filled by fit() or load().
        self.regressor_dict = None

    def fit(self, X, Y):
        """
        Fit one regressor per time window.

        Each regressor is trained on the rows of its own window plus the rows
        of the neighbouring windows in the sorted list of unique windows.

        :param pd.DataFrame X: data matrix X for training; must contain the
            ColumnNames.FEAT_TIME_BIN column
        :param pd.DataFrame Y: data vector Y - outputs, indexable by a boolean mask
        :return: self
        :raises ValueError: if X is None, if X lacks the time-bin column or if
            no regressor class was given
        """
        if X is None:
            raise ValueError('[%s] None data for fitting !!!' % self.name)
        if ColumnNames.FEAT_TIME_BIN not in X.columns:
            raise ValueError('[%s] Data does not contain time_bin for creation of the time windows !!!' % self.name)
        if self.regressor is None:
            raise ValueError('[%s] No regressor given for fitting !!!' % self.name)

        # The default regressor_params=None must not be unpacked with ``**``.
        params = self.regressor_params if self.regressor_params is not None else {}

        time_window_col = X[ColumnNames.FEAT_TIME_BIN].values
        feature_columns = [c for c in X.columns if c != ColumnNames.FEAT_TIME_BIN]
        _X = X[feature_columns].values

        self.regressor_dict = dict()
        time_windows = np.unique(time_window_col)  # sorted unique time windows
        last_index = len(time_windows) - 1

        for i, window in enumerate(time_windows):
            # For i == 0 the index -1 wraps around to the last window.
            previous_window = time_windows[i - 1]
            # NOTE(review): for the last window the "next" neighbour is the last
            # window itself (index -1), not the first one, so the wrap-around is
            # asymmetric. Behaviour kept as is - confirm whether index 0 was meant.
            next_window = time_windows[i + 1 if i < last_index else -1]

            query = (
                (time_window_col == window)
                | (time_window_col == previous_window)
                | (time_window_col == next_window)
            )
            train_x = _X[query, :]
            train_y = Y[query]
            self.regressor_dict[int(window)] = self.regressor(**params).fit(X=train_x, y=train_y)

        return self

    @check_params
    def predict(self, X):
        """
        Predict for the given feature vector(s).

        Every row is predicted by the regressor stored for its time window.
        When no regressor exists for a window, the most recent prediction made
        in this call is reused, or -1 if there has been none yet.

        :param pd.DataFrame X: or pandas.Series - feature vector(s) including
            the ColumnNames.FEAT_TIME_BIN entry
        :return: numpy array of predictions when there is more than one result,
            otherwise the single predicted value
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
        # ``collections.abc``.
        from collections.abc import Iterable

        is_frame = isinstance(X, pd.DataFrame)

        if is_frame:
            windows = X[ColumnNames.FEAT_TIME_BIN].values
            _X = X.drop(ColumnNames.FEAT_TIME_BIN, axis=1).values
        else:
            windows = X[ColumnNames.FEAT_TIME_BIN]
            _X = X.drop(ColumnNames.FEAT_TIME_BIN).values

        if not isinstance(windows, Iterable):
            windows = [windows]  # single feature vector -> single window
        if len(_X.shape) == 1:
            _X = _X.reshape(1, -1)

        results = []
        _last = None  # last successful prediction, used as a fallback
        for i, w in enumerate(windows):
            if w in self.regressor_dict:
                res = self.regressor_dict[w].predict(X=_X[i, :].reshape(1, -1))
                _last = res
                results.extend(res)
            else:
                results.extend(_last if _last is not None else [-1])

        return np.array(results) if len(results) > 1 else results[0]

    @staticmethod
    def load(model):
        """
        Create the model from its dump.

        :param dict model: Dumped model, keys are params' names, values are values.
        :return: object - instance of TimeDomainModel created from its dump
        """
        inst = TimeDomainModel()
        from .create_model import create_model
        # Keys are converted to int, because in JSON they have to be represented as strings.
        inst.regressor_dict = {int(k): create_model(v) for k, v in model['regressor_dict'].items()}
        return inst

    def dump(self):
        """
        Dump the model.

        :return: dict - Model dumped as dictionary, with keys as params' names
            and values as values of the params
        """
        d = dict()
        d['model'] = dict()
        # Keys are converted to strings because JSON requires that.
        d['model']['regressor_dict'] = {
            str(window): regressor.dump() for window, regressor in self.regressor_dict.items()
        }
        d['model_type'] = self.name
        return d