# Source code for traveltimes_prediction.models.time_domain_model
from sklearn import base
import numpy as np
import pandas as pd
import collections
from .base_model import BaseModel
from ..support_files import ColumnNames
from ..support_files.helpers import check_params
class TimeDomainModel(base.BaseEstimator, BaseModel):
    """
    Regression model composed of one regressor per time window.

    Rows are grouped by the value found in ``X[ColumnNames.FEAT_TIME_BIN]``.
    For every distinct time window an individual regressor is trained on the rows
    of that window together with the rows of its neighbouring windows.
    """

    name = 'TimeDomainModel'

    def __init__(self, regressor=None, regressor_params=None):
        """
        Constructor.

        :param class regressor: Regressor class instantiated once for each time window.
        :param dict regressor_params: Keyword arguments passed to the regressor constructor.
        """
        super(TimeDomainModel, self).__init__()
        self.regressor = regressor
        self.regressor_params = regressor_params
        # Maps int(time window) -> fitted regressor; populated by fit() or load().
        self.regressor_dict = None

    def fit(self, X, Y):
        """
        Fit one regressor per time window found in X.

        :param pd.DataFrame X: data matrix X for training; must contain the
            ``ColumnNames.FEAT_TIME_BIN`` column, which is used only for grouping
            and is not passed to the regressors as a feature.
        :param pd.DataFrame Y: data vector Y - outputs; indexed with a boolean row mask.
        :return: self
        :raises ValueError: if X is None, X lacks the time-bin column, or no
            regressor class was configured.
        """
        if X is None:
            raise ValueError('[%s] None data for fitting !!!' % self.name)
        if ColumnNames.FEAT_TIME_BIN not in X.columns:
            raise ValueError('[%s] Data does not contain time_bin for creation of the time windows !!!' % self.name)
        if self.regressor is None:
            # Fail with a clear message instead of "'NoneType' object is not callable".
            raise ValueError('[%s] No regressor class given for fitting !!!' % self.name)

        # The constructor default is None; treat it as "no extra parameters".
        params = self.regressor_params if self.regressor_params is not None else {}

        time_window_col = X[ColumnNames.FEAT_TIME_BIN].values
        columns = list(X.columns)
        columns.remove(ColumnNames.FEAT_TIME_BIN)
        _X = X[columns].values

        self.regressor_dict = dict()
        time_windows = np.unique(time_window_col)  # Sorted distinct time windows
        last_index = len(time_windows) - 1

        for i, window in enumerate(time_windows):  # For each window, train a regressor
            # NOTE(review): for i == 0 the index i - 1 is -1, so the first window takes
            # the LAST window as its previous neighbour, while the last window takes
            # itself as its next neighbour (no wrap to the first window). This asymmetry
            # is kept as in the original - confirm which behaviour is intended.
            previous_window = time_windows[i - 1]
            next_window = time_windows[i + 1] if i < last_index else time_windows[-1]

            query = ((time_window_col == window)
                     | (time_window_col == previous_window)
                     | (time_window_col == next_window))

            train_x = _X[query, :]
            train_y = Y[query]
            self.regressor_dict[int(window)] = self.regressor(**params).fit(X=train_x, y=train_y)
        return self

    @check_params
    def predict(self, X):
        """
        Predict for the given feature vector(s).

        Each row is predicted by the regressor of its own time window. A row whose
        window has no regressor reuses the most recent prediction made in this call,
        or -1 if there is none yet.

        :param pd.DataFrame X: or pandas.Series - feature vector(s) including the
            ``ColumnNames.FEAT_TIME_BIN`` entry.
        :return: numpy array of predictions (empty for zero rows); the single
            predicted value when exactly one row is predicted.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in collections.abc.
        from collections.abc import Iterable

        is_frame = isinstance(X, pd.DataFrame)

        windows = X[ColumnNames.FEAT_TIME_BIN].values if is_frame else X[ColumnNames.FEAT_TIME_BIN]
        if not isinstance(windows, Iterable):
            # A Series yields a single scalar window.
            windows = [windows]

        if is_frame:
            _X = X.drop(ColumnNames.FEAT_TIME_BIN, axis=1).values
        else:
            _X = X.drop(ColumnNames.FEAT_TIME_BIN).values
        # A single feature vector becomes a one-row matrix.
        _X = _X.reshape(1, -1) if len(_X.shape) == 1 else _X

        results = []
        _last = None
        for i, w in enumerate(windows):
            if w in self.regressor_dict:
                res = self.regressor_dict[w].predict(X=_X[i, :].reshape(1, -1))
                _last = res
                results.extend(res)
            else:
                results.extend(_last if _last is not None else [-1])

        if len(results) == 1:
            return results[0]
        # Also covers zero input rows, which previously raised IndexError on results[0].
        return np.array(results)

    @staticmethod
    def load(model):
        """
        Create the model from its dump.

        :param dict model: Dumped model, keys are params` names, values are values.
        :return: object - instance of TimeDomainModel created from its dump
        """
        inst = TimeDomainModel()
        from .create_model import create_model
        # Keys are converted to int, because in JSON they have to be represented as strings
        inst.regressor_dict = {int(k): create_model(v) for k, v in model['regressor_dict'].items()}
        return inst

    def dump(self):
        """
        Dump the model.

        :return: dict - Model dumped as dictionary, with keys as params` names and values as values of the params
        """
        d = dict()
        d['model'] = dict()
        # Conversion of keys to string because JSON requires that
        d['model']['regressor_dict'] = {str(window): regressor.dump()
                                        for window, regressor in self.regressor_dict.items()}
        d['model_type'] = self.name
        return d