from ..data_processing import DataProcessor
from ..support_files.helpers import partition_interval
from ..support_files import MessageCodes
from ..configs import sections_settings
from ..models import create_model
from datetime import datetime
import time
import pytz
from copy import deepcopy
import logging
logger = logging.getLogger('traveltimes_prediction')
class SectionInterface:
    """
    Interface of an individual section. Keeps the section's models in memory (reloading them from the DB
    when a newer version is stored there) and uses them to predict the travel time.
    """

    def __init__(self, section, models):
        """
        Constructor.

        :param string section: e.g. 'KOCE-LNCE'
        :param list models: list of model classes used for this section, e.g. [ClusterModel, TimeDomainModel];
                            only the ``name`` attribute of each class is read here
        """
        self.section = section
        # Per model name: the model instance held in memory and the local time at which it was loaded.
        self.models = {m.name: {'model': None, 'timestamp': None} for m in models}

    def predict(self, time_interval, db_interface):
        """
        Method for prediction of the traveltime, taking data from given time_interval.

        :param dict time_interval: {'from': datetime, 'to': datetime} - localized to the timezone configured
                                   for the section, so the datetimes are expected to be naive
        :param object db_interface: instance of DBInterface
        :return: list of dict - info about the process of prediction. One record per model; a single record
                 with model_type -1 when the features could not be aggregated. Each record has the keys
                 'predicted_value', 'messages', 'bck', 'time', 'delay', 'model_type' and 'confidence'.
        """
        # Time for which is predicted.
        time_for_prediction = time_interval['to']
        utc_time_interval = self._to_utc_interval(time_interval)

        if not self._get_models(db_interface=db_interface):
            # All models are invalid - report error codes for each of them without retrieving any data.
            model_return_vals = []
            for model_type in self.models:
                logger.error("[%s] Unable to load model - %s - from DB, prediction cannot be executed...",
                             self.section, model_type)
                messages = [MessageCodes.MODEL_NOT_IN_DB, MessageCodes.PREDICTION_UNSUCCESSFUL]
                model_return_vals.append(self._build_result(
                    predicted_value=-1, messages=messages, bck=-1, time_for_prediction=time_for_prediction,
                    delay=-1, model_type=model_type, confidence=0.0))
            return model_return_vals

        # There is at least one valid model, try to get data from database.
        feature_df, confidence = self._get_data(time_interval=utc_time_interval)
        if feature_df is None:
            logger.error("Aggregation of features was unsucessuful !!")
            messages = [MessageCodes.DATA_AGGREGATION_FAILED, MessageCodes.PREDICTION_UNSUCCESSFUL]
            return [self._build_result(
                predicted_value=-1, messages=messages, bck=-1, time_for_prediction=time_for_prediction,
                delay=-1, model_type=-1, confidence=confidence)]

        # Engineering of features - it is the same for all the models.
        shared_messages = []
        if feature_df.empty:
            shared_messages.append(MessageCodes.FEATURE_ENGINEERING_FAILED)
            logger.warning(
                '[%s] Features for prediction have not been engineered !', self.section)
        else:
            logger.info('[%s] Features for prediction have been engineered ...', self.section)

        # Go through all models and predict.
        model_return_vals = []
        for model_type, model_val in self.models.items():
            messages = deepcopy(shared_messages)  # To share the data processing messages among all models
            model = model_val['model']
            predicted_tt = False

            # Measure how long the prediction itself takes.
            tick = time.time()
            if model is None:
                # This particular model could not be loaded (other models of the section are valid).
                logger.warning('[%s] Model - %s - is not loaded, prediction skipped', self.section, model_type)
                messages.append(MessageCodes.MODEL_NOT_IN_DB)
            else:
                try:
                    predicted_tt = model.predict(feature_df)
                except Exception as e:
                    # Best effort: a failure of one model must not prevent the others from predicting.
                    logger.exception(e)
            tock = time.time()

            # NOTE(review): a falsy prediction (e.g. 0 or None) is treated as a failed prediction - confirm
            # that no model can legitimately return such a value.
            if predicted_tt:
                messages.append(MessageCodes.PREDICTION_SUCCESSFUL)
                model_confidence = confidence
            else:
                messages.append(MessageCodes.PREDICTION_UNSUCCESSFUL)
                predicted_tt = -1
                # Zero the confidence of this model only; the data confidence stays intact for the others.
                model_confidence = 0.0

            logger.debug('[%s] Model - %s - has predicted - %s', self.section, model_type, predicted_tt)
            # 'bck' is reported as None - no background travel time is extracted from the features here.
            model_return_vals.append(self._build_result(
                predicted_value=predicted_tt, messages=messages, bck=None,
                time_for_prediction=time_for_prediction, delay=tock - tick, model_type=model_type,
                confidence=model_confidence))
        return model_return_vals

    def _to_utc_interval(self, time_interval):
        """
        Localize the interval to the timezone configured for the section and convert it to UTC.

        :param dict time_interval: {'from': datetime, 'to': datetime}
        :return: dict - {'from': datetime, 'to': datetime} in UTC
        """
        tzone = pytz.timezone(sections_settings[self.section]['timezone'])
        return {key: tzone.localize(time_interval[key]).astimezone(pytz.utc) for key in ('from', 'to')}

    @staticmethod
    def _build_result(predicted_value, messages, bck, time_for_prediction, delay, model_type, confidence):
        """
        Assemble one record of the list returned by ``predict``.

        :return: dict - record describing the outcome of the prediction of one model
        """
        return {'predicted_value': predicted_value, 'messages': messages, 'bck': bck,
                'time': time_for_prediction, 'delay': delay, 'model_type': model_type,
                'confidence': confidence}

    def _get_data(self, time_interval):
        """
        Method for retrieval of the data and producing the features for prediction.

        :param dict time_interval: {'from': datetime, 'to': datetime}
        :return: tuple (features of the last record by sorted index, or None if no features were obtained;
                 confidence as returned by DataProcessor.get_features)
        """
        # Try to retrieve data for prediction
        logger.info('[%s] Retrieving data for prediction ...', self.section)
        time_interval_list = partition_interval(time_interval=time_interval)
        data_processor = DataProcessor(section=self.section)
        feature_df, confidence = data_processor.get_features(time_interval_list=time_interval_list)

        if feature_df is None or feature_df.empty:
            return None, confidence
        # For prediction pick just the last record
        return feature_df.sort_index().iloc[-1], confidence

    def _get_models(self, db_interface):
        """
        Method for retrieving models from DB. Models which are up to date in memory are not reloaded.

        :param DBInterface db_interface: instance of DBInterface
        :return: boolean - True if there is at least one valid model (including the loaded-ones)
        """
        valid_models = 0
        for model_type, model_val in self.models.items():
            try:
                if self._check_up_to_date_model(db_interface=db_interface, model_type=model_type):
                    valid_models += 1
                    logger.info("[%s] Model -- %s -- is up to date", self.section, model_type)
                    continue

                logger.info("[%s] Loading new model -- %s...", self.section, model_type)
                model_dump = db_interface.load_model(section=self.section, model_type=model_type)
                if model_dump:
                    model_val['model'] = create_model(model_dump)
                    model_val['timestamp'] = datetime.now()
                    valid_models += 1
                    logger.info("[%s] New model loaded -- %s", self.section, model_type)
                else:
                    # Nothing usable in the DB - drop whatever was held in memory.
                    model_val['model'] = None
                    model_val['timestamp'] = None
                    logger.info("[%s] Unable to load model -- %s", self.section, model_type)
            except Exception as e:
                # Best effort: a failure with one model must not block the remaining ones. The model is
                # not counted as valid and its in-memory state is left as it was.
                logger.exception(e)
        return valid_models > 0

    def _check_up_to_date_model(self, db_interface, model_type):
        """
        Method for checking if the model in memory is up-to-date.

        :param DBInterface db_interface: instance of DBInterface
        :param string model_type: name of the model which should be checked.
        :return: boolean - True if model is in memory and is up-to-date comparing with model stored in DB.
        """
        last_timestamp = db_interface.model_timestamp(section=self.section, model_type=model_type)
        loaded_timestamp = self.models[model_type]['timestamp']
        if loaded_timestamp is None or last_timestamp is None:
            return False
        # NOTE(review): the in-memory timestamp is datetime.now() taken at load time (naive, local clock);
        # presumably the DB timestamp is comparable with it - confirm.
        return not last_timestamp > loaded_timestamp