Source code for traveltimes_prediction.interface.section_interface

from ..data_processing import DataProcessor
from ..support_files.helpers import partition_interval
from ..support_files import MessageCodes
from ..configs import sections_settings
from ..models import create_model


from datetime import datetime
import time
import pytz
from copy import deepcopy

import logging
logger = logging.getLogger('traveltimes_prediction')


class SectionInterface:
    """
    Class - interface of the individual sections.
    Serves to train models for sections and predict using these models.
    """

    def __init__(self, section, models):
        """
        Constructor.

        :param string section: e.g. 'KOCE-LNCE'
        :param list models: list of model classes to be trained for this section,
                            e.g. [ClusterModel, TimeDomainModel]
        """
        self.section = section
        self.models = {m.name: {'model': None, 'timestamp': None} for m in models}
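    # Illustrative sketch (not part of the original source): after construction with,
    # e.g., models=[ClusterModel, TimeDomainModel], self.models has the shape
    #     {'<ClusterModel.name>':    {'model': None, 'timestamp': None},
    #      '<TimeDomainModel.name>': {'model': None, 'timestamp': None}}
    # where the keys are whatever each model class exposes as its ``name`` attribute
    # (the concrete key values are assumptions, not taken from this module).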
    # @profile
    def predict(self, time_interval, db_interface):
        """
        Method for prediction of the travel time, using data from the given time_interval.

        :param dict time_interval: {'from': datetime, 'to': datetime}
        :param object db_interface: instance of DBInterface
        :return: list of dict - info about the process of prediction
        """
        model_return_vals = []
        confidence = 0.0

        # Convert the section's local time interval to UTC.
        utc_time_interval = dict()
        tzone = pytz.timezone(sections_settings[self.section]['timezone'])
        utc_time_interval['from'] = tzone.localize(time_interval['from'])
        utc_time_interval['to'] = tzone.localize(time_interval['to'])
        utc_time_interval['from'] = utc_time_interval['from'].astimezone(pytz.utc)
        utc_time_interval['to'] = utc_time_interval['to'].astimezone(pytz.utc)

        # Time for which the prediction is made
        time_for_prediction = time_interval['to']

        some_valid_models = self._get_models(db_interface=db_interface)
        if some_valid_models:
            # If there is at least one valid model, try to get data from the database.
            feature_df, confidence = self._get_data(time_interval=utc_time_interval)
        else:
            # If all models are invalid, return error codes.
            for model_type in self.models.keys():
                messages = [MessageCodes.MODEL_NOT_IN_DB, MessageCodes.PREDICTION_UNSUCCESSFUL]
                logger.error("[%s] Unable to load model - %s - from DB, prediction cannot be executed...",
                             self.section, model_type)
                msg_tmp = {'predicted_value': -1, 'messages': messages, 'bck': -1, 'time': time_for_prediction,
                           'delay': -1, 'model_type': model_type, 'confidence': confidence}
                model_return_vals.append(msg_tmp)
            return model_return_vals

        # Check if the attempt to retrieve data was successful.
        if feature_df is None:
            logger.error("Aggregation of features was unsuccessful !!")
            messages = [MessageCodes.DATA_AGGREGATION_FAILED, MessageCodes.PREDICTION_UNSUCCESSFUL]
            tmp = {'predicted_value': -1, 'messages': messages, 'bck': -1, 'time': time_for_prediction,
                   'delay': -1, 'model_type': -1, 'confidence': confidence}
            model_return_vals.append(tmp)
            return model_return_vals

        # Engineering of features - it is the same for all the models.
        tmp_messages = []
        if feature_df is not None and not feature_df.empty:
            logger.info('[%s] Features for prediction have been engineered ...', self.section)
        else:
            tmp_messages.append(MessageCodes.FEATURE_ENGINEERING_FAILED)
            logger.warning('[%s] Features for prediction have not been engineered !', self.section)

        # Go through all models and predict.
        for model_type, model_val in self.models.items():
            # Share the data processing messages among all models.
            messages = deepcopy(tmp_messages)

            # Measure how long the prediction itself takes.
            tick = time.time()
            try:
                predicted_tt = model_val['model'].predict(feature_df)
            except Exception as e:
                logger.exception(e)
                predicted_tt = False
                if model_val['model'] is None:
                    messages.append(MessageCodes.MODEL_NOT_IN_DB)
            tock = time.time()

            # Check if prediction was successful.
            if predicted_tt:
                messages.append(MessageCodes.PREDICTION_SUCCESSFUL)
                calculated_for_time = time_for_prediction
                # bck_tt = feature_df[[x for x in feature_df.index if ColumnNames.FEAT_TT_BCK in x][0]]
            else:
                messages.append(MessageCodes.PREDICTION_UNSUCCESSFUL)
                predicted_tt = -1
                # bck_tt = -1
                confidence = 0.0
                calculated_for_time = time_for_prediction

            logger.debug('[%s] Model - %s - has predicted - %s', self.section, model_type, predicted_tt)
            msg_tmp = {'predicted_value': predicted_tt, 'messages': messages, 'bck': None,
                       'time': calculated_for_time, 'delay': tock - tick, 'model_type': model_type,
                       'confidence': confidence}
            model_return_vals.append(msg_tmp)

        return model_return_vals
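    # Illustrative sketch (not part of the original source): each entry appended to
    # model_return_vals in predict() has the following shape; the concrete values
    # below are made up for clarity:
    #     {'predicted_value': 342.0,            # predicted travel time, or -1 on failure
    #      'messages': [MessageCodes.PREDICTION_SUCCESSFUL],
    #      'bck': None,                          # -1 when no prediction was attempted
    #      'time': datetime(2018, 1, 1, 8, 15),  # time the prediction is calculated for
    #      'delay': 0.004,                       # seconds spent in model.predict()
    #      'model_type': 'cluster',              # hypothetical model name
    #      'confidence': 0.85}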
    def _get_data(self, time_interval):
        """
        Method for retrieval of the data from DB and producing features' DataFrames.

        :param dict time_interval: {'from': datetime, 'to': datetime}
        :return: tuple (pd.DataFrame - features, int - confidence)
        """
        # Try to retrieve data for prediction
        logger.info('[%s] Retrieving data for prediction ...', self.section)
        time_interval_list = partition_interval(time_interval=time_interval)
        DP = DataProcessor(section=self.section)
        feature_df, confidence = DP.get_features(time_interval_list=time_interval_list)
        if feature_df is not None and not feature_df.empty:
            feature_df = feature_df.sort_index().iloc[-1]  # For prediction pick just the last record
        else:
            feature_df = None
        return feature_df, confidence

    def _get_models(self, db_interface):
        """
        Method for retrieving models from DB.

        :param DBInterface db_interface: instance of DBInterface
        :return: boolean - True if there is at least one valid model (including the already loaded ones)
        """
        valid_models = 0
        for model_type in self.models.keys():
            try:
                up_to_date = self._check_up_to_date_model(db_interface=db_interface, model_type=model_type)
                if not up_to_date:
                    model_dump = db_interface.load_model(section=self.section, model_type=model_type)
                    logger.info("[%s] Loading new model -- %s...", self.section, model_type)
                    if model_dump is not None and model_dump:
                        self.models[model_type]['model'] = create_model(model_dump)
                        self.models[model_type]['timestamp'] = datetime.now()
                        valid_models += 1
                        logger.info("[%s] New model loaded -- %s", self.section, model_type)
                    else:
                        self.models[model_type]['model'] = None
                        self.models[model_type]['timestamp'] = None
                        logger.info("[%s] Unable to load model -- %s", self.section, model_type)
                else:
                    valid_models += 1
                    logger.info("[%s] Model -- %s -- is up to date", self.section, model_type)
            except Exception as e:
                logger.exception(e)
        return True if valid_models else False

    def _check_up_to_date_model(self, db_interface, model_type):
        """
        Method for checking whether the model in memory is up to date.

        :param DBInterface db_interface: instance of DBInterface
        :param string model_type: name of the model which should be checked.
        :return: boolean - True if the model is in memory and is up to date compared with the model stored in DB.
        """
        last_timestamp = db_interface.model_timestamp(section=self.section, model_type=model_type)
        if (self.models[model_type]['timestamp'] is None) or (last_timestamp is None) or \
                (last_timestamp > self.models[model_type]['timestamp']):
            return False
        return True
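

# Illustrative usage sketch, not part of the original module. The import paths for
# ClusterModel, TimeDomainModel and DBInterface below are assumptions, as is the
# DBInterface() constructor signature; only the calls on SectionInterface itself
# come from the code above.
if __name__ == '__main__':
    from datetime import timedelta
    from traveltimes_prediction.models import ClusterModel, TimeDomainModel          # assumed path
    from traveltimes_prediction.interface.db_interface import DBInterface            # assumed path

    # Build the interface for one section with the model classes to be used.
    section_if = SectionInterface(section='KOCE-LNCE',
                                  models=[ClusterModel, TimeDomainModel])
    db_interface = DBInterface()                                                      # assumed constructor

    # Predict from the last 30 minutes of data up to "now" (naive local time,
    # as expected by predict(), which localizes it itself).
    now = datetime.now()
    results = section_if.predict(
        time_interval={'from': now - timedelta(minutes=30), 'to': now},
        db_interface=db_interface)

    for res in results:
        print(res['model_type'], res['predicted_value'], res['confidence'])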