# Source code for traveltimes_prediction.data_processing.data_entities.time

from ...support_files.helpers import check_params
from ...support_files import ColumnNames
from .data_entity import DataEntity
from ..feature_engineering import features_to_extract_time
import pandas as pd
import logging


class TIME(DataEntity):
    """
    Class providing generation of time-specific features.

    DB retrieval is not provided, because features are generated from
    sensors` features data. All retrieval/aggregation hooks therefore raise
    ``NotImplementedError``.
    """

    def __init__(self, *args, **kwargs):
        """
        Initialise the entity and, when supplied, the time index to derive features from.

        :param args: accepted but not used by this class (not forwarded to the parent)
        :param kwargs: forwarded unchanged to the parent constructor; an optional
                       ``time_index`` entry (an object exposing ``.values``) provides
                       the timestamps, which are converted with ``pd.to_datetime``
        """
        super().__init__(**kwargs)
        self.mysqlconn_data = None
        self.logger = logging.getLogger('traveltimes_processing_time')

        # A missing key and an explicit ``time_index=None`` are treated alike,
        # so passing None no longer fails on ``None.values``.
        time_index = kwargs.get('time_index')
        if time_index is None:
            self.time_index = None
        else:
            # Single-column frame named after ColumnNames.CALC_TIME, parsed to datetimes.
            self.time_index = pd.DataFrame(
                data=time_index.values,
                columns=[ColumnNames.CALC_TIME],
            ).apply(pd.to_datetime)

        self.data_type = None
        self.features_definitions = features_to_extract_time

    @check_params
    def get_data(self, section, list_between_times):
        """
        Not supported for time features - there is nothing to retrieve.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def process_data(self):
        """
        Method for aggregation.

        Wraps the stored time index in a single-entry list (sensor name ``'Time'``)
        and remembers it in ``self.aggregated_df_list`` for ``engineer_features``.

        :return: tuple - (list with one ``{'sensor_name', 'df'}`` dict, None)
        """
        self.aggregated_df_list = [{'sensor_name': 'Time', 'df': self.time_index}]
        return self.aggregated_df_list, None

    def engineer_features(self):
        """
        Method for global engineering of features (sensors` data, time features, etc )

        Any exception raised while engineering is logged and swallowed; whatever was
        built up to that point is returned (``None`` if nothing was built).

        :return: pd.DataFrame - DataFrame of features indexed by ColumnNames.CALC_TIME,
                 or None
        """
        df_features = None
        try:
            # One entry per slice prepared by process_data().
            for df_obj in self.aggregated_df_list:
                # Extract features for given slice.
                # NOTE(review): _engineer_features is not defined in this class -
                # presumably provided by DataEntity; confirm.
                _df = self._engineer_features(
                    df_obj['df'],
                    features_for_extraction=self.features_definitions,
                )
                indexed = _df.set_index(ColumnNames.CALC_TIME)
                # Using join on index instead of merge to avoid duplicate columns
                if df_features is None:
                    df_features = indexed
                else:
                    df_features = df_features.join(indexed, how='outer')
            self.logger.debug("Time features engineered ...")
        except Exception as e:
            # Best-effort by design: log with traceback, return what we have.
            self.logger.exception(e)
        return df_features

    def _retrieve_partial_interval_data(self, section, between_time):
        """
        Not supported for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def _aggregate(self, df):
        """
        Not supported for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def _reorganize(self, obj):
        """
        Not supported for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()