# Source code for traveltimes_prediction.data_processing.data_entities.time
from ...support_files.helpers import check_params
from ...support_files import ColumnNames
from .data_entity import DataEntity
from ..feature_engineering import features_to_extract_time
import pandas as pd
import logging
class TIME(DataEntity):
    """
    Class providing generation of time-specific features. DB retrieval is not provided, because features are
    generated from sensors' features data.

    The retrieval/aggregation hooks (``get_data``, ``_retrieve_partial_interval_data``, ``_aggregate``,
    ``_reorganize``) therefore raise ``NotImplementedError``; only ``process_data`` and
    ``engineer_features`` do actual work.
    """

    def __init__(self, *args, **kwargs):
        """
        Set up the entity around an optional time index.

        :param args: accepted for signature compatibility; not forwarded to the parent constructor
        :param kwargs: forwarded unchanged to the parent constructor; if it contains ``time_index``
                       (an object exposing ``.values``), those values become a single-column
                       DataFrame named ``ColumnNames.CALC_TIME`` converted with ``pd.to_datetime``
        """
        super().__init__(**kwargs)
        self.mysqlconn_data = None  # No DB connection is used by this entity
        self.logger = logging.getLogger('traveltimes_processing_time')

        if 'time_index' in kwargs:
            # Wrap the raw values into a one-column frame and parse them as datetimes
            self.time_index = pd.DataFrame(data=kwargs['time_index'].values,
                                           columns=[ColumnNames.CALC_TIME]).apply(pd.to_datetime)
        else:
            self.time_index = None

        self.data_type = None
        self.features_definitions = features_to_extract_time

    @check_params
    def get_data(self, section, list_between_times):
        """
        Not supported - time features are not retrieved from a DB.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def process_data(self):
        """
        Method for aggregation. No real aggregation happens here: the stored time index is wrapped
        into the list-of-slices structure consumed by ``engineer_features``.

        :return: tuple - (list with one dict ``{'sensor_name': 'Time', 'df': <time index frame>}``, None)
        """
        self.aggregated_df_list = [{'sensor_name': 'Time', 'df': self.time_index}]
        return self.aggregated_df_list, None

    def engineer_features(self):
        """
        Method for global engineering of features (sensors' data, time features, etc.)

        Each entry of ``self.aggregated_df_list`` is passed to ``self._engineer_features`` together
        with ``self.features_definitions``; the per-slice results are outer-joined on
        ``ColumnNames.CALC_TIME``. Any exception is logged and the features gathered so far are returned.

        :return: pd.DataFrame - DataFrame of features indexed by ``ColumnNames.CALC_TIME``,
                 or None if no slice was processed successfully
        """
        df_features = None
        try:  # Engineering from sensors' data
            for df_obj in self.aggregated_df_list:  # For each slice (group of sensors' names)
                # Extract features for given slice
                _df = self._engineer_features(df_obj['df'], features_for_extraction=self.features_definitions)
                indexed = _df.set_index(ColumnNames.CALC_TIME)
                if df_features is None:
                    # First slice becomes the base frame
                    df_features = indexed
                else:
                    # Using join on index instead of merge to avoid duplicate columns
                    df_features = df_features.join(indexed, how='outer')
            self.logger.debug("Time features engineered ...")
        except Exception as e:
            # Best effort: report the failure and hand back whatever was engineered so far
            self.logger.exception(e)
        return df_features

    def _retrieve_partial_interval_data(self, section, between_time):
        """
        Not supported - there is no DB retrieval for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def _aggregate(self, df):
        """
        Not supported for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()

    def _reorganize(self, obj):
        """
        Not supported for time features.

        :raises NotImplementedError: always
        """
        raise NotImplementedError()