Source code for traveltimes_prediction.data_processing.data_entities.bck_tt

from ...support_files.helpers import check_params
from ...configs.db_settings import connection_string_data_det1
from .data_entity import DataEntity
from ..feature_engineering import features_to_extract_bck_tt
import pandas as pd
import logging
import traceback

import pymysql


class BCK_TT(DataEntity):
    """
    Class providing interface for retrieval of BCK data and generation of features from those.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor. Opens the PyMySQL connection to the data source and sets up the logger.

        :param args: positional arguments - accepted, but not forwarded to the parent constructor
        :param kwargs: keyword arguments - forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        self.mysqlconn_data = pymysql.connect(**connection_string_data_det1)  # Connection to data source - bck, sensors
        self.mysql_datetime_format = '%Y-%m-%d %H:%M:%S'  # Datetime format that MySQL understands
        self.logger = logging.getLogger('traveltimes_processing_bck_tt')
        self.data_type = None
        self.features_definitions = features_to_extract_bck_tt

    @check_params
    def get_data(self, section, list_between_times):
        """
        Function for sequential execution of sql queries for longer timespans.

        Every time span is queried separately; a failure of one span is logged and the
        remaining spans are still processed. The result is also stored in ``self.retrieved_data``.

        :param section: string - which section`s data should be loaded
        :param list_between_times: list of dicts of traveltimes - [{'from':..., 'to':...}, {...}, ...]
        :return: pd.DataFrame with the rows of all non-empty chunks, or None if no chunk yielded any data.
        """
        # Non-empty chunks are collected and concatenated once at the end; DataFrame.append
        # was removed in pandas 2.0 and re-copied the accumulated frame on every call.
        chunks = []

        # Iterating through the entire list of time-spans
        self.logger.debug("Downloading of chunks-days from DB started ...")
        for i, between_time in enumerate(list_between_times, start=1):
            try:
                _df = self._retrieve_partial_interval_data(section=section, between_time=between_time)
                if _df is not None and not _df.empty:
                    chunks.append(_df)
            except Exception:
                # Best effort - one broken time span must not abort the whole download.
                # logger.exception logs at ERROR level and attaches the traceback.
                self.logger.exception("Error while processing data for time span: {} !!".format(between_time))
            self.logger.debug("Processed {:<5} of {:>5} ... ".format(i, len(list_between_times)))

        self.logger.debug("Finished...")

        if not chunks:
            df = None
        elif len(chunks) == 1:
            # A single chunk is handed over as it is, without copying.
            df = chunks[0]
        else:
            df = pd.concat(chunks, ignore_index=True)

        self.retrieved_data = df
        return df

    def process_data(self):
        """
        Method for aggregation of features. Currently the retrieved data are only wrapped
        into the list of sensor dicts, no aggregation is performed.

        :return: tuple - (list with one dict {'sensor_name': 'BCK_TT', 'df': retrieved data}, None)
        """
        # TODO some imputing ? + modification of the query
        self.aggregated_df_list = [{'sensor_name': 'BCK_TT', 'df': self.retrieved_data}]
        return self.aggregated_df_list, None  # TODO maybe make use of 'penalty' field from tt3 ?

    def _retrieve_partial_interval_data(self, section, between_time):
        """
        Function for retrieving the bck traveltime data for given section and time-span from DB.

        Errors are logged and not propagated.

        :param section: string - which section`s data should be loaded
        :param between_time: dict - {'from': datetime, 'to': datetime}
        :return: pandas.DataFrame - bck data; None if the bck section was not found or the retrieval failed
        """
        backwards_prediction_query = """
            SELECT calculation_time, tt_calculated
            FROM tt3.tt3_output_traveltimes
            where output_section=%s and (calculation_time between %s and %s) and confidence > 50;
        """

        df_backwards_prediction = None
        try:
            # The 'section' (e.g. 'KOCE-LNCE') is not the valid identifier used for bck sections,
            # the valid identifier needs to be looked up.
            bck_section_name = self._get_backwards_section_name(connection=self.mysqlconn_data, section=section)

            if bck_section_name is not None:
                df_backwards_prediction = pd.read_sql(
                    sql=backwards_prediction_query,
                    params=(bck_section_name,
                            between_time['from'].strftime(self.mysql_datetime_format),
                            between_time['to'].strftime(self.mysql_datetime_format)),
                    con=self.mysqlconn_data)
        except Exception:
            # Deliberately swallowed - the caller treats None as "no data for this time span".
            self.logger.exception(
                "Error while retrieving bck data for section {} and time span: {} !!".format(section, between_time))

        return df_backwards_prediction

    def _get_backwards_section_name(self, section, connection):
        """
        Private method for looking up the valid identifier of the section used in bck data.

        :param section: string - section identifier, e.g. 'KOCE-LNCE'
        :param connection: object - database connection, instance of opened PyMySQL
        :return: the identifier if exists, None otherwise.
        """
        cursor = connection.cursor()
        try:
            # Only the first 9 characters of the section are used for the substring match.
            cursor.execute("""
                SELECT output_section
                FROM tt3.tt3_output_traveltimes
                WHERE output_section LIKE %s
                LIMIT 1
            """, ('%' + section[:9] + '%',))
            backwards_section = cursor.fetchone()
        finally:
            # Release the cursor even if the query fails.
            cursor.close()

        return backwards_section[0] if backwards_section is not None else None

    def _aggregate(self, df):
        """
        Not implemented for BCK data - does nothing.

        :param df: unused
        """
        pass

    def _reorganize(self, obj):
        """
        Not implemented for BCK data - does nothing.

        :param obj: unused
        """
        pass