Source code for traveltimes_prediction.data_processing.data_entities.bck_tt
from ...support_files.helpers import check_params
from ...configs.db_settings import connection_string_data_det1
from .data_entity import DataEntity
from ..feature_engineering import features_to_extract_bck_tt
import pandas as pd
import logging
import traceback
import pymysql
class BCK_TT(DataEntity):
    """
    Class providing interface for retrieval of BCK data and generation of features from those.

    The data are read from the ``tt3.tt3_output_traveltimes`` table through a PyMySQL connection
    that is opened in the constructor.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor.

        :param args: positional arguments - accepted, but not forwarded to the parent class
        :param kwargs: keyword arguments - forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        self.mysqlconn_data = pymysql.connect(**connection_string_data_det1)  # Connection to data source - bck, sensors
        self.mysql_datetime_format = '%Y-%m-%d %H:%M:%S'  # Datetime format that MySQL understands
        self.logger = logging.getLogger('traveltimes_processing_bck_tt')
        self.data_type = None
        self.features_definitions = features_to_extract_bck_tt

    @check_params
    def get_data(self, section, list_between_times):
        """
        Function for sequential execution of sql queries for longer timespans.

        A failure while processing one time-span is logged and that time-span is skipped, the
        remaining time-spans are still processed. The result is also stored in ``self.retrieved_data``.

        :param section: string - which section`s data should be loaded
        :param list_between_times: list of dicts of traveltimes - [{'from':..., 'to':...}, {...}, ...]
        :return pd.DataFrame with the retrieved rows, or None if no time-span yielded any row.
        """
        chunks = []
        total = len(list_between_times)

        # Iterating through the entire list of time-spans
        self.logger.debug("Downloading of chunks-days from DB started ...")
        for i, between_time in enumerate(list_between_times, start=1):
            try:
                chunk = self._retrieve_partial_interval_data(section=section, between_time=between_time)
                # Missing (None) and empty results are skipped
                if chunk is not None and not chunk.empty:
                    chunks.append(chunk)
            except Exception:
                # Best effort - one broken time-span must not abort the whole download.
                self.logger.exception("Error while processing data for time span: {} !!".format(between_time))
            self.logger.debug("Processed {:<5} of {:>5} ... ".format(i, total))
        self.logger.debug("Finished...")

        if not chunks:
            df = None
        elif len(chunks) == 1:
            # A single chunk is passed through untouched
            df = chunks[0]
        else:
            # One concatenation at the end instead of re-copying the accumulated frame per chunk
            df = pd.concat(chunks, ignore_index=True)

        self.retrieved_data = df
        return df

    def process_data(self):
        """
        Method for aggregation of features.

        Wraps the data stored by ``get_data`` into a single-item list, no aggregation is done yet.

        :return: tuple - (list with one dict {'sensor_name': 'BCK_TT', 'df': retrieved data}, None)
        """
        # TODO some imputing ? + modification of the query
        self.aggregated_df_list = [{'sensor_name': 'BCK_TT', 'df': self.retrieved_data}]
        return self.aggregated_df_list, None  # TODO maybe make use of 'penalty' field from tt3 ?

    def _retrieve_partial_interval_data(self, section, between_time):
        """
        Function for retrieving the bck traveltime data for given section and time-span from DB.

        Only rows with ``confidence > 50`` are selected. Any error is logged and None is returned.

        :param section: string - which section`s data should be loaded
        :param between_time: dict - {'from': datetime, 'to': datetime}
        :return: pandas.DataFrame - bck data; None if the section was not found or the query failed
        """
        backwards_prediction_query = """
            SELECT calculation_time, tt_calculated
            FROM tt3.tt3_output_traveltimes
            where output_section=%s
            and (calculation_time between %s and %s)
            and confidence > 50;
        """
        df_backwards_prediction = None
        try:
            # The 'section' (e.g. 'KOCE-LNCE') is not the valid identifier used for bck sections,
            # the valid identifier needs to be looked up.
            bck_section_name = self._get_backwards_section_name(connection=self.mysqlconn_data, section=section)
            if bck_section_name is not None:
                query_params = (
                    bck_section_name,
                    between_time['from'].strftime(self.mysql_datetime_format),
                    between_time['to'].strftime(self.mysql_datetime_format),
                )
                df_backwards_prediction = pd.read_sql(sql=backwards_prediction_query,
                                                      params=query_params,
                                                      con=self.mysqlconn_data)
        except Exception:
            # Deliberate best effort - the caller treats None as 'no data for this time-span'.
            self.logger.exception("Retrieval of bck data failed for section {} and time span {}".format(
                section, between_time))
        return df_backwards_prediction

    def _get_backwards_section_name(self, section, connection):
        """
        Private method for looking up the valid identifier of the section used in bck data.

        :param section: string - section identifier, e.g. 'KOCE-LNCE'
        :param connection: object - database connection, instance of opened PyMySQL
        :return: the identifier if exists, None otherwise.
        """
        cursor = connection.cursor()
        try:
            # Only the first 9 characters of 'section' are used for the substring match.
            cursor.execute("""
                SELECT output_section
                FROM tt3.tt3_output_traveltimes
                WHERE output_section LIKE %s
                LIMIT 1
            """, ('%' + section[:9] + '%',))
            backwards_section = cursor.fetchone()
        finally:
            # Release the cursor even when the query raises
            cursor.close()
        return backwards_section[0] if backwards_section is not None else None

    def _aggregate(self, df):
        """
        Not used for this entity - does nothing.

        :param df: ignored
        """
        pass

    def _reorganize(self, obj):
        """
        Not used for this entity - does nothing.

        :param obj: ignored
        """
        pass