Source code for traveltimes_prediction.support_files.helpers

import base64
import re
import zlib
from bisect import bisect_left
from copy import deepcopy
from datetime import datetime, timedelta
from functools import wraps

import numpy as np
import pandas as pd

from .exceptions import InvalidArgumentException, UnexpectedInputFormatException


# @profile
def index(a, x):
    """
    Binary search: locate the leftmost value exactly equal to x.

    :param np.array a: sequence searched with ``bisect_left`` (which presumes it is sorted ascending)
    :param number x: value to look up
    :return: number - position of the leftmost element equal to x
    :raises ValueError: if no element equal to x is found at the insertion point
    """
    pos = bisect_left(a, x)
    # bisect_left only yields the insertion point; confirm an actual match sits there.
    found = pos < len(a) and a[pos] == x
    if not found:
        raise ValueError
    return pos
def dataframe_append(df_base, df_new):
    """
    Append one dataframe below another.

    :param pd.DataFrame df_base: dataframe to extend; ``None`` means "nothing collected yet"
    :param pd.DataFrame df_new: dataframe whose rows are added after those of ``df_base``
    :return: pd.DataFrame - ``df_new`` itself when ``df_base`` is None, the concatenation
        when ``df_base`` is a DataFrame, otherwise ``df_base`` unchanged
    """
    if df_base is None:
        return df_new
    if isinstance(df_base, pd.DataFrame):
        # DataFrame.append was removed in pandas 2.0; pd.concat keeps the original
        # index labels exactly like append did.
        return pd.concat([df_base, df_new])
    # Any other base type is passed through untouched, as before.
    return df_base
def array_append(arr_base, arr_new, stack='v'):
    """
    Append a numpy array (or list) to another numpy array.

    :param np.array arr_base: array to extend; ``None`` means "nothing collected yet"
    :param np.array arr_new: array or list to be stacked onto ``arr_base``
    :param char stack: The stacking dimension - horizontal ('h') or vertical ('v')
    :return: np.array - ``arr_new`` (lists converted to np.array) when ``arr_base`` is None,
        otherwise the stacked array
    :raises InvalidArgumentException: if ``arr_base`` is neither None nor np.ndarray, or
        if ``stack`` is not 'v' or 'h' while stacking
    """
    if arr_base is None:
        # First chunk: nothing to stack with, just normalise lists to arrays.
        return np.array(arr_new) if isinstance(arr_new, list) else arr_new

    if not isinstance(arr_base, np.ndarray):
        # Refuse unsupported base types instead of silently dropping arr_new.
        raise InvalidArgumentException

    if isinstance(arr_new, list):
        arr_new = np.array(arr_new)

    if stack == 'v':
        return np.vstack((arr_base, arr_new))
    if stack == 'h':
        # 1-D inputs are treated as single columns so they end up side by side.
        if len(arr_base.shape) == 1:
            arr_base = arr_base.reshape(-1, 1)
        if len(arr_new.shape) == 1:
            arr_new = arr_new.reshape(-1, 1)
        return np.hstack((arr_base, arr_new))

    # Unknown stacking direction: fail loudly rather than return arr_base unchanged.
    raise InvalidArgumentException
def merge_inner_lists(list_of_lists):
    """
    Flatten one level of nesting, producing a single list of all elements.

    Inner lists and tuples are expanded in place; any other element is kept as it is.

    :param list list_of_lists: list (or tuple) of lists, tuples or plain elements
    :return: list - the flattened elements, or None when the input is neither a list nor a tuple
    """
    sequence_types = (list, tuple)
    if not isinstance(list_of_lists, sequence_types):
        return None

    return [
        element
        for item in list_of_lists
        # Wrap plain elements in a 1-tuple so both cases can be iterated uniformly.
        for element in (item if isinstance(item, sequence_types) else (item,))
    ]
def check_params(func):
    """
    Decorator checking the method's/function's input parameters - the wrapped callable
    is executed only if none of them is None or empty.

    An argument is rejected when it is None, an empty list, an empty dict or an
    empty pd.DataFrame. Both positional and keyword arguments are inspected.

    :param func: callable to be guarded
    :return: wrapper returning False when any argument is rejected, otherwise the
        result of ``func``
    """
    @wraps(func)  # keep the wrapped function's name and docstring
    def func_wrapper(*args, **kwargs):
        for arg in list(kwargs.values()) + list(args):
            if arg is None:
                return False
            if isinstance(arg, (list, dict)) and not arg:
                return False
            if isinstance(arg, pd.DataFrame) and arg.empty:
                return False
        return func(*args, **kwargs)

    return func_wrapper
def convert_params(func):
    """
    Decorator converting the method's/function's input parameters -
    pd.DataFrame / pd.Series -> np.ndarray (via ``.values``).

    Positional and keyword arguments of any other type are passed through untouched.

    :param func: callable that should receive plain arrays
    :return: wrapper calling ``func`` with the converted arguments
    """
    def _to_ndarray(value):
        # Only pandas containers are unwrapped; everything else stays as it is.
        if isinstance(value, (pd.DataFrame, pd.Series)):
            return value.values
        return value

    @wraps(func)  # keep the wrapped function's name and docstring
    def func_wrapper(*args, **kwargs):
        _args = [_to_ndarray(arg) for arg in args]
        _kwargs = {name: _to_ndarray(kwarg) for name, kwarg in kwargs.items()}
        return func(*_args, **_kwargs)

    return func_wrapper
def chunkify(l, n):
    """
    Cut a long list into several shorter lists.

    Elements are dealt out in turn: chunk ``i`` holds the elements at positions
    ``i, i + n, i + 2n, ...`` of the input.

    :param list l: list to be split
    :param int n: Number of lists to be created.
    :return: list of n lists (some may be empty when ``l`` has fewer than n elements)
    """
    chunks = []
    for offset in range(n):
        # Every n-th element starting at `offset`.
        chunks.append(l[offset::n])
    return chunks
def merge_to_nearest(df1, df2):
    """
    Merge two pd.DataFrames side by side according to their indices.

    Each row of ``df2`` is paired with the ``df1`` row located just before its
    insertion point in ``df1``'s index (``np.searchsorted`` minus one). Rows of
    ``df2`` that have no such predecessor are dropped, and when several ``df2``
    rows map to the same ``df1`` row only the first pairing is kept.

    :param pd.DataFrame df1: left frame; its index is searched with ``np.searchsorted``
        (which presumes it is sorted ascending)
    :param pd.DataFrame df2: right frame
    :return: pd.DataFrame - columns of df1 followed by columns of df2, indexed by the
        matched df1 labels
    """
    positions = np.searchsorted(df1.index.values, df2.index.values) - 1
    # Position -1 means the df2 label precedes every df1 label - no match available.
    has_predecessor = positions >= 0
    matched = df1.iloc[positions][has_predecessor]

    # Flag only the first occurrence of every matched df1 label.
    seen_labels = set()
    first_occurrence = []
    for label in matched.index:
        first_occurrence.append(label not in seen_labels)
        seen_labels.add(label)
    first_occurrence = np.array(first_occurrence)

    left = matched.loc[first_occurrence]
    right = df2.loc[has_predecessor].loc[first_occurrence]

    merged_columns = list(df1.columns) + list(df2.columns)
    merged_values = np.hstack((left.values, right.values))
    return pd.DataFrame(merged_values, columns=merged_columns, index=left.index)
def subtract_time_intervals(new, saved):
    """
    Subtract time intervals, A-B: the part of ``new`` not yet covered by ``saved``.

    Only ``saved['to']`` is taken into account as the coverage boundary;
    ``saved['from']`` is used solely by the equality check.

    :param dict new: A, format {'from': datetime, 'to': datetime}
    :param dict saved: B, format {'from': datetime, 'to': datetime}; None means nothing is saved
    :return: dict {'from': datetime, 'to': datetime}, or an empty dict when both
        intervals are identical
    :raises ValueError: if both intervals are None
    :raises TypeError: if only ``new`` is None (it is subscripted)
    """
    if new is None and saved is None:
        raise ValueError('Both of time intervals cannot be None !!')
    if saved is None:
        return new
    # Equality has to be tested first - otherwise the overlap branch below would
    # shadow it and hand back a zero-length interval instead of "nothing left".
    if new == saved:
        return dict()
    # `new` begins at or after the end of `saved` (adjacent intervals included):
    # nothing to cut off. Previously the adjacent case fell through and returned None.
    if new['from'] >= saved['to']:
        return new
    # `new` begins inside the saved range: keep only the part after it.
    return {'from': saved['to'], 'to': new['to']}
def partition_interval(time_interval, delta_hours=12):
    """
    Partition the time_interval into a list of dicts - consecutive shorter intervals.

    Every part is ``delta_hours`` long except the last one, which ends exactly at
    ``time_interval['to']`` and may be shorter. A zero-length interval yields an empty list.

    :param dict time_interval: dict of datetimes -> {'from': datetime, 'to': datetime}
    :param int delta_hours: length of one part in hours, must be positive
    :return: list of dicts - [{'from': datetime, 'to': datetime}, ...]
    :raises UnexpectedInputFormatException: if 'from' is later than 'to'
    :raises ValueError: if ``delta_hours`` is not positive
    """
    if time_interval['from'] > time_interval['to']:
        raise UnexpectedInputFormatException('From is later than To !!')
    if delta_hours <= 0:
        # A non-positive step never advances and would loop forever.
        raise ValueError('delta_hours must be positive !!')

    step = timedelta(hours=delta_hours)
    end_dt = time_interval['to']
    list_between_times = []
    from_dt = time_interval['from']

    # Emit full-length parts while a whole step still fits strictly before the end.
    while from_dt + step < end_dt:
        list_between_times.append({'from': from_dt, 'to': from_dt + step})
        from_dt = from_dt + step

    # Remainder (possibly a full step when the interval divides evenly).
    if from_dt != end_dt:
        list_between_times.append({'from': from_dt, 'to': end_dt})
    return list_between_times
def impute(array, columns, invalid_val):
    """
    Impute a numpy array - replace invalid values by linear interpolation.

    A row counts as invalid when any of its selected columns equals ``invalid_val``.
    For such rows every selected column is re-computed with ``np.interp`` from the
    valid rows, using the row number as the x coordinate. The input is not modified.

    :param np.array array: 1-D (treated as a single column) or 2-D array
    :param list columns: Indices of columns.
    :param number invalid_val: Identifier of the invalid number.
    :return: tuple (imputed float array restricted to ``columns``, share of rows that
        did NOT need imputation, i.e. ``1 - invalid_rows / rows``). When every row is
        invalid (or there are no rows) nothing can be interpolated and ``(None, 1.0)``
        is returned.
    """
    if len(array.shape) < 2:
        array = array.reshape(-1, 1)

    # np.float was removed in NumPy 1.24; the builtin float is the same dtype (float64).
    _arr = array[:, columns].astype(float)
    n_rows = _arr.shape[0]
    xes = np.arange(n_rows)

    # Row-wise "contains invalid_val" test, vectorised.
    bad_val = np.any(_arr == invalid_val, axis=1)

    if np.all(bad_val):
        # No valid support points to interpolate from (also covers zero rows).
        return None, 1.0

    if np.any(bad_val):
        good_val = ~bad_val
        for idx in range(_arr.shape[1]):
            _arr[bad_val, idx] = np.interp(xes[bad_val], xes[good_val], _arr[good_val, idx])

    return _arr, 1 - bad_val.sum() / n_rows
def compress(s):
    """
    Compress a string into a text-safe form.

    The string is encoded to bytes, deflated with zlib and the result is
    base64-encoded so that it can be handled as ordinary text.

    :param string s: text to be compressed
    :return: string - base64 representation of the zlib-compressed input
    """
    raw_bytes = s.encode()
    deflated = zlib.compress(raw_bytes)
    encoded = base64.b64encode(deflated)
    return encoded.decode()
def decompress(c):
    """
    Restore a string from its base64-encoded, zlib-compressed form.

    :param string c: base64 text wrapping zlib-compressed data
    :return: string - the decompressed bytes decoded with the default codec
    """
    deflated = base64.b64decode(c.encode())
    raw_bytes = zlib.decompress(deflated)
    return raw_bytes.decode()