import re
from bisect import bisect_left
from datetime import datetime, timedelta
import zlib, base64
import numpy as np
import pandas as pd
from copy import deepcopy
from .exceptions import InvalidArgumentException, UnexpectedInputFormatException
# @profile
def index(a, x):
    """
    Binary search for the leftmost element of a sorted sequence equal to x.

    :param np.array a: Sequence to search; bisection requires it to be sorted ascending.
    :param number x: Value to look up.
    :return: number -- position of the first element equal to x
    :raises ValueError: if no element of ``a`` equals ``x``
    """
    pos = bisect_left(a, x)
    # bisect_left only yields an insertion point; confirm the value is really there.
    found = pos < len(a) and a[pos] == x
    if not found:
        raise ValueError
    return pos
def dataframe_append(df_base, df_new):
    """
    Append the rows of one dataframe to another.

    ``DataFrame.append`` was deprecated in pandas 1.4 and removed in pandas 2.0,
    so the rows are joined with ``pd.concat``, which keeps the original index
    labels of both frames exactly as ``append`` did.

    :param pd.DataFrame df_base: Frame to extend, or None when nothing has been accumulated yet.
    :param pd.DataFrame df_new: Frame whose rows are added.
    :return: pd.DataFrame -- ``df_new`` itself when ``df_base`` is None, a new
        concatenated frame when ``df_base`` is a DataFrame, otherwise ``df_base``
        unchanged.
    """
    if df_base is None:
        return df_new
    if isinstance(df_base, pd.DataFrame):
        return pd.concat([df_base, df_new])
    # Any other type is handed back untouched, as before.
    return df_base
def array_append(arr_base, arr_new, stack='v'):
    """
    Stack a new numpy array onto an existing one.

    :param np.array arr_base: Array collected so far, or None for the first call.
    :param np.array arr_new: Array (or list) to add.
    :param char stack: The stacking dimension - horizontal ('h') or vertical ('v')
    :return: np.array -- the stacked array; when ``arr_base`` is None the new
        data itself (lists are converted to arrays); when ``arr_base`` is neither
        None nor an ndarray it is returned unchanged.
    :raises InvalidArgumentException: if ``stack`` is neither 'v' nor 'h'
    """
    if arr_base is None:
        # Nothing accumulated yet: the new data becomes the base.
        return np.array(arr_new) if isinstance(arr_new, list) else arr_new

    if not isinstance(arr_base, np.ndarray):
        return arr_base

    addition = np.array(arr_new) if isinstance(arr_new, list) else arr_new

    if stack == 'v':
        return np.vstack((arr_base, addition))

    if stack == 'h':
        # 1-D inputs are turned into single columns so they can sit side by side.
        left = arr_base.reshape(-1, 1) if len(arr_base.shape) == 1 else arr_base
        right = addition.reshape(-1, 1) if len(addition.shape) == 1 else addition
        return np.hstack((left, right))

    raise InvalidArgumentException
def merge_inner_lists(list_of_lists):
    """
    Flatten one level of nesting into a single list.

    Elements that are lists or tuples contribute their items; every other
    element is kept as it is. Deeper nesting is not unpacked.

    :param list list_of_lists: list of lists or tuples
    :return: list -- the merged elements, or None when the input is not a list or tuple
    """
    sequence_types = (list, tuple)
    if not isinstance(list_of_lists, sequence_types):
        return None

    flat = []
    for item in list_of_lists:
        flat += item if isinstance(item, sequence_types) else [item]
    return flat
def check_params(func):
    """
    Decorator that skips the call when any argument is missing or empty.

    Every positional and keyword argument is inspected (for a method this
    includes ``self``). If one of them is None, an empty list, an empty dict or
    an empty pd.DataFrame, the wrapped function is NOT called and the wrapper
    returns False instead.

    :param func: Function or method to guard.
    :return: Wrapper with the same signature; it keeps the name and docstring of ``func``.
    """
    # Imported locally so the decorator does not depend on a module-level import.
    from functools import wraps

    @wraps(func)
    def func_wrapper(*args, **kwargs):
        for arg in list(kwargs.values()) + list(args):
            if arg is None:
                return False
            if isinstance(arg, (list, dict)) and not arg:
                return False
            if isinstance(arg, pd.DataFrame) and arg.empty:
                return False
        return func(*args, **kwargs)

    return func_wrapper
def convert_params(func):
    """
    Decorator that hands pandas arguments to the function as numpy arrays.

    Every positional or keyword argument that is a pd.DataFrame or pd.Series is
    replaced by its ``.values``; all other arguments are passed through as they are.

    :param func: Function or method whose arguments should be converted.
    :return: Wrapper with the same signature; it keeps the name and docstring of ``func``.
    """
    # Imported locally so the decorator does not depend on a module-level import.
    from functools import wraps

    def _to_array(value):
        # Unwrap pandas containers, leave everything else alone.
        if isinstance(value, (pd.DataFrame, pd.Series)):
            return value.values
        return value

    @wraps(func)
    def func_wrapper(*args, **kwargs):
        _args = [_to_array(arg) for arg in args]
        _kwargs = {name: _to_array(value) for name, value in kwargs.items()}
        return func(*_args, **_kwargs)

    return func_wrapper
def chunkify(l, n):
    """
    Split a list into n smaller lists.

    The split is strided, not contiguous: chunk ``i`` holds the elements at
    positions ``i, i + n, i + 2n, ...`` of the input.

    :param list l: List to split.
    :param int n: Number of lists to be created.
    :return: list of n lists
    """
    chunks = []
    for offset in range(n):
        chunks.append(l[offset::n])
    return chunks
[docs]def merge_to_nearest(df1, df2):
"""
Function for merging of the pd.DataFrames according to their Datetime indices.
:param pd.DataFrame df1:
:param pd.DataFrame df2:
:return: pd.DataFrame
"""
idx = np.searchsorted(df1.index.values, df2.index.values) - 1
mask = idx >= 0
g = df1.iloc[idx][mask]
i_seen = set()
i_mask = []
for i in list(g.index):
if i in i_seen:
i_mask.append(False)
continue
i_mask.append(True)
i_seen.add(i)
i_mask = np.array(i_mask)
g = g.loc[i_mask]
gg = df2.loc[mask].loc[i_mask]
c1 = deepcopy(list(df1.columns))
c2 = deepcopy(list(df2.columns))
arr1 = g.values
arr2 = gg.values
arr_stack = np.hstack((arr1, arr2))
df = pd.DataFrame(arr_stack, columns=c1+c2, index=g.index)
return df
def subtract_time_intervals(new, saved):
    """
    Subtract the already saved interval from the new one, A-B.

    Only the end of ``saved`` is used to trim ``new``: the part of ``new`` that
    lies after ``saved['to']`` is returned.

    NOTE: a ``new`` interval that ends before ``saved['to']`` still yields
    ``{'from': saved['to'], 'to': new['to']}``, i.e. an interval whose 'from' is
    later than its 'to'.

    :param dict new: A, format {'from': datetime, 'to': datetime}
    :param dict saved: B, format {'from': datetime, 'to': datetime}; may be None
    :return: dict {'from': datetime, 'to': datetime}; ``new`` unchanged when
        ``saved`` is None or ``new`` starts at or after ``saved['to']``; an empty
        dict when both intervals are identical
    :raises ValueError: if both intervals are None
    """
    if new is None and saved is None:
        raise ValueError('Both of time intervals cannot be None !!')
    if saved is None:
        return new
    # Identical intervals must be checked first, otherwise the trimming branch
    # below would return a zero-length interval instead of "nothing left".
    if new == saved:
        return dict()
    # Starting exactly at the end of ``saved`` means there is no overlap either.
    if new['from'] >= saved['to']:
        return new
    return {'from': saved['to'], 'to': new['to']}
def partition_interval(time_interval, delta_hours=12):
    """
    Partition a time interval into a list of consecutive shorter intervals.

    Every piece is ``delta_hours`` long except possibly the last one, which
    ends exactly at ``time_interval['to']``. An interval of zero length yields
    an empty list.

    :param dict time_interval: dict of datetimes -> {'from': datetime, 'to': datetime}
    :param int delta_hours: Length of one piece in hours; must be positive.
    :return: list of dicts - [{'from': datetime, 'to': datetime}, ...]
    :raises UnexpectedInputFormatException: if 'from' is later than 'to'
    :raises ValueError: if ``delta_hours`` is not positive
    """
    start, end = time_interval['from'], time_interval['to']
    if start > end:
        raise UnexpectedInputFormatException('From is later than To !!')
    # A zero or negative step would never reach ``end`` and loop forever.
    if delta_hours <= 0:
        raise ValueError('delta_hours must be a positive number !!')

    step = timedelta(hours=delta_hours)
    list_between_times = []
    from_dt = start
    while from_dt + step < end:
        list_between_times.append({'from': from_dt, 'to': from_dt + step})
        from_dt = from_dt + step
    # Remaining tail (a full step or shorter); skipped only for an empty interval.
    if from_dt != end:
        list_between_times.append({'from': from_dt, 'to': end})
    return list_between_times
def impute(array, columns, invalid_val):
    """
    Replace invalid values in selected columns by linear interpolation.

    A row counts as bad when any of its selected columns equals ``invalid_val``.
    For bad rows ALL selected columns are re-computed by interpolating, over
    the row position, between the surrounding good rows. The input array is not
    modified.

    :param np.array array: Input data; a 1-D array is treated as a single column.
    :param list columns: Indices of columns.
    :param number invalid_val: Identifier of the invalid number.
    :return: tuple (imputed float array of the selected columns, confidence).
        The confidence is the fraction of rows that were NOT bad. When every
        row is bad (or there are no rows) the result is ``(None, 1.0)``.
    """
    if len(array.shape) < 2:
        array = array.reshape(-1, 1)
    # The ``np.float`` alias was removed in NumPy 1.24; builtin float is the same dtype.
    _arr = array[:, columns].astype(float)
    n_rows = _arr.shape[0]
    xes = np.arange(n_rows)
    bad_val = (_arr == invalid_val).any(axis=1)

    if np.all(bad_val):
        # No good row left to interpolate from.
        return None, 1.0

    if np.any(bad_val):
        good_val = ~bad_val
        for idx, c in enumerate(_arr.T):
            _arr[bad_val, idx] = np.interp(xes[bad_val], xes[good_val], c[good_val])

    return _arr, 1 - bad_val.sum() / n_rows
def compress(s):
    """
    Compress a string into a text-safe form.

    The string is UTF-8 encoded, zlib-compressed and then base64-encoded.

    :param string s: Text to compress.
    :return: string -- base64 text of the compressed data
    """
    raw_bytes = s.encode()
    packed = zlib.compress(raw_bytes)
    return base64.b64encode(packed).decode()
def decompress(c):
    """
    Restore a string from base64 text of zlib-compressed data.

    The input is base64-decoded, zlib-decompressed and decoded as UTF-8.

    :param string c: base64 text of zlib-compressed data.
    :return: string -- the original text
    """
    packed = base64.b64decode(c.encode())
    raw_bytes = zlib.decompress(packed)
    return raw_bytes.decode()