in workshops/pre_POC_workshop/local_util/analysis.py [0:0]
def _get_time_min_max(the_df, item_id_col, timestamp_col, location_id_col=None):
    """Attach each time series' first and last timestamp to every row.

    A time series is identified by ``item_id_col`` alone, or by the pair
    (``item_id_col``, ``location_id_col``) when a location column is given.

    Args:
        the_df: pandas DataFrame containing at least the grouping column(s)
            and ``timestamp_col``. It is not modified.
        item_id_col: name of the item identifier column.
        timestamp_col: name of the timestamp column to take min/max of.
        location_id_col: optional name of the location identifier column;
            when ``None`` the grouping is per item only.

    Returns:
        A new DataFrame with all original columns plus two extra columns,
        ``max_time`` and ``min_time`` (in that order), holding the per-series
        maximum and minimum of ``timestamp_col``. The result comes from a
        default (inner) ``merge`` on the grouping column(s).
    """
    group_cols = [item_id_col]
    if location_id_col is not None:
        group_cols.append(location_id_col)

    # Aggregate ONLY the timestamp column. Reducing the whole frame and then
    # selecting the timestamp wastes work and can fail on unrelated columns
    # whose values cannot be ordered (e.g. mixed-type object columns).
    time_bounds = (
        the_df.groupby(group_cols)[timestamp_col]
        .agg(max_time="max", min_time="min")
        .reset_index()
    )

    # merge() returns a new frame, so no defensive copy of the input is needed.
    return the_df.merge(right=time_bounds, on=group_cols)