def _get_time_min_max()

in workshops/pre_POC_workshop/local_util/analysis.py [0:0]


def _get_time_min_max(the_df, item_id_col, timestamp_col, location_id_col=None):
    """Calculate min timestamp, max timestamp per item and/or per item-location time series  
       Inputs: pandas dataframe with columns: timestamp, target_value, item_id, location_id (optional)
       Outputs: pandas dataframe with 2 extra columns "min_time" and "max_time"
    """
    df = the_df.copy()
    
    if location_id_col is None:
        # get max
        max_time_df = \
            df.groupby([item_id_col], as_index=False).max()[[item_id_col, timestamp_col]]
        max_time_df.columns = [item_id_col, 'max_time']
        # get min
        min_time_df = df.groupby([item_id_col], as_index=False).min()[[item_id_col, timestamp_col]]
        min_time_df.columns = [item_id_col, 'min_time']
        # merge 2 extra columns per item grouping: max and min
        df = df.merge(right=max_time_df, on=item_id_col)
        df = df.merge(right=min_time_df, on=item_id_col)
        
    else:
        # get max
        max_time_df = \
            df.groupby([item_id_col, location_id_col], as_index=False).max()[[item_id_col, location_id_col, timestamp_col]]
        max_time_df.columns = [item_id_col, location_id_col, 'max_time']
        # get min
        min_time_df = df.groupby([item_id_col, location_id_col], as_index=False).min()[[item_id_col, location_id_col, timestamp_col]]
        min_time_df.columns = [item_id_col, location_id_col, 'min_time']
        # merge 2 extra columns per item grouping: max and min
        df = df.merge(right=max_time_df, on=[item_id_col, location_id_col])
        df = df.merge(right=min_time_df, on=[item_id_col, location_id_col])
        
    return df