def _get_velocity_per_item()

in workshops/pre_POC_workshop/local_util/analysis.py [0:0]


def _get_velocity_per_item(the_df, timestamp_col, target_value_col, item_id_col, location_id_col=None):
    """Calculate velocity (total target value per hour) for each time series.

    A time series is identified by ``item_id_col`` alone, or by the pair
    (``item_id_col``, ``location_id_col``) when a location column is given.

    Args:
        the_df: pandas dataframe holding the timestamp, target value, item id
            and (optionally) location id columns. It is not modified; the
            function works on a copy.
        timestamp_col: name of the timestamp column. Values are parsed with
            the format ``%Y-%m-%d %H:%M:%S``.
        target_value_col: name of the column that is summed per series.
        item_id_col: name of the item identifier column.
        location_id_col: optional name of the location identifier column.

    Returns:
        Aggregated pandas dataframe with one row per time series, containing
        the grouping column(s), ``time_span`` (hours between the first and
        last timestamp of the series, plus 1), the summed ``target_value_col``
        and ``velocity`` (= summed target value / ``time_span``).
    """
    df = the_df.copy()
    df[timestamp_col] = pd.to_datetime(df[timestamp_col], format='%Y-%m-%d %H:%M:%S')

    # Append 2 extra columns per time series: min_time, max_time.
    # Pass the converted copy (not the caller's frame) so that the datetime
    # parsing above is actually used and the input is left untouched.
    if location_id_col is None:
        group_cols = [item_id_col]
        df = _get_time_min_max(df, item_id_col, timestamp_col)
    else:
        group_cols = [item_id_col, location_id_col]
        df = _get_time_min_max(df, item_id_col, timestamp_col, location_id_col)

    # Time span per time series, in hours. total_seconds() is required here:
    # Timedelta.seconds only holds the sub-day remainder and would silently
    # drop whole days for any series longer than 24 hours.
    # Add 1 to include both the start datetime and the end datetime.
    elapsed = df['max_time'] - df['min_time']
    df['time_span'] = elapsed.dt.total_seconds() / 3600 + 1

    # Average demand per time unit: total target value over the series span.
    # time_span is constant within a series, so 'mean' simply carries it over.
    df = df.groupby(group_cols, as_index=False).agg({'time_span': 'mean', target_value_col: 'sum'})
    df['velocity'] = df[target_value_col] / df['time_span']

    return df