in workshops/pre_POC_workshop/local_util/analysis.py [0:0]
def _get_velocity_per_item(the_df, timestamp_col, target_value_col, item_id_col, location_id_col=None):
    """Calculate velocity as total target demand per hour for each time series.

    A time series is identified by ``item_id_col`` alone, or by the pair
    (``item_id_col``, ``location_id_col``) when a location column is given.

    Inputs:
        the_df: pandas dataframe with columns timestamp, target_value,
            item_id and (optionally) location_id. It is not modified.
        timestamp_col: name of the timestamp column; values are parsed with
            the format '%Y-%m-%d %H:%M:%S'.
        target_value_col: name of the demand column that is summed.
        item_id_col: name of the item identifier column.
        location_id_col: optional name of the location identifier column.

    Outputs:
        pandas dataframe with one row per time series holding the series key
        column(s), "time_span" (hours between first and last timestamp,
        plus 1), the summed target value, and "velocity"
        (summed target value / time_span).
    """
    df = the_df.copy()
    df[timestamp_col] = pd.to_datetime(df[timestamp_col], format='%Y-%m-%d %H:%M:%S')

    # Append 2 extra columns per time series: min_time, max_time.
    # Pass the parsed copy (not the raw input) so the timestamp conversion
    # above is actually used by the helper.
    # NOTE(review): assumes _get_time_min_max returns the rows of its input
    # with 'min_time' and 'max_time' columns added -- confirm against helper.
    if location_id_col is None:
        series_keys = [item_id_col]
        df = _get_time_min_max(df, item_id_col, timestamp_col)
    else:
        series_keys = [item_id_col, location_id_col]
        df = _get_time_min_max(df, item_id_col, timestamp_col, location_id_col)

    # Time span per time series, in hours. total_seconds() is required here:
    # Timedelta.seconds only holds the sub-day remainder and silently drops
    # whole days. Add 1 to include both the start and the end datetime.
    elapsed = pd.to_timedelta(df['max_time'] - df['min_time'])
    df['time_span'] = elapsed.dt.total_seconds() / 3600 + 1

    # Average item demand per time unit: time_span is constant within a
    # series, so 'mean' just carries it through the aggregation.
    df = df.groupby(series_keys, as_index=False).agg({'time_span': 'mean', target_value_col: 'sum'})
    df['velocity'] = df[target_value_col] / df['time_span']
    return df