in jobs/ltv_daily.py [0:0]
def train_metric(d, metric, plot=True, penalty=0):
    """Fit a BetaGeoFitter for one metric and compare actual vs. simulated counts.

    The input frame is expected to carry three columns named after the metric:
    ``<metric>_frequency``, ``<metric>_recency`` and ``<metric>_T``.  Only rows
    with frequency > 0 and recency >= 0 are used for fitting, and the frequency
    of those rows is reduced by one before the fit.

    Args:
        d: DataFrame holding the three ``<metric>_*`` columns.  It is not
            modified; the filtered rows are copied before being adjusted.
        metric: Column-name prefix selecting which metric to train on.
        plot: When true, draw a bar chart of the comparison table and call
            ``display()``.
        penalty: Passed to ``BetaGeoFitter`` as ``penalizer_coef``.

    Returns:
        A ``(combined_counts, bgf)`` tuple.  ``combined_counts`` is a DataFrame
        indexed by frequency value with columns ``"Actual"`` (counts from the
        data the model was fit on) and ``"Model"`` (counts from data simulated
        by the fitted model); ``bgf`` is the fitted ``BetaGeoFitter``.
    """
    frequency = metric + "_frequency"
    recency = metric + "_recency"
    T = metric + "_T"

    # Copy the filtered rows explicitly: assigning into the result of a boolean
    # selection otherwise triggers pandas' SettingWithCopyWarning and leaves it
    # ambiguous whether the caller's frame is touched.
    valid_rows = (d[frequency] > 0) & (d[recency] >= 0)
    train = d[valid_rows].copy()

    # NOTE(review): presumably converts a total event count into a count of
    # *repeat* events, which is what the BG/NBD model expects -- confirm
    # against how the <metric>_frequency column is produced upstream.
    train[frequency] = train[frequency] - 1

    bgf = BetaGeoFitter(penalizer_coef=penalty)
    bgf.fit(train[frequency], train[recency], train[T])

    # Simulate as many customers as the model was fit on so the two
    # frequency histograms are directly comparable.
    n = bgf.data.shape[0]
    simulated_data = bgf.generate_new_data(size=n)

    # Keep only the first 28 distinct frequency values (in ascending order)
    # from each histogram.
    model_counts = pd.DataFrame(
        bgf.data["frequency"].value_counts().sort_index().iloc[:28]
    )
    simulated_counts = pd.DataFrame(
        simulated_data["frequency"].value_counts().sort_index().iloc[:28]
    )

    # Outer join on the frequency value; a value present on only one side
    # gets a count of 0 on the other.
    combined_counts = model_counts.merge(
        simulated_counts, how="outer", left_index=True, right_index=True
    ).fillna(0)
    combined_counts.columns = ["Actual", "Model"]

    if plot:
        combined_counts.plot.bar()
        # NOTE(review): `display` is not defined in this block; presumably the
        # notebook-provided helper that renders the current figure -- confirm.
        display()

    return combined_counts, bgf