in marketing-analytics/predicting/future-customer-value-segments/common/__init__.py [0:0]
def calculate_model_fit_validation(_, options, dates, calcbs, repeat_tx,
                                   num_customers, num_txns):
    """Validates the model fit and reports whether the pipeline may continue.

    Runs the frequency model validation and the gamma-gamma (spend) model
    validation on the calibration data, then compares the frequency model's
    validation MAPE against the configured threshold (skipped when the
    frequency model type is BG/BB).

    Args:
        _: Ignored. Only present as the value that triggers the call of
            this function (since it's running inside a FlatMap operator).
        options: Pipeline options.
        dates: Dictionary containing important border dates to use in the
            calculation.
        calcbs: customer-by-sufficient-statistic (CBS) matrix in calibration
            period.
        repeat_tx: Sorted cumulative sum of transactions for each time unit.
        num_customers: Dictionary containing statistics regarding the number
            of customers.
        num_txns: Dictionary containing statistics regarding the number
            of transactions.

    Returns:
        A single-element list holding the tuple (validation_params, error).
        validation_params is a dictionary with the border dates (formatted
        through date_to_str), the capitalized model time granularity and,
        under 'model', the value returned by frequency_model_validation.
        error is None, or an error message when the validation MAPE exceeds
        the allowed threshold.
        The result is wrapped in a list since this function is called
        inside a FlatMap operator.
    """
    cbs_columns = [
        'customer_id', 'number_of_transactions', 'average_order_value',
        'frequency', 'recency', 'total_time_observed'
    ]
    txs_columns = [
        'time_unit_number', 'repeat_transactions',
        'repeat_transactions_cumulative'
    ]
    cbs = pd.DataFrame(calcbs, columns=cbs_columns)
    txs = pd.DataFrame(repeat_tx, columns=txs_columns)

    granularity_params = TimeGranularityParams(
        options[_OPTION_MODEL_TIME_GRANULARITY])
    model_time_divisor = granularity_params.get_days()

    # Keyword arguments are collected in the same order in which the
    # validation function receives them.
    frequency_kwargs = {
        'model_type': options[_OPTION_FREQUENCY_MODEL_TYPE],
        'cbs': cbs,
        'cal_start_date': dates[_OPTION_CALIBRATION_START_DATE],
        'cal_end_date': dates[_OPTION_CALIBRATION_END_DATE],
        'time_divisor': model_time_divisor,
        'time_label': options[_OPTION_MODEL_TIME_GRANULARITY],
        'hold_end_date': dates[_OPTION_HOLDOUT_END_DATE],
        'repeat_tx': txs,
        'output_folder': options[_OPTION_OUTPUT_FOLDER],
        'num_customers_cohort': num_customers['num_customers_cohort'],
        'perc_customers_cohort': num_customers['perc_customers_cohort'],
        'num_txns_val': num_txns['num_txns_val'],
        'perc_txns_val': num_txns['perc_txns_val'],
        'penalizer_coef': options[_OPTION_PENALIZER_COEF],
    }
    model_params = frequency_model_validation(**frequency_kwargs)

    # Validate the gamma-gamma (spend) model as well; its return value is
    # not used here.
    gamma_gamma_validation(cbs, options[_OPTION_PENALIZER_COEF])

    # Output label -> key of the corresponding border date in `dates`.
    date_fields = (
        ('calibration_start_date', _OPTION_CALIBRATION_START_DATE),
        ('calibration_end_date', _OPTION_CALIBRATION_END_DATE),
        ('cohort_start_date', _OPTION_COHORT_START_DATE),
        ('cohort_end_date', _OPTION_COHORT_END_DATE),
        ('holdout_end_date', _OPTION_HOLDOUT_END_DATE),
    )
    validation_params = {
        label: date_to_str(dates[date_key])
        for label, date_key in date_fields
    }
    validation_params['model_time_granularity'] = (
        options[_OPTION_MODEL_TIME_GRANULARITY].capitalize())
    validation_params['model'] = model_params

    # Check whether the transaction frequency error is within the allowed
    # threshold. The MAPE is only inspected when the frequency model is not
    # BG/BB. Exceeding the threshold does not raise here: the error message
    # is handed back to the caller alongside the parameters.
    error = None
    if options[_OPTION_FREQUENCY_MODEL_TYPE] != _MODEL_TYPE_BGBB:
        mape = float(model_params['validation_mape'])
        threshold = float(options[_OPTION_TRANSACTION_FREQUENCY_THRESHOLD])
        if mape > threshold:
            # model_params is the very object stored under
            # validation_params['model'], so the flag is part of the result.
            model_params['invalid_mape'] = True
            error = (
                f"Validation Mean Absolute Percent Error (MAPE) [{model_params['validation_mape']}%]"
                " exceeded the allowable threshold of "
                f"{options[_OPTION_TRANSACTION_FREQUENCY_THRESHOLD]}"
            )

    return [(validation_params, error)]