in marketing-analytics/predicting/future-customer-value-segments/common/__init__.py [0:0]
def calculate_prediction(_, options, fullcbs, num_customers, num_txns):
    """Fits the configured models and produces per-customer predictions.

    A frequency model (selected through the pipeline options) and a
    Gamma-Gamma spend model are fitted on the full CBS data. They are then
    used to derive, for every customer, the probability of being alive, the
    predicted number of purchases over the prediction period, the future
    average order value and the resulting expected value.

    Args:
        _: Unused. Only present because this function runs inside a FlatMap
            operator and needs an element to trigger the call.
        options: Pipeline options, indexed by option name.
        fullcbs: Full customer-by-sufficient-statistic (CBS) records. Each
            record supplies, in order: customer_id, number_of_transactions,
            historical_aov, frequency, recency, total_time_observed.
        num_customers: Dictionary of customer-count statistics; the
            'num_customers_total' entry is read.
        num_txns: Dictionary of transaction-count statistics; the
            'num_txns_total' entry is read.

    Returns:
        A single-element list (because of the FlatMap operator) holding a
        two-item list: the per-customer prediction rows (as returned by
        DataFrame.values) and the dictionary of prediction parameters.

    Raises:
        ValueError: If the configured frequency model type is not one of the
            supported model types.
    """
    granularity = options[_OPTION_MODEL_TIME_GRANULARITY]
    time_unit = TimeGranularityParams(granularity).get_time_unit()
    horizon = options[_OPTION_PREDICTION_PERIOD]

    params = {
        'prediction_period': horizon,
        'prediction_period_unit': time_unit,
        'model_time_granularity': granularity.capitalize(),
        'customers_modeled': num_customers['num_customers_total'],
        'transactions_observed': num_txns['num_txns_total']
    }

    # Load the full CBS matrix into a DataFrame.
    cbs = pd.DataFrame(
        fullcbs,
        columns=[
            'customer_id', 'number_of_transactions', 'historical_aov',
            'frequency', 'recency', 'total_time_observed'
        ])

    # Pick the fitting routine, the parameter extractor, the reported model
    # label and the parameter slot that the fitted parameters are stored in.
    model_kind = options[_OPTION_FREQUENCY_MODEL_TYPE]
    if model_kind == _MODEL_TYPE_BGNBD:
        fit_fn, extract_fn = fit_bgnbd_model, extract_bgnbd_params
        label, filled_slot = 'BG/NBD', 'bgnbd_model_params'
    elif model_kind == _MODEL_TYPE_MBGNBD:
        # MBG/NBD parameters are reported through the BG/NBD slot.
        fit_fn, extract_fn = fit_mbgnbd_model, extract_bgnbd_params
        label, filled_slot = 'MBG/NBD', 'bgnbd_model_params'
    elif model_kind == _MODEL_TYPE_BGBB:
        fit_fn, extract_fn = fit_bgbb_model, extract_bgbb_params
        label, filled_slot = 'BG/BB', 'bgbb_model_params'
    elif model_kind == _MODEL_TYPE_PNBD:
        fit_fn, extract_fn = fit_pnbd_model, extract_pnbd_params
        label, filled_slot = 'Pareto/NBD', 'paretonbd_model_params'
    else:
        raise ValueError('Model type %s is not valid' % model_kind)

    # Fit the frequency model and record its parameters; the slots of the
    # models that were not used are explicitly set to None.
    frequency_model = fit_fn(cbs, options[_OPTION_PENALIZER_COEF])
    fitted_params = extract_fn(frequency_model)
    params['frequency_model'] = label
    for slot in ('bgnbd_model_params', 'bgbb_model_params',
                 'paretonbd_model_params'):
        params[slot] = fitted_params if slot == filled_slot else None

    frequency = cbs['frequency']
    recency = cbs['recency']
    observed = cbs['total_time_observed']

    # Probability of each customer being alive. The BG/BB call additionally
    # takes the prediction period as its first argument.
    if model_kind == _MODEL_TYPE_BGBB:
        p_alive = frequency_model.conditional_probability_alive(
            horizon, frequency, recency, observed)
    else:
        p_alive = frequency_model.conditional_probability_alive(
            frequency, recency, observed)
    cbs['p_alive'] = p_alive

    # Expected number of purchases over the prediction period
    # (X weeks/days/months). Pareto/NBD goes through a dedicated helper.
    if model_kind == _MODEL_TYPE_PNBD:
        purchases = pnbd_conditional_expected_transactions(
            frequency_model, cbs['p_alive'], horizon, frequency, observed)
    else:
        purchases = (
            frequency_model.conditional_expected_number_of_purchases_up_to_time(
                horizon, frequency, recency, observed))
    cbs['predicted_purchases'] = purchases

    # Gamma-Gamma (spend) model.
    spend_model = fit_gamma_gamma_model_prediction(
        cbs, options[_OPTION_PENALIZER_COEF])
    params['gamma_gamma_params'] = extract_gamma_gamma_params(spend_model)

    # Future average order value per customer.
    cbs['future_aov'] = spend_model.conditional_expected_average_profit(
        cbs['number_of_transactions'], cbs['historical_aov'])

    # Expected value (CLV) = predicted purchases x future AOV.
    cbs['expected_value'] = cbs['predicted_purchases'] * cbs['future_aov']

    # Make recency human-interpretable by replacing it with
    # total_time_observed minus the original recency.
    cbs['recency'] = cbs['total_time_observed'] - cbs['recency']

    # Final output columns, in the order they are emitted.
    output_columns = [
        'customer_id', 'p_alive', 'predicted_purchases', 'future_aov',
        'historical_aov', 'expected_value', 'frequency', 'recency',
        'total_time_observed'
    ]
    return [[cbs[output_columns].values, params]]