def calculate_prediction()

in marketing-analytics/predicting/future-customer-value-segments/common/__init__.py [0:0]


def calculate_prediction(_, options, fullcbs, num_customers, num_txns):
    """Fits the configured models and builds per-customer predictions.

    The frequency model selected in the options (BG/NBD, MBG/NBD, BG/BB or
    Pareto/NBD) is fitted on the CBS records and used to compute each
    customer's probability of being alive and predicted number of purchases
    over the prediction period. A Gamma-Gamma model is then fitted to obtain
    the future average order value, and the expected value is the product of
    predicted purchases and future average order value.

    Args:
        _: Unused; it only exists to trigger the call, because this function
            runs inside a FlatMap operator.
        options: Pipeline options, indexed with the `_OPTION_*` keys.
        fullcbs: Full customer-by-sufficient-statistic (CBS) records. They are
            loaded into a DataFrame with the columns customer_id,
            number_of_transactions, historical_aov, frequency, recency and
            total_time_observed.
        num_customers: Dictionary of customer statistics; the
            'num_customers_total' entry is read.
        num_txns: Dictionary of transaction statistics; the 'num_txns_total'
            entry is read.

    Returns:
        A one-element list containing `[values, prediction_params]`, where
        `values` holds the per-customer prediction rows and
        `prediction_params` is the Dict of prediction parameters. The extra
        list level is there because of the FlatMap operator.

    Raises:
        ValueError: If the configured frequency model type is not one of the
            supported model types.
    """
    granularity = options[_OPTION_MODEL_TIME_GRANULARITY]
    time_unit = TimeGranularityParams(granularity).get_time_unit()
    horizon = options[_OPTION_PREDICTION_PERIOD]

    prediction_params = {
        'prediction_period': horizon,
        'prediction_period_unit': time_unit,
        'model_time_granularity': granularity.capitalize(),
        'customers_modeled': num_customers['num_customers_total'],
        'transactions_observed': num_txns['num_txns_total'],
    }

    # Load the full CBS matrix.
    cbs = pd.DataFrame(
        fullcbs,
        columns=[
            'customer_id',
            'number_of_transactions',
            'historical_aov',
            'frequency',
            'recency',
            'total_time_observed',
        ],
    )

    # Pick the fitting routine, the parameter extractor, the display label and
    # the prediction_params slot that receives the fitted parameters.
    model_type = options[_OPTION_FREQUENCY_MODEL_TYPE]
    if model_type == _MODEL_TYPE_BGNBD:
        fit_fn, extract_fn = fit_bgnbd_model, extract_bgnbd_params
        label, params_slot = 'BG/NBD', 'bgnbd_model_params'
    elif model_type == _MODEL_TYPE_MBGNBD:
        # MBG/NBD parameters are reported through the BG/NBD extractor/slot.
        fit_fn, extract_fn = fit_mbgnbd_model, extract_bgnbd_params
        label, params_slot = 'MBG/NBD', 'bgnbd_model_params'
    elif model_type == _MODEL_TYPE_BGBB:
        fit_fn, extract_fn = fit_bgbb_model, extract_bgbb_params
        label, params_slot = 'BG/BB', 'bgbb_model_params'
    elif model_type == _MODEL_TYPE_PNBD:
        fit_fn, extract_fn = fit_pnbd_model, extract_pnbd_params
        label, params_slot = 'Pareto/NBD', 'paretonbd_model_params'
    else:
        raise ValueError('Model type %s is not valid' % model_type)

    # Fit the frequency model and record its parameters; the slots belonging
    # to the other model families are explicitly set to None.
    penalizer = options[_OPTION_PENALIZER_COEF]
    frequency_model = fit_fn(cbs, penalizer)
    fitted_params = extract_fn(frequency_model)

    prediction_params['frequency_model'] = label
    for slot in ('bgnbd_model_params', 'bgbb_model_params',
                 'paretonbd_model_params'):
        prediction_params[slot] = (
            fitted_params if slot == params_slot else None)

    frequency = cbs['frequency']
    recency = cbs['recency']
    observed = cbs['total_time_observed']

    # Probability alive: the BG/BB call additionally takes the prediction
    # period as its leading argument.
    alive_args = (frequency, recency, observed)
    if model_type == _MODEL_TYPE_BGBB:
        alive_args = (horizon,) + alive_args
    cbs['p_alive'] = frequency_model.conditional_probability_alive(*alive_args)

    # Future purchases over the prediction period (weeks/days/months).
    # Pareto/NBD goes through a dedicated helper that consumes p_alive.
    if model_type == _MODEL_TYPE_PNBD:
        purchases = pnbd_conditional_expected_transactions(
            frequency_model, cbs['p_alive'], horizon, frequency, observed)
    else:
        purchases = (
            frequency_model
            .conditional_expected_number_of_purchases_up_to_time(
                horizon, frequency, recency, observed))
    cbs['predicted_purchases'] = purchases

    # Gamma-Gamma (spend) model.
    spend_model = fit_gamma_gamma_model_prediction(cbs, penalizer)
    prediction_params['gamma_gamma_params'] = extract_gamma_gamma_params(
        spend_model)

    # Future average order value per customer.
    cbs['future_aov'] = spend_model.conditional_expected_average_profit(
        cbs['number_of_transactions'], cbs['historical_aov'])

    # CLV (expected value) = predicted purchases * future AOV.
    cbs['expected_value'] = cbs['predicted_purchases'] * cbs['future_aov']

    # Make recency human-interpretable: replace it with the difference
    # between the total time observed and the original recency value.
    cbs['recency'] = cbs['total_time_observed'] - cbs['recency']

    # Final output columns, in the order consumers receive them.
    output_columns = [
        'customer_id',
        'p_alive',
        'predicted_purchases',
        'future_aov',
        'historical_aov',
        'expected_value',
        'frequency',
        'recency',
        'total_time_observed',
    ]
    return [[cbs[output_columns].values, prediction_params]]