in python/prophet/diagnostics.py [0:0]
def performance_metrics(df, metrics=None, rolling_window=0.1, monthly=False):
    """Compute performance metrics from cross-validation results.

    Computes a suite of performance metrics on the output of cross-validation.
    By default the following metrics are included:
    'mse': mean squared error
    'rmse': root mean squared error
    'mae': mean absolute error
    'mape': mean absolute percent error
    'mdape': median absolute percent error
    'smape': symmetric mean absolute percentage error
    'coverage': coverage of the upper and lower intervals

    A subset of these can be specified by passing a list of names as the
    `metrics` argument.

    Metrics are calculated over a rolling window of cross validation
    predictions, after sorting by horizon. Averaging is first done within each
    value of horizon, and then across horizons as needed to reach the window
    size. The size of that window (number of simulated forecast points) is
    determined by the rolling_window argument, which specifies a proportion of
    simulated forecast points to include in each window. rolling_window=0 will
    compute it separately for each horizon. The default of rolling_window=0.1
    will use 10% of the rows in df in each window. rolling_window=1 will
    compute the metric across all simulated forecast points. The results are
    set to the right edge of the window.

    If rolling_window < 0, then metrics are computed at each datapoint with no
    averaging (i.e., 'mse' will actually be squared error with no mean).

    Some requested metrics are silently dropped when they cannot be computed:
    'coverage' is dropped if df lacks 'yhat_lower' or 'yhat_upper', and 'mape'
    is dropped (with an info log message) if any |y| is below 1e-8. The
    caller's `metrics` sequence is never modified.

    The output is a dataframe containing column 'horizon' along with columns
    for each of the metrics computed.

    Parameters
    ----------
    df: The dataframe returned by cross_validation.
    metrics: A list of performance metrics to compute. If not provided, will
        use ['mse', 'rmse', 'mae', 'mape', 'mdape', 'smape', 'coverage'].
    rolling_window: Proportion of data to use in each rolling window for
        computing the metrics. Should be in [0, 1] to average.
    monthly: monthly=True will compute horizons as numbers of calendar months
        from the cutoff date, starting from 0 for the cutoff month.

    Returns
    -------
    Dataframe with a column for each metric, and column 'horizon'. Returns
    None if no metrics are left to compute after dropping the ones that do
    not apply.

    Raises
    ------
    ValueError: If `metrics` contains duplicates or a name that is not one of
        the valid metrics.
    """
    valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'mdape', 'smape', 'coverage']
    # Always work on a private copy: entries may be removed below, and that
    # pruning must not leak into the list object owned by the caller.
    metrics = list(valid_metrics) if metrics is None else list(metrics)
    if ('yhat_lower' not in df or 'yhat_upper' not in df) and ('coverage' in metrics):
        metrics.remove('coverage')
    if len(set(metrics)) != len(metrics):
        raise ValueError('Input metrics must be a list of unique values')
    if not set(metrics).issubset(set(valid_metrics)):
        raise ValueError(
            'Valid values for metrics are: {}'.format(valid_metrics)
        )
    df_m = df.copy()
    if monthly:
        # Difference of month ordinals: 0 for rows in the cutoff month.
        df_m['horizon'] = df_m['ds'].dt.to_period('M').astype(int) - df_m['cutoff'].dt.to_period('M').astype(int)
    else:
        df_m['horizon'] = df_m['ds'] - df_m['cutoff']
    df_m.sort_values('horizon', inplace=True)
    if 'mape' in metrics and df_m['y'].abs().min() < 1e-8:
        logger.info('Skipping MAPE because y close to 0')
        metrics.remove('mape')
    if not metrics:
        return None
    # Window size in rows. A negative value is passed through unchanged to
    # request pointwise (un-averaged) metrics; otherwise clamp to [1, n_rows].
    w = int(rolling_window * df_m.shape[0])
    if w >= 0:
        w = max(w, 1)
        w = min(w, df_m.shape[0])
    # Compute all metrics
    dfs = {}
    for metric in metrics:
        # NOTE(review): eval() resolves the metric name to the function of
        # the same name. It is only reached after every name was checked
        # against valid_metrics above; never call it on unvalidated input.
        dfs[metric] = eval(metric)(df_m, w)
    # Use the first metric's frame as the base and attach the other metric
    # columns to it; every frame must share the same horizon values.
    res = dfs[metrics[0]]
    for metric in metrics[1:]:
        res_m = dfs[metric]
        assert np.array_equal(res['horizon'].values, res_m['horizon'].values)
        res[metric] = res_m[metric]
    return res