in python/prophet/forecaster.py [0:0]
def fit(self, df, **kwargs):
    """Fit the Prophet model to a history dataframe.

    On success self.params holds the fitted model parameters. It is a
    dictionary with parameter names as keys and the following items:
        k (Mx1 array): M posterior samples of the initial slope.
        m (Mx1 array): The initial intercept.
        delta (MxN array): The slope change at each of N changepoints.
        beta (MxK matrix): Coefficients for K seasonality features.
        sigma_obs (Mx1 array): Noise level.
    Note that M=1 if MAP estimation.

    Besides self.params, fitting also sets self.history,
    self.history_dates, self.train_component_cols, self.component_modes,
    self.fit_kwargs and self.stan_fit.

    Parameters
    ----------
    df: pd.DataFrame containing the history. Must have columns ds (date
        type) and y, the time series. If self.growth is 'logistic', then
        df must also have a column cap that specifies the capacity at
        each ds.
    kwargs: Additional arguments passed to the optimizing or sampling
        functions in Stan.

    Returns
    -------
    The fitted Prophet object.

    Raises
    ------
    Exception: If self.history is already set (the object was fit before).
    ValueError: If df lacks a ds or y column, or has fewer than 2 rows
        with a non-null y.
    """
    if self.history is not None:
        raise Exception('Prophet object can only be fit once. '
                        'Instantiate a new object.')
    if not ('ds' in df and 'y' in df):
        raise ValueError(
            'Dataframe must have columns "ds" and "y" with the dates and '
            'values respectively.'
        )

    # Only rows with an observed y take part in the fit.
    has_observation = df['y'].notnull()
    history = df[has_observation].copy()
    if len(history) < 2:
        raise ValueError('Dataframe has less than 2 non-NaN rows.')

    # history_dates is built from every distinct ds in df, including rows
    # whose y is null.
    distinct_dates = pd.Series(df['ds'].unique(), name='ds')
    self.history_dates = pd.to_datetime(distinct_dates).sort_values()

    # self.history is assigned before the seasonality/changepoint setup
    # calls below; keep this ordering.
    history = self.setup_dataframe(history, initialize_scales=True)
    self.history = history
    self.set_auto_seasonalities()
    features = self.make_all_seasonality_features(history)
    seasonal_features, prior_scales, component_cols, modes = features
    self.train_component_cols = component_cols
    self.component_modes = modes
    self.fit_kwargs = deepcopy(kwargs)
    self.set_changepoints()

    n_features = seasonal_features.shape[1]
    n_changepoints = len(self.changepoints_t)
    growth_codes = {'linear': 0, 'logistic': 1, 'flat': 2}

    # Data dictionary handed to the Stan backend.
    dat = {
        'T': history.shape[0],
        'K': n_features,
        'S': n_changepoints,
        'y': history['y_scaled'],
        't': history['t'],
        't_change': self.changepoints_t,
        'X': seasonal_features,
        'sigmas': prior_scales,
        'tau': self.changepoint_prior_scale,
        'trend_indicator': growth_codes[self.growth],
        's_a': component_cols['additive_terms'],
        's_m': component_cols['multiplicative_terms'],
    }

    # Linear and flat growth pass an all-zero cap; any other growth uses
    # the scaled capacity column.
    if self.growth in ('linear', 'flat'):
        dat['cap'] = np.zeros(self.history.shape[0])
        if self.growth == 'linear':
            init_trend = self.linear_growth_init
        else:
            init_trend = self.flat_growth_init
    else:
        dat['cap'] = history['cap_scaled']
        init_trend = self.logistic_growth_init
    trend_init = init_trend(history)

    stan_init = {
        'k': trend_init[0],
        'm': trend_init[1],
        'delta': np.zeros(n_changepoints),
        'beta': np.zeros(n_features),
        'sigma_obs': 1,
    }

    y_is_constant = history['y'].min() == history['y'].max()
    if y_is_constant and self.growth in ('linear', 'flat'):
        # Constant series with a linear/flat trend: the backend is not
        # called. The initial values become the parameters, with a tiny
        # noise level, each given a leading axis of length 1.
        shortcut = {**stan_init, 'sigma_obs': 1e-9}
        self.params = {
            name: np.array([value]) for name, value in shortcut.items()
        }
    elif self.mcmc_samples > 0:
        self.params = self.stan_backend.sampling(
            stan_init, dat, self.mcmc_samples, **kwargs)
    else:
        self.params = self.stan_backend.fit(stan_init, dat, **kwargs)

    self.stan_fit = self.stan_backend.stan_fit

    # If no changepoints were requested, replace delta with 0s
    if len(self.changepoints) == 0:
        delta = self.params['delta']
        # Fold delta into the base rate k
        self.params['k'] = self.params['k'] + delta.reshape(-1)
        self.params['delta'] = np.zeros(delta.shape).reshape((-1, 1))

    return self