in kats/detectors/bocpd.py [0:0]
def _find_posterior(self, model: Any, changepoint_prior: np.ndarray) -> np.ndarray:
"""
This calculates the posterior distribution over changepoints.
The steps here are the same as the algorithm described in
Adams & MacKay, 2007. https://arxiv.org/abs/0710.3742
"""
# P(r_t | x_1:t)
rt_posterior = np.zeros(self._posterior_shape)
# initialize first step
# P(r_0 = 0) = 1
rt_posterior[0, 0] = 1.0
model.update_sufficient_stats(x=self.data_values[0, self._ts_slice])
# To avoid growing a large dynamic list, we construct a large
# array up front and grow the live region backwards from the end.
# This is conceptually equivalent to an array that we prepend to
# at every step, but avoids reallocating memory.
message = np.zeros(self._message_shape)
m_ptr = -1
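# For example, with T = 5 the live slice read at step i is
# message[self.T + m_ptr : self.T]: message[4:5] at i = 1,
# message[3:5] at i = 2, and so on, growing one slot to the left
# as m_ptr decreases by 1 per iteration.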
# set up arrays for debugging
self.pred_mean_arr = pred_mean_arr = np.zeros(self._posterior_shape)
self.pred_std_arr = pred_std_arr = np.zeros(self._posterior_shape)
self.next_pred_prob = next_pred_prob = np.zeros(self._posterior_shape)
# Calculate the log priors once outside the for-loop.
log_cp_prior = np.log(changepoint_prior)
log_om_cp_prior = np.log(1.0 - changepoint_prior)
self.posterior_predictive = 0.0
log_posterior = 0.0
# from the second step onwards
for i in range(1, self.T):
this_pt = self.data_values[i, self._ts_slice]
# P(x_t | r_{t-1}, x_t^(r))
# This array has length i; each element is the predictive probability
# of the current point under the hypothesis that the current run began
# at the corresponding earlier step.
# Step 3 of paper
pred_arr = model.pred_prob(t=i, x=this_pt)
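# For example, at i = 3 pred_arr has length 3, one entry per possible
# run length r_2 in {0, 1, 2}, aligned elementwise with the live slice
# of `message` used below.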
# Step 9 posterior predictive
if i > 1:
self.posterior_predictive += logsumexp(pred_arr + log_posterior)
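# This accumulates log P(x_i | x_1:i-1) = logsumexp over r_{i-1} of
# (log P(x_i | r_{i-1}) + log P(r_{i-1} | x_1:i-1)); summed over all
# steps it gives the running log marginal likelihood of the series.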
# record the mean/variance/prob for debugging
if self.debug:
pred_mean = model.pred_mean(t=i, x=this_pt)
pred_std = model.pred_std(t=i, x=this_pt)
pred_mean_arr[i, 0:i, self._ts_slice] = pred_mean
pred_std_arr[i, 0:i, self._ts_slice] = pred_std
next_pred_prob[i, 0:i, self._ts_slice] = pred_arr
# calculate the probability that this is a changepoint, i.e. r_t = 0
# step 5 of the paper
# this is an elementwise product of pred and message, i.e. an
# addition in log space
log_change_point_prob = np.logaddexp.reduce(
pred_arr
+ message[self.T + m_ptr : self.T, self._ts_slice]
+ log_cp_prior,
axis=0,
)
# step 4
# log_growth_prob = pred_arr + message + np.log(1.0 - changepoint_prior)
message[self.T + m_ptr : self.T, self._ts_slice] = (
pred_arr
+ message[self.T + m_ptr : self.T, self._ts_slice]
+ log_om_cp_prior
)
# P(r_t, x_1:t)
# log_joint_prob = np.append(log_change_point_prob, log_growth_prob)
m_ptr -= 1
message[self.T + m_ptr, self._ts_slice] = log_change_point_prob
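# At this point message[self.T + m_ptr : self.T] holds the unnormalized
# joint log P(r_i, x_1:i): the leftmost entry is r_i = 0 (a changepoint),
# and the remaining entries are the grown runs.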
# calculate evidence, step 6
# (P(x_1:t))
# log_evidence = logsumexp(log_joint_prob)
#
# We use two facts here to make this more efficient:
#
# (1) log(e^(x_1+c) + ... + e^(x_n+c))
# = log(e^c . (e^(x_1) + ... + e^(x_n)))
# = c + log(e^(x_1) + ... + e^(x_n))
#
# (2) log(e^x_1 + e^x_2 + ... + e^x_n) [Associativity of logsumexp]
# = log(e^x_1 + e^(log(e^x_2 + ... + e^x_n)))
#
# In particular, we rewrite:
#
# (5)   logaddexp_vec(pred_arr + message + log_cp_prior)
# (4+6) logaddexp_vec(append(log_change_point_prob, pred_arr + message + log_om_cp_prior))
#
# to
#
# M = logaddexp_vec(pred_arr + message) + log_cp_prior      (using (1))
# logaddexp_binary(                                          (using (2))
#     log_change_point_prob,
#     M - log_cp_prior + log_om_cp_prior                     (using (1))
# )
#
# where M is exactly log_change_point_prob from step 5 above.
#
# In this way, we avoid up to T expensive log and exp calls by not
# recomputing logaddexp_vec(pred_arr + message), at the cost of only
# a single binary (not length-T) logsumexp call and a few cheap
# additions and subtractions.
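# A quick numerical check of fact (1), as a standalone sketch (not part
# of the algorithm): with v = np.array([0.1, 0.2]) and c = 0.5,
# np.allclose(logsumexp(v + c), logsumexp(v) + c) evaluates to True.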
log_evidence = np.logaddexp(
log_change_point_prob,
log_change_point_prob - log_cp_prior + log_om_cp_prior,
)
# step 7
# log_posterior = log_joint_prob - log_evidence
log_posterior = (
message[self.T + m_ptr : self.T, self._ts_slice] - log_evidence
)
rt_posterior[i, 0 : (i + 1), self._ts_slice] = np.exp(log_posterior)
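# Row i of rt_posterior now holds P(r_i | x_1:i) for run lengths 0..i,
# with the remaining entries of the row left at zero.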
# step 8
model.update_sufficient_stats(x=this_pt)
# pass the joint as the message to the next step
# message = log_joint_prob
# The message is passed implicitly: we wrote the joint into
# `message` in place above.
return rt_posterior
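# Example usage of the surrounding detector (a sketch; assumes the
# public BOCPDetector wrapper defined elsewhere in this module):
#
#   from kats.consts import TimeSeriesData
#   from kats.detectors.bocpd import BOCPDetector
#
#   detector = BOCPDetector(ts)   # ts: a TimeSeriesData instance
#   changepoints = detector.detector()  # runs _find_posterior internally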