in kats/detectors/bocpd.py [0:0]
def pred_prob(self, t: int, x: float) -> np.ndarray:
"""Predictive probability of a new data point
Args:
t: time
x: the new data point
Returns:
pred_arr: Array with log predictive probabilities for each starting point.
"""
# TODO: use better priors
def log_post_pred(y: float, t: int, rl: int) -> float:
N = self._x.shape[0]
x_arr = self._x[N - rl - 1 : N, :]
y_arr = self._y[N - rl - 1 : N].reshape(-1, 1)
xtx = np.matmul(x_arr.transpose(), x_arr) # computes X^T X
xty = np.squeeze(np.matmul(x_arr.transpose(), y_arr)) # computes X^T Y
yty = np.matmul(y_arr.transpose(), y_arr) # computes Y^T Y
# Bayesian learning update
lambda_n = xtx + self.lambda_prior
mu_n = np.matmul(
np.linalg.inv(lambda_n),
np.squeeze(np.matmul(self.lambda_prior, self.mu_prior) + xty),
)
a_n = self.a_0 + t / 2
mu_prior = self.mu_prior
assert mu_prior is not None
mu_prec_prior = np.matmul(
np.matmul(mu_prior.transpose(), self.lambda_prior), self.mu_prior
)
mu_prec_n = np.matmul(np.matmul(mu_n.transpose(), lambda_n), mu_n)
b_n = self.b_0 + 1 / 2 * (yty + mu_prec_prior - mu_prec_n)
if a_n < 0 or b_n < 0:
logging.info(
f"""
Got nonpositive parameters for Inv-Gamma: {a_n}, {b_n}.
Likely, integer overflow -- maybe scale down the data?
"""
)
# cannot allow this to fail arbitrarily, so falling back to prior
if a_n < 0:
a_n = self.a_0
if b_n < 0:
b_n = self.b_0
# Compute likelihood of new point x under new Bayesian parameters
x_new = np.array([1.0, t]).reshape(2, -1)
(
indiv_likelihoods,
prediction,
var_pred,
) = _BayesianLinReg._sample_likelihood(
mu_n, lambda_n, a_n, b_n, x_new, y, self.num_likelihood_samples
)
likelihoods = np.sum(indiv_likelihoods)
likelihoods = max(likelihoods, self.min_sum_samples)
avg_likelihood = likelihoods / self.num_likelihood_samples
mean_prediction = np.mean(prediction)
std_prediction = np.sqrt(var_pred)
self._mean_arr[t].append(mean_prediction)
self._std_arr[t].append(std_prediction)
return np.log(avg_likelihood)
if t % 50 == 1: # put 1 because then t=1 will show up
logging.info(f"Running Bayesian Linear Regression with t={t}.")
# initialize empty mean and std deviation arrays
self._mean_arr[t] = []
self._std_arr[t] = []
pred_arr = np.array([log_post_pred(y=x, t=t, rl=rl) for rl in range(t)])
return pred_arr