in src/rime/models/zero_shot/bayes_lm.py [0:0]
def transform(self, D):
    """Generate a score matrix by evaluating top or random items in test.

    Rows of ``self.item_df`` whose index occurs in ``D.item_in_test.index``
    are ranked by ``log_p_y`` (descending, stable mergesort). For every
    title yielded by ``explode_user_titles`` a candidate subset is picked:

    * ``candidate_selection_method == 'greedy'``: the leading
      ``num_candidates`` ranked rows;
    * otherwise: a draw without replacement, weighted by
      ``softmax(log_p_y)`` over the ranked rows.

    Each candidate is scored as ``log p(x|y) / temperature + log p(y)``.
    The scores are reindexed onto ``D.item_in_test.index`` with ``-inf``
    fill and passed through a softmax. The per-title rows are finally
    combined, split group by split group, as ``weights @ rows``.

    Returns the stacked result; per the original author's note this is a
    dense matrix with shape = user_in_test x item_in_test.
    """
    text_col = self.text_column_name

    # presumably one entry per (user, history title) pair, with `split_points`
    # marking user boundaries -- verify against explode_user_titles
    exploded_titles, split_points, title_weights = explode_user_titles(
        D.user_in_test['_hist_items'], self.item_df[text_col], self.gamma)

    # Restrict to test items and rank them by prior, highest first.
    in_test = self.item_df.index.isin(D.item_in_test.index)
    ranked_items = self.item_df[in_test].sort_values(
        'log_p_y', ascending=False, kind='mergesort')
    n_ranked = len(ranked_items)

    ranked_titles = ranked_items[text_col].values
    ranked_log_prior = ranked_items['log_p_y'].values
    # Sampling weights used by the non-greedy selection mode.
    prior_probs = torch.as_tensor(ranked_log_prior).softmax(0).numpy()

    n_candidates = int(min(self.max_num_candidates, n_ranked))
    # Interior cut points that chop the candidates into chunks of at most
    # `batch_size` titles; dropping the leading 0 avoids an empty first chunk.
    batch_breaks = range(0, n_candidates, self.batch_size)[1:]
    use_greedy = self.candidate_selection_method == 'greedy'

    def posterior_over_test_items(title, device):
        """Return softmax-normalised scores of `title` over all test items."""
        if use_greedy:
            picked = np.arange(n_candidates)
        else:
            picked = np.random.choice(
                n_ranked, n_candidates, replace=False, p=prior_probs)

        log_likelihood = np.hstack([
            self._compute_log_p_x_given_y(batch, title, device)
            for batch in np.split(ranked_titles[picked], batch_breaks)
        ])
        log_posterior = log_likelihood / self.temperature + ranked_log_prior[picked]

        # Items that were not picked are filled with -inf, so the softmax
        # below assigns them zero probability.
        aligned = matrix_reindex(
            log_posterior, ranked_items.index[picked],
            D.item_in_test.index, axis=0, fill_value=-np.inf)
        return torch.as_tensor(aligned).softmax(0).numpy()

    with _to_cuda(self.model) as model:
        per_title_scores = [
            posterior_over_test_items(title, model.device)
            for title in tqdm(exploded_titles)
        ]

    # Collapse the per-title rows into one row per split group.
    stacked_scores = np.vstack(per_title_scores)
    weight_groups = np.split(title_weights, split_points)
    score_groups = np.split(stacked_scores, split_points)
    user_item_scores = np.vstack([
        group_weights @ group_scores
        for group_weights, group_scores in zip(weight_groups, score_groups)
    ])
    return user_item_scores  # dense matrix with shape = user_in_test x item_in_test