in src/rime/dataset/base.py [0:0]
def get_stats(self):
    """Summarize this dataset as a nested dict of descriptive statistics.

    Returns:
        A dict with three sub-dicts keyed ``'user_df'``, ``'item_df'`` and
        ``'event_df'``, holding counts and averages read from
        ``self.user_in_test``, ``self.item_in_test``, ``self.training_data``
        and ``self.target_csr``, plus the configured ``horizon``, default
        recommendation sizes and perplexity baselines.

        ``'avg hist span'`` is NaN when the ``TEST_START_TIME`` / ``_hist_ts``
        columns are absent, or when no test user has both a finite test start
        time and a non-empty history.
    """
    users = self.user_in_test
    avg_hist_span = float("nan")

    if "TEST_START_TIME" in users and "_hist_ts" in users:
        # Test users with finite history: a finite test start time and at
        # least one recorded history timestamp.
        has_finite_start = users["TEST_START_TIME"] < np.inf
        has_history = users["_hist_ts"].apply(len) > 0
        eligible = users[has_finite_start & has_history]

        # A row-wise apply on an empty frame does not reduce to a Series, so
        # its mean would not be a scalar; keep the NaN default in that case.
        if len(eligible) > 0:
            # NOTE(review): assumes the first entry of `_hist_ts` is the
            # earliest timestamp of the user's history -- confirm upstream.
            first_ts = eligible["_hist_ts"].map(lambda ts: ts[0])
            avg_hist_span = (eligible["TEST_START_TIME"] - first_ts).mean()

    return {
        'user_df': {
            '# test users': len(users),
            '# train users': len(self.training_data.user_df),
            'avg hist len': users['_hist_len'].mean(),
            'avg hist span': avg_hist_span,
            'horizon': self.horizon,
            'avg target items': self.target_csr.sum(axis=1).mean(),
        },
        'item_df': {
            '# test items': len(self.item_in_test),
            '# train items': len(self.training_data.item_df),
            'avg hist len': self.item_in_test['_hist_len'].mean(),
            'avg target users': self.target_csr.sum(axis=0).mean(),
        },
        'event_df': {
            '# train events': len(self.training_data.event_df),
            '# test events': self.target_csr.sum(),
            'default_user_rec_top_c': self.default_user_rec_top_c,
            'default_item_rec_top_k': self.default_item_rec_top_k,
            "user_ppl_baseline": self.user_ppl_baseline,
            "item_ppl_baseline": self.item_ppl_baseline,
        },
    }