in reagent/evaluation/evaluation_data_page.py [0:0]
def validate(self):
    """Sanity-check tensor shapes and MDP ordering for this data page.

    Verifies that:
      * logged quantities (propensities, rewards, values) are 2-D column
        vectors of shape ``(batch, 1)``;
      * model quantities are 2-D and agree on the number of actions;
      * optional metric tensors agree on the number of metrics and are
        flattened ``num_metrics * num_actions`` wide;
      * every tensor shares the same minibatch size;
      * within each MDP, rows appear in strictly increasing
        sequence-number order, and each MDP id forms exactly one
        contiguous run.

    Raises:
        AssertionError: if any invariant is violated.

    NOTE: uses ``assert`` by this class's convention for internal data
    validation; the checks are skipped when Python runs with ``-O``.
    """
    # --- rank checks: every tensor is expected to be 2-D ----------------
    assert len(self.logged_propensities.shape) == 2
    assert len(self.logged_rewards.shape) == 2
    assert len(self.logged_values.shape) == 2
    assert len(self.model_propensities.shape) == 2
    assert len(self.model_rewards.shape) == 2
    assert len(self.model_values.shape) == 2

    # Logged quantities are per-example scalars stored as (batch, 1).
    assert self.logged_propensities.shape[1] == 1
    assert self.logged_rewards.shape[1] == 1
    assert self.logged_values.shape[1] == 1

    # Model quantities carry one column per action.
    num_actions = self.model_propensities.shape[1]
    assert self.model_rewards.shape[1] == num_actions
    assert self.model_values.shape[1] == num_actions
    assert self.action_mask.shape == self.model_propensities.shape

    if self.logged_metrics is not None:
        # Assumes the other metric tensors are always present whenever
        # logged_metrics is -- TODO confirm against the constructor.
        assert len(self.logged_metrics.shape) == 2
        assert len(self.logged_metrics_values.shape) == 2
        assert len(self.model_metrics.shape) == 2
        assert len(self.model_metrics_values.shape) == 2
        num_metrics = self.logged_metrics.shape[1]
        assert self.logged_metrics_values.shape[1] == num_metrics, (
            "Invalid shape: "
            + str(self.logged_metrics_values.shape)
            + " != "
            + str(num_metrics)
        )
        # Model metric tensors are flattened (metric x action) grids.
        assert self.model_metrics.shape[1] == num_metrics * num_actions, (
            "Invalid shape: "
            + str(self.model_metrics.shape)
            + " != "
            + str(num_metrics * num_actions)
        )
        assert self.model_metrics_values.shape[1] == num_metrics * num_actions

    # --- minibatch-size consistency across all tensors ------------------
    minibatch_size = self.logged_propensities.shape[0]
    logger.info("EvaluationDataPage data size: {}".format(minibatch_size))
    assert minibatch_size == self.logged_rewards.shape[0]
    assert minibatch_size == self.logged_values.shape[0]
    assert minibatch_size == self.model_propensities.shape[0]
    assert minibatch_size == self.model_rewards.shape[0]
    assert minibatch_size == self.model_values.shape[0]
    if self.logged_metrics is not None:
        assert minibatch_size == self.logged_metrics.shape[0]
        assert minibatch_size == self.logged_metrics_values.shape[0]
        assert minibatch_size == self.model_metrics.shape[0]
        assert minibatch_size == self.model_metrics_values.shape[0]

    logger.info("Average logged reward = %s", self.logged_rewards.mean())
    # Log per-action propensity for up to the first two actions.
    # BUGFIX: the previous code indexed columns 0 and 1 unconditionally,
    # raising IndexError whenever num_actions == 1.
    for action in range(min(num_actions, 2)):
        logger.info(
            "Average model propensity for action %d = %s",
            action,
            self.model_propensities[:, action].mean(),
        )
    logger.info(
        "Average logged propensity = %s",
        self.logged_propensities.mean(),
    )

    # --- MDP ordering checks --------------------------------------------
    flatten_mdp_id = self.mdp_id.reshape(-1)
    unique_mdp_ids = set(flatten_mdp_id.tolist())
    prev_mdp_id, prev_seq_num = None, None
    mdp_count = 0
    for mdp_id, seq_num in zip(flatten_mdp_id, self.sequence_number):
        if prev_mdp_id is None or mdp_id != prev_mdp_id:
            # Start of a new contiguous run of rows for one MDP.
            mdp_count += 1
            prev_mdp_id = mdp_id
        else:
            # Within one MDP, sequence numbers must strictly increase.
            # (Message previously ran the two sentences together; a
            # separating space was added.)
            assert seq_num > prev_seq_num, (
                f"For mdp_id {mdp_id}, got {seq_num} <= {prev_seq_num}. "
                f"Sequence number must be in increasing order.\n"
                f"Zip(mdp_id, seq_num): "
                f"{list(zip(flatten_mdp_id, self.sequence_number))}"
            )
        prev_seq_num = seq_num
    # If each MDP occupies exactly one contiguous run, the number of runs
    # equals the number of distinct ids; otherwise some MDP is split up.
    assert len(unique_mdp_ids) == mdp_count, "MDPs are broken up. {} vs {}".format(
        len(unique_mdp_ids), mdp_count
    )