in reagent/reporting/seq2reward_reporter.py [0:0]
def aggregating_observers(self):
    """Build the name -> IntervalAggregatingObserver map for this reporter.

    Each aggregator is wrapped in an ``IntervalAggregatingObserver`` so its
    accumulated values are flushed every ``self.report_interval`` batches.
    Covers running-mean losses, per-action Q-value/means, and their
    TensorBoard histogram counterparts for both training and eval.
    """

    def per_action_mean(key):
        # Summarize a per-action tensor by its mean over the batch.
        return agg.FunctionsByActionAggregator(
            key, self.action_names, {"mean": torch.mean}
        )

    named_aggregators = [
        ("mse_loss_per_batch", agg.MeanAggregator("mse_loss")),
        ("step_entropy_loss_per_batch", agg.MeanAggregator("step_entropy_loss")),
        ("q_values_per_batch", per_action_mean("q_values")),
        ("eval_mse_loss_per_batch", agg.MeanAggregator("eval_mse_loss")),
        (
            "eval_step_entropy_loss_per_batch",
            agg.MeanAggregator("eval_step_entropy_loss"),
        ),
        ("eval_q_values_per_batch", per_action_mean("eval_q_values")),
        (
            "eval_action_distribution_per_batch",
            per_action_mean("eval_action_distribution"),
        ),
    ]

    # TensorBoard histogram + mean views of the scalar losses; the log key
    # matches the metric key for every entry in the original table.
    for scalar_key in (
        "mse_loss",
        "step_entropy_loss",
        "eval_mse_loss",
        "eval_step_entropy_loss",
    ):
        named_aggregators.append(
            (
                f"{scalar_key}_tb",
                agg.TensorBoardHistogramAndMeanAggregator(scalar_key, scalar_key),
            )
        )

    # TensorBoard per-action histograms for Q-values / action distributions.
    for key, category, title in (
        ("q_values", "q_values", "training"),
        ("eval_q_values", "q_values", "eval"),
        ("eval_action_distribution", "action_distribution", "eval"),
    ):
        named_aggregators.append(
            (
                f"{key}_tb",
                agg.TensorBoardActionHistogramAndMeanAggregator(
                    key, category, title, self.action_names
                ),
            )
        )

    return {
        name: IntervalAggregatingObserver(self.report_interval, aggregator)
        for name, aggregator in named_aggregators
    }