in classy_vision/hooks/tensorboard_plot_hook.py [0:0]
def on_phase_end(self, task) -> None:
    """Plot the losses, learning rates, meters, and perf stats to Tensorboard."""
    if self.learning_rates is None:
        logging.warning("learning_rates is not initialized")
        return

    phase_type = task.phase_type
    cum_sample_fetch_times = self._get_cum_sample_fetch_times(phase_type)

    batches = len(task.losses)
    if batches == 0 or not is_primary():
        return

    phase_type_idx = task.train_phase_idx if task.train else task.eval_phase_idx

    logging.info(f"Plotting to Tensorboard for {phase_type} phase {phase_type_idx}")
    for i in range(0, len(self.wall_times), self.log_period):
        global_step = (
            i + self.state.step_count[phase_type] - len(self.wall_times) + 1
        )
        if cum_sample_fetch_times:
            self.tb_writer.add_scalar(
                f"Speed/{phase_type}/cumulative_sample_fetch_time",
                cum_sample_fetch_times[i],
                global_step=global_step,
                walltime=self.wall_times[i],
            )
        if task.train:
            self.tb_writer.add_scalar(
                "Learning Rate/train",
                self.learning_rates[i],
                global_step=global_step,
                walltime=self.wall_times[i],
            )
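    # Histogram the model parameters once per training phase.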
    if task.train:
        for name, parameter in task.base_model.named_parameters():
            self.tb_writer.add_histogram(
                f"Parameters/{name}", parameter, global_step=phase_type_idx
            )
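    # Record the peak GPU memory allocated during the phase.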
    if torch.cuda.is_available():
        self.tb_writer.add_scalar(
            f"Memory/{phase_type}/peak_allocated",
            torch.cuda.max_memory_allocated(),
            global_step=phase_type_idx,
        )
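    # Plot the loss averaged over the whole phase.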
    loss_avg = sum(task.losses) / batches

    loss_key = f"Losses/{phase_type}"
    self.tb_writer.add_scalar(loss_key, loss_avg, global_step=phase_type_idx)
    # plot meters which return a dict
    for meter in task.meters:
        if not isinstance(meter.value, dict):
            logging.warning(f"Skipping meter {meter.name} with value: {meter.value}")
            continue

        for name, value in meter.value.items():
            if isinstance(value, float):
                meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
                self.tb_writer.add_scalar(
                    meter_key, value, global_step=phase_type_idx
                )
            else:
                logging.warning(
                    f"Skipping meter name {meter.name}/{name} with value: {value}"
                )
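    # Plot any perf stats the task collected, grouped under their tag.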
    if hasattr(task, "perf_log"):
        for perf in task.perf_log:
            phase_idx = perf["phase_idx"]
            tag = perf["tag"]
            for metric_name, metric_value in perf.items():
                if metric_name in ["phase_idx", "tag"]:
                    continue

                self.tb_writer.add_scalar(
                    f"Speed/{tag}/{metric_name}",
                    metric_value,
                    global_step=phase_idx,
                )
    # flush so that the plots aren't lost if training crashes soon after
    self.tb_writer.flush()

    logging.info("Done plotting to Tensorboard")