in vissl/hooks/log_hooks.py [0:0]
def on_update(self, task: "tasks.ClassyTask") -> None:
    """
    Executed after a parameter update. If the current phase is training
    and it's a logging iteration, we compute and log several helpful training
    stats (loss, lr, eta, batch time, peak GPU memory) to keep track of the
    ongoing training.

    For monitoring the batch time (average training iteration time), we allow
    monitoring the stats (optionally) for every N iterations to get a better
    idea about the batch time and training eta.

    Set the btime_freq input using cfg.HOOKS.PERF_STATS.PERF_STAT_FREQUENCY=N
    ensuring that cfg.HOOKS.PERF_STATS.MONITOR_PERF_STATS = True.
    """
    phase_type = "train" if task.train else "test"
    # Only the primary worker logs, and only during the training phase.
    if not (is_primary() and phase_type == "train"):
        return

    train_phase_idx = task.train_phase_idx
    log_freq = task.config["LOG_FREQUENCY"]
    iteration = task.iteration

    # Log on the first iteration, at the configured frequency, and every 5
    # iterations within the first 100 (early training changes the fastest).
    if not (
        (iteration == 1)
        or (iteration % log_freq == 0)
        or (iteration <= 100 and iteration % 5 == 0)
    ):
        return

    # Query peak GPU memory only on logging iterations: max_memory_allocated()
    # is a pure query, so skipping it elsewhere doesn't change the reported
    # value but avoids a needless CUDA call on every update.
    if torch.cuda.is_available():
        peak_mem_used = int(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0)
    else:
        peak_mem_used = -1  # sentinel: no GPU available

    loss_val = round(task.last_batch.loss.data.cpu().item(), 5)

    # Average batch time over everything recorded so far; fall back to [0]
    # before the first timing exists so the averaging/ETA math stays defined.
    batch_times = task.batch_time if len(task.batch_time) > 0 else [0]
    avg_time = sum(batch_times) / len(batch_times)
    eta_secs = avg_time * (task.max_iteration - iteration)
    eta_string = str(datetime.timedelta(seconds=int(eta_secs)))

    # The optimizer may expose a single lr or a collection of per-group lrs.
    if isinstance(task.optimizer.options_view.lr, (set, list)):
        lr_val = list(task.optimizer.options_view.lr)
    else:
        lr_val = round(task.optimizer.options_view.lr, 5)

    batch_time = int(1000.0 * avg_time)  # milliseconds
    rank = get_rank()
    log_data = {
        "Rank": rank,
        "ep": train_phase_idx,
        "iter": iteration,
        "lr": lr_val,
        "loss": loss_val,
        "btime(ms)": batch_time,
        "eta": eta_string,
        "peak_mem(M)": peak_mem_used,
    }

    if iteration == 1:
        # Set max iterations. Currently used in benchmark_suite_scheduler.py
        log_data["max_iterations"] = task.max_iteration

    # Optionally report a rolling average over the last btime_freq batches;
    # it tracks the current training speed better than the full-run average.
    if self.btime_freq and len(batch_times) >= self.btime_freq:
        rolling_avg_time = (
            sum(batch_times[-self.btime_freq :]) / self.btime_freq
        )
        # int() once is enough here (the original double-wrapped in int()).
        rolling_eta_secs = int(
            rolling_avg_time * (task.max_iteration - iteration)
        )
        rolling_eta_str = str(datetime.timedelta(seconds=rolling_eta_secs))
        rolling_btime = int(1000.0 * rolling_avg_time)
        log_data[f"btime({self.btime_freq}iters)(ms)"] = rolling_btime
        log_data["rolling_eta"] = rolling_eta_str

    # to maintain the backwards compatibility with the log.txt
    # logs, we convert the json to the previous format.
    # the stdout.json can be used to use the json format of logs.
    stdout_data = ""
    for key, value in log_data.items():
        stdout_data = (
            f"{stdout_data}[{key}: {value}] "
            if key == "ep"
            else f"{stdout_data}{key}: {value}; "
        )
    logging.info(stdout_data.strip())
    self.json_stdout_logger.write(json.dumps(log_data) + "\n")