in train/learner.py [0:0]
def log_target(steps, current_time, metrics_values):
nonlocal last_step
nonlocal last_time
sps = (steps - last_step) / (current_time - last_time)
if tb_writer is not None:
for metric_name, metric_val in metrics_values.items():
tb_writer.add_scalar(f"learner/{metric_name}", metric_val, steps)
tb_writer.add_scalar("learner/sps", sps, steps)
episode_returns = []
for _ in range(log_queue.qsize()):
ids, infos = log_queue.get()
for (
actor_id,
episode_step,
episode_return,
cwnd_mean,
delay_mean,
throughput_mean,
train_job_id,
) in zip(
ids.tolist(),
infos["episode_step"].tolist(),
infos["episode_return"].tolist(),
infos["cwnd_mean"].tolist(),
infos["delay_mean"].tolist(),
infos["throughput_mean"].tolist(),
infos["train_job_id"].tolist(),
):
episode_returns.append(episode_return)
# These quantities must be averaged over all steps, not summed.
cwnd_mean /= episode_step
delay_mean /= episode_step
throughput_mean /= episode_step
# At this point, `train_job_id` is the index of the job in the
# list of training jobs for this experiment. But for logging
# purpose, we want to store the job ID as it would appear in
# the `train_job_ids` option (i.e., its index in the list of
# all jobs from `experiments.yml`). We do the conversion here.
job_id = train_jobs[train_job_id]["job_id"]
if tb_writer is not None:
tb_writer.add_scalar("actor/episode_steps", episode_step, steps)
tb_writer.add_scalar("actor/episode_return", episode_return, steps)
tb_writer.add_scalar("actor/cwnd_mean", cwnd_mean, steps)
tb_writer.add_scalar("actor/delay_mean", delay_mean, steps)
tb_writer.add_scalar(
"actor/throughput_mean", throughput_mean, steps
)
log(
flags=flags,
step=steps,
episode_step=episode_step,
episode_return=episode_return,
actor_id=actor_id,
job_id=job_id,
sps=sps,
loss=metrics_values["loss/total"],
cwnd_mean=cwnd_mean,
delay_mean=delay_mean,
throughput_mean=throughput_mean,
timestep=time.time(),
)
# Log every 100 steps (roughly -- depends on `steps_per_epoch`).
if steps // 100 > last_step // 100:
if episode_returns:
logging.info(
"Step %i @ %.1f SPS. Mean episode return: %f. "
"Episodes finished: %i. Loss: %f.",
steps,
sps,
sum(episode_returns) / len(episode_returns),
len(episode_returns),
metrics_values["loss/total"],
)
else:
logging.info(
"Step %i @ %.1f SPS. Loss: %f.",
steps,
sps,
metrics_values["loss/total"],
)
last_step = steps
last_time = current_time