in train.py [0:0]
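# NOTE: this excerpt assumes train.py's usual module-level imports, e.g.
# `import itertools`, `from collections import defaultdict`,
# `from typing import Any, Dict, List`, `import numpy as np`,
# `import torch as th`, plus the project-level `log`, `TrainingSetup` and
# `discounted_bwd_cumsum_` helpers used below.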
def eval(setup: TrainingSetup, n_samples: int = -1):
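    """Evaluate the agent on the (deterministically seeded) eval envs.

    Aggregated metrics are logged via the agent's summary writer; frames are
    optionally collected into a video via the rendering queue `rq`.
    """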
cfg = setup.cfg
agent = setup.agent
rq = setup.rq
envs = setup.eval_envs
envs.seed(list(range(envs.num_envs))) # Deterministic evals
obs = envs.reset()
reward = th.zeros(envs.num_envs)
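    # Per-step buffers: rewards[t] holds the step-t reward of every env; dones
    # is seeded with an all-False entry so dones[-1] always marks the envs
    # whose episode has already ended.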
rewards: List[th.Tensor] = []
dones: List[th.Tensor] = [th.tensor([False] * envs.num_envs)]
rq_in: List[List[Dict[str, Any]]] = [[] for _ in range(envs.num_envs)]
n_imgs = 0
collect_img = cfg.eval.video is not None
collect_all = collect_img and cfg.eval.video.record_all
annotate = collect_img and (
cfg.eval.video.annotations or (cfg.eval.video.annotations is None)
)
vwidth = int(cfg.eval.video.size[0]) if collect_img else 0
vheight = int(cfg.eval.video.size[1]) if collect_img else 0
metrics = set(cfg.eval.metrics.keys())
metrics_v: Dict[str, Any] = defaultdict(
lambda: [[] for _ in range(envs.num_envs)]
)
extra = None
entropy_ds = []
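    # Roll out all eval envs until each has finished one episode, collecting
    # per-step rewards, done flags, requested metrics and (optionally) frames
    # for a video.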
while True:
if collect_img:
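            # Build per-env annotation strings from any 'viz' entries the
            # agent returned along with its previous action.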
extra_right: List[List[str]] = [[] for _ in range(envs.num_envs)]
if extra is not None and isinstance(extra, dict) and 'viz' in extra:
for i in range(envs.num_envs):
for k in extra['viz']:
if isinstance(extra[k][i], str):
extra_right[i].append(f'{k} {extra[k][i]}')
elif isinstance(extra[k][i], np.ndarray):
v = np.array2string(
extra[k][i], separator=',', precision=2
)
extra_right[i].append(f'{k} {v}')
else:
v = np.array2string(
extra[k][i].cpu().numpy(),
separator=',',
precision=2,
)
extra_right[i].append(f'{k} {v}')
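            # Record either every env (record_all) or just the first one.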
if collect_all:
for i, img in enumerate(
envs.render_all(
mode='rgb_array', width=vwidth, height=vheight
)
):
if dones[-1][i].item():
continue
rq_in[i].append(
{
'img': img,
's_left': [
                                'Eval',
f'Samples {n_samples}',
],
's_right': [
f'Trial {i+1}',
f'Frame {len(rewards)}',
f'Reward {reward[i].item():+.02f}',
]
+ extra_right[i],
}
)
else:
if not dones[-1][0].item():
rq_in[0].append(
{
'img': envs.render_single(
mode='rgb_array', width=vwidth, height=vheight
),
's_left': [
                            'Eval',
f'Samples {n_samples}',
],
's_right': [
f'Frame {n_imgs}',
f'Reward {reward[0].item():+.02f}',
]
+ extra_right[0],
}
)
n_imgs += 1
if n_imgs > cfg.eval.video.length:
collect_img = False
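        # Query the agent and advance all envs; the observation for the next
        # step comes from reset_if_done() below, while envs that already
        # finished stay masked out via the accumulated done flags.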
action, extra = agent.action(envs, obs)
next_obs, reward, done, info = envs.step(action)
if 'entropy_d' in envs.ctx:
entropy_ds.append(envs.ctx['entropy_d'])
for k in metrics:
for i in range(len(info)):
if dones[-1][i].item():
continue
if k in info[i]:
metrics_v[k][i].append(info[i][k])
rewards.append(reward.view(-1).to('cpu', copy=True))
dones.append(done.view(-1).cpu() | dones[-1])
if dones[-1].all():
break
obs = envs.reset_if_done()
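    # Stack the per-step buffers along time; not_done[:, t] is True while
    # env i had not yet finished before step t, and masks out rewards
    # collected after an episode ended.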
reward = th.stack(rewards, dim=1)
not_done = th.logical_not(th.stack(dones, dim=1))
r_undiscounted = (reward * not_done[:, :-1]).sum(dim=1)
r_discounted = reward.clone()
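    # Discounted per-episode return: discounted_bwd_cumsum_ is assumed to be
    # an in-place backward discounted cumulative sum that also returns its
    # argument, so column 0 holds the full return from t=0.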
    r_discounted = discounted_bwd_cumsum_(
        r_discounted, cfg.agent.gamma, mask=not_done[:, 1:]
    )[:, 0]
ep_len = not_done.to(th.float32).sum(dim=1)
metrics_v['episode_length'] = ep_len
metrics_v['return_disc'] = r_discounted
metrics_v['return_undisc'] = r_undiscounted
default_agg = ['mean', 'min', 'max', 'std']
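    # Each entry in cfg.eval.metrics is either 'default' (mean/min/max/std
    # over episodes) or an aggregation name, optionally prefixed by a
    # per-episode reduction as 'epagg:tagg' (e.g. 'final:mean'), where epagg
    # collapses the step values of one episode and tagg is applied across
    # episodes.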
for k, v in metrics_v.items():
agg = cfg.eval.metrics[k]
if isinstance(agg, str):
if ':' in agg:
epagg, tagg = agg.split(':')
if epagg == 'final':
v = [ev[-1] for ev in v]
elif epagg == 'max':
v = [max(ev) for ev in v]
elif epagg == 'min':
v = [min(ev) for ev in v]
elif epagg == 'sum':
v = [sum(ev) for ev in v]
agg = tagg
elif not isinstance(v, th.Tensor):
                # Flatten the per-env value lists into one list before
                # converting to a tensor below.
                v = list(itertools.chain.from_iterable(v))
if agg == 'default':
agg = default_agg
else:
agg = [agg]
if isinstance(v, th.Tensor):
agent.tbw_add_scalars(f'Eval/{k}', v, agg, n_samples)
else:
agent.tbw_add_scalars(
f'Eval/{k}', th.tensor(v).float(), agg, n_samples
)
log.info(
f'eval done, avg len {ep_len.mean().item():.01f}, avg return {r_discounted.mean().item():+.03f}, undisc avg {r_undiscounted.mean():+.03f} min {r_undiscounted.min():+0.3f} max {r_undiscounted.max():+0.3f}'
)
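    # If the envs exposed a per-step 'entropy_d' value (presumably the entropy
    # of a discrete action distribution), log its mean and a histogram over
    # the steps of still-running episodes.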
if len(entropy_ds) > 0:
ent_d = (
th.stack(entropy_ds)
.T.to(not_done.device)
.masked_select(not_done[:, :-1])
)
agent.tbw_add_scalar('Eval/EntropyDMean', ent_d.mean(), n_samples)
agent.tbw.add_histogram('Eval/EntropyD', ent_d, n_samples, bins=20)
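    # Assemble the recorded frames into a video: annotate each frame (if
    # enabled) with its accumulated reward and push it to the rendering queue.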
if sum([len(q) for q in rq_in]) > 0:
# Display cumulative reward in video
c_rew = reward * not_done[:, :-1]
for i in range(c_rew.shape[1] - 1):
c_rew[:, i + 1] += c_rew[:, i]
n_imgs = 0
for i, ep in enumerate(rq_in):
for j, input in enumerate(ep):
if n_imgs <= cfg.eval.video.length:
input['s_right'].append(f'Acc. Reward {c_rew[i][j]:+.02f}')
if annotate:
rq.push(**input)
else:
rq.push(img=input['img'])
n_imgs += 1
rq.plot()