def eval()

in train.py


def eval(setup: TrainingSetup, n_samples: int = -1):
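    """Evaluate the agent on the evaluation environments.

    Rolls out every eval environment until its episode terminates, logs the
    configured metrics (plus episode length and discounted and undiscounted
    returns) to TensorBoard, and optionally records an annotated video via
    the render queue.
    """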
    cfg = setup.cfg
    agent = setup.agent
    rq = setup.rq
    envs = setup.eval_envs

    envs.seed(list(range(envs.num_envs)))  # Deterministic evals
    obs = envs.reset()
    reward = th.zeros(envs.num_envs)
    rewards: List[th.Tensor] = []
    dones: List[th.Tensor] = [th.tensor([False] * envs.num_envs)]
    rq_in: List[List[Dict[str, Any]]] = [[] for _ in range(envs.num_envs)]
    n_imgs = 0
    collect_img = cfg.eval.video is not None
    collect_all = collect_img and cfg.eval.video.record_all
    annotate = collect_img and (
        cfg.eval.video.annotations or (cfg.eval.video.annotations is None)
    )
    vwidth = int(cfg.eval.video.size[0]) if collect_img else 0
    vheight = int(cfg.eval.video.size[1]) if collect_img else 0
    metrics = set(cfg.eval.metrics.keys())
    metrics_v: Dict[str, Any] = defaultdict(
        lambda: [[] for _ in range(envs.num_envs)]
    )
    extra = None
    entropy_ds = []
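    # Roll out all eval environments until every episode has terminated,
    # optionally collecting annotated frames for the eval video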
    while True:
        if collect_img:
            extra_right: List[List[str]] = [[] for _ in range(envs.num_envs)]
            if extra is not None and isinstance(extra, dict) and 'viz' in extra:
                for i in range(envs.num_envs):
                    for k in extra['viz']:
                        if isinstance(extra[k][i], str):
                            extra_right[i].append(f'{k} {extra[k][i]}')
                        elif isinstance(extra[k][i], np.ndarray):
                            v = np.array2string(
                                extra[k][i], separator=',', precision=2
                            )
                            extra_right[i].append(f'{k} {v}')
                        else:
                            v = np.array2string(
                                extra[k][i].cpu().numpy(),
                                separator=',',
                                precision=2,
                            )
                            extra_right[i].append(f'{k} {v}')
            if collect_all:
                for i, img in enumerate(
                    envs.render_all(
                        mode='rgb_array', width=vwidth, height=vheight
                    )
                ):
                    if dones[-1][i].item():
                        continue
                    rq_in[i].append(
                        {
                            'img': img,
                            's_left': [
                                f'Eval',
                                f'Samples {n_samples}',
                            ],
                            's_right': [
                                f'Trial {i+1}',
                                f'Frame {len(rewards)}',
                                f'Reward {reward[i].item():+.02f}',
                            ]
                            + extra_right[i],
                        }
                    )
            else:
                if not dones[-1][0].item():
                    rq_in[0].append(
                        {
                            'img': envs.render_single(
                                mode='rgb_array', width=vwidth, height=vheight
                            ),
                            's_left': [
                                f'Eval',
                                f'Samples {n_samples}',
                            ],
                            's_right': [
                                f'Frame {n_imgs}',
                                f'Reward {reward[0].item():+.02f}',
                            ]
                            + extra_right[0],
                        }
                    )
                    n_imgs += 1
                    if n_imgs > cfg.eval.video.length:
                        collect_img = False

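        # Query the agent for actions and advance all environments one step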
        action, extra = agent.action(envs, obs)
        next_obs, reward, done, info = envs.step(action)
        if 'entropy_d' in envs.ctx:
            entropy_ds.append(envs.ctx['entropy_d'])

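        # Record configured per-step metrics for environments still running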
        for k in metrics:
            for i in range(len(info)):
                if dones[-1][i].item():
                    continue
                if k in info[i]:
                    metrics_v[k][i].append(info[i][k])
        rewards.append(reward.view(-1).to('cpu', copy=True))
        dones.append(done.view(-1).cpu() | dones[-1])
        if dones[-1].all():
            break
        obs = envs.reset_if_done()

    reward = th.stack(rewards, dim=1)
    not_done = th.logical_not(th.stack(dones, dim=1))
    r_undiscounted = (reward * not_done[:, :-1]).sum(dim=1)
    # Per-episode discounted return, taken at the first time step
    r_discounted = discounted_bwd_cumsum_(
        reward.clone(), cfg.agent.gamma, mask=not_done[:, 1:]
    )[:, 0]
    ep_len = not_done.to(th.float32).sum(dim=1)

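    # Aggregate metrics per episode as configured and log them to TensorBoard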
    metrics_v['episode_length'] = ep_len
    metrics_v['return_disc'] = r_discounted
    metrics_v['return_undisc'] = r_undiscounted
    default_agg = ['mean', 'min', 'max', 'std']
    for k, v in metrics_v.items():
        agg = cfg.eval.metrics[k]
        if isinstance(agg, str):
            if ':' in agg:
                epagg, tagg = agg.split(':')
                if epagg == 'final':
                    v = [ev[-1] for ev in v]
                elif epagg == 'max':
                    v = [max(ev) for ev in v]
                elif epagg == 'min':
                    v = [min(ev) for ev in v]
                elif epagg == 'sum':
                    v = [sum(ev) for ev in v]
                agg = tagg
            elif not isinstance(v, th.Tensor):
                # Pool per-step values across all environments
                v = list(itertools.chain(*v))
            if agg == 'default':
                agg = default_agg
            else:
                agg = [agg]
        if isinstance(v, th.Tensor):
            agent.tbw_add_scalars(f'Eval/{k}', v, agg, n_samples)
        else:
            agent.tbw_add_scalars(
                f'Eval/{k}', th.tensor(v).float(), agg, n_samples
            )
    log.info(
        f'eval done, avg len {ep_len.mean().item():.01f}, avg return {r_discounted.mean().item():+.03f}, undisc avg {r_undiscounted.mean():+.03f} min {r_undiscounted.min():+0.3f} max {r_undiscounted.max():+0.3f}'
    )

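    # Log entropy values collected from the environments, masked to valid steps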
    if len(entropy_ds) > 0:
        ent_d = (
            th.stack(entropy_ds)
            .T.to(not_done.device)
            .masked_select(not_done[:, :-1])
        )
        agent.tbw_add_scalar('Eval/EntropyDMean', ent_d.mean(), n_samples)
        agent.tbw.add_histogram('Eval/EntropyD', ent_d, n_samples, bins=20)

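    # Push collected frames (annotated if requested) to the render queue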
    if sum([len(q) for q in rq_in]) > 0:
        # Display cumulative reward in video
        c_rew = reward * not_done[:, :-1]
        for i in range(c_rew.shape[1] - 1):
            c_rew[:, i + 1] += c_rew[:, i]
        n_imgs = 0
        for i, ep in enumerate(rq_in):
            for j, frame in enumerate(ep):
                if n_imgs <= cfg.eval.video.length:
                    frame['s_right'].append(f'Acc. Reward {c_rew[i][j]:+.02f}')
                    if annotate:
                        rq.push(**frame)
                    else:
                        rq.push(img=frame['img'])
                    n_imgs += 1
        rq.plot()
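
discounted_bwd_cumsum_ is not defined in this excerpt. Judging by the call site above, it performs a backward discounted cumulative sum over the time dimension, gated by a per-step validity mask. The sketch below illustrates that presumed behavior; the argument order and mask convention are assumptions, not the actual implementation.

def discounted_bwd_cumsum_sketch(x, gamma, mask):
    # x: (num_envs, T) rewards; mask: (num_envs, T), true while the episode
    # is still running after the corresponding step. Accumulating from right
    # to left leaves each episode's discounted return in x[:, 0].
    for t in range(x.shape[1] - 2, -1, -1):
        x[:, t] += gamma * mask[:, t] * x[:, t + 1]
    return x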