def real_eval()

in agents/offline_agents.py
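
The method assumes the usual module-level imports for this file; a sketch of the
ones it visibly relies on (project-local modules such as nets and
neural_agent_contrastive, and helpers like DetailedEvaluator,
ParallelPhyreSimulator, and get_latest_eval_checkpoint, are defined elsewhere
in the repo):

    import logging
    import os
    import pickle

    import numpy as np
    import phyre
    import torch
    import tqdm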


    @classmethod
    def real_eval(cls, cache, model, actions, task_ids, tier,
                  max_attempts_per_task, eval_batch_size, finetune_iterations,
                  refine_iterations, refine_lr, refine_loss, checkpoint_dir):
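        """Rank cached actions with the model and simulate the top scorers.

        For each task, all candidate actions are scored (after optional
        refinement), the highest-scoring ones are simulated until
        max_attempts_per_task attempts have been logged, the model is
        optionally finetuned on the observed outcomes, and the evaluator is
        pickled to checkpoint_dir every few tasks so evaluation can resume.
        """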

        # TODO: move to a flag.
        finetune_lr = 1e-4

        model = model.to(nets.DEVICE)

        simulator = phyre.initialize_simulator(task_ids, tier)
        observations = simulator.initial_scenes
        assert tuple(task_ids) == simulator.task_ids

        logging.info('Ranking %d actions and simulating top %d', len(actions),
                     max_attempts_per_task)
        if refine_iterations > 0:
            logging.info(
                'Will do refining for %d iterations with lr=%e and loss=%s',
                refine_iterations, refine_lr, refine_loss)

        evaluator = DetailedEvaluator(task_ids)
        # Index of the last completed task; -1 means start from the beginning.
        start = -1

        # Wrap the model so action scoring can use all available GPUs.
        model = torch.nn.DataParallel(model)
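        # Parallel PHYRE simulator used while scoring actions; rollouts are
        # fanned out to worker processes in batches.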
        psimulator = ParallelPhyreSimulator(task_ids,
                                            tier,
                                            num_workers=10,
                                            max_len=1,
                                            max_batch_size=1024,
                                            requires_imgs=True,
                                            requires_featurized=True)

        latest_ckpt = get_latest_eval_checkpoint(checkpoint_dir)
        if latest_ckpt is not None:
            with open(latest_ckpt, "rb") as file_io:
                evaluator, start = pickle.load(file_io)
            logging.info('Resuming evaluation from %s', latest_ckpt)

        # Persist the evaluator state every `checkpoint_every` tasks.
        checkpoint_every = 20

        for task_index in tqdm.trange(len(task_ids)):
            # Skip tasks already covered by a loaded checkpoint.
            if task_index <= start:
                continue
            task_id = simulator.task_ids[task_index]
            if refine_iterations > 0:
                refined_actions = neural_agent_contrastive.refine_actions(
                    model, actions, observations[task_index], refine_lr,
                    refine_iterations, eval_batch_size, refine_loss)
            else:
                refined_actions = actions
            scores = neural_agent_contrastive.eval_actions(
                model, refined_actions, eval_batch_size, task_index,
                psimulator)
            # Indices of actions in order of descending score.
            action_order = np.argsort(-scores)
            if refine_iterations <= 0:
                # Without refinement the actions are unchanged, so their
                # simulation statuses can be read from the cache.
                statuses = cache.load_simulation_states(task_id)

            finetune_data = []
            for action_id in action_order:
                # Stop once the attempt budget for this task is exhausted.
                if evaluator.get_attempts_for_task(
                        task_index) >= max_attempts_per_task:
                    break
                action = refined_actions[action_id]
                if refine_iterations > 0:
                    status = simulator.simulate_action(task_index,
                                                       action,
                                                       need_images=False).status
                else:
                    status = phyre.SimulationStatus(statuses[action_id])
                finetune_data.append((task_index, status, action))
                evaluator.maybe_log_attempt(task_index, status, action)
            if evaluator.get_attempts_for_task(task_index) == 0:
                logging.warning('Made 0 attempts for task %s', task_id)
            if finetune_iterations > 0:
                neural_agent_contrastive.finetune(
                    model, finetune_data, simulator, finetune_lr,
                    finetune_iterations)

            if task_index % checkpoint_every == 0:
                file_name = os.path.join(checkpoint_dir, "eval.%04d" % task_index)
                with open(file_name, "wb") as file_io:
                    pickle.dump((evaluator, task_index), file_io)
                logging.info("Checkpointed evaluation to " + file_name)
        return evaluator
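
The resume logic above relies on get_latest_eval_checkpoint, which lives
elsewhere in the repo. A minimal sketch of what such a helper could look like,
assuming checkpoints follow the 'eval.%04d' naming used by this method (the
real implementation may differ):

    import os


    def get_latest_eval_checkpoint(checkpoint_dir):
        # Zero-padded names make lexicographic order match task order, so
        # max() picks the most recent checkpoint.
        if not os.path.isdir(checkpoint_dir):
            return None
        names = [n for n in os.listdir(checkpoint_dir)
                 if n.startswith('eval.')]
        if not names:
            return None
        return os.path.join(checkpoint_dir, max(names))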