in agents/offline_agents.py [0:0]
def real_eval(cls, cache, model, trainer, actions_per_task, task_ids, tier,
max_attempts_per_task, cfg):
    # Resolve the evaluation batch size: use the explicit value if set,
    # otherwise scale up the training batch size.
if cfg.eval.batch_size:
eval_batch_size = cfg.eval.batch_size
else:
eval_batch_size = cfg.train.batch_size * cfg.eval.bs_multiplier
        # Since the eval batch size is scaled up by bs_multiplier, scale down
        # the eval data-loader workers accordingly (keeping at least 16),
        # otherwise memory might blow up.
cfg.eval.data_loader.num_workers = max(
16,
cfg.train.data_loader.num_workers // cfg.eval.bs_multiplier)
logging.warning('Scaling down eval workers to %d',
cfg.eval.data_loader.num_workers)
    assert eval_batch_size % cfg.num_gpus == 0, (
        'eval_batch_size must be divisible by num_gpus')
model.cuda()
# Not passing in the drop_objs here, since this simulator is only
# used for evaluation
simulator = phyre.initialize_simulator(task_ids, tier)
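    # Sanity check: results are later indexed by position in task_ids, so the
    # simulator must preserve the requested task ordering.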
assert tuple(task_ids) == simulator.task_ids
    # The new evaluation code makes a single prediction regardless of the
    # rollout length.
evaluator = EvaluatorWrapper(simulator, task_ids, 1,
max_attempts_per_task)
if cfg.eval.store_vis:
        # Sub-select a small, diverse set of actions (some that solve the
        # task, some that do not). Use a fixed number of samples so the
        # visualization numbers stay consistent across runs.
        store_vis_nsamples = cfg.eval.store_vis_nsamples
actions_override = None
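        # If an explicit action list was provided for visualization, use it
        # verbatim for every task instead of sampling actions from the cache.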
if cfg.eval.store_vis_actions is not None:
actions_override = np.array(
cls.read_actions_override(cfg.eval.store_vis_actions))
eval_batch_size = len(actions_override)
task_indices = []
actions = []
        # Generate the actions for each task separately, so the action set
        # matches the one used before multi-worker testing.
for task_index, task_id in enumerate(
tqdm.tqdm(task_ids, 'gen-ing task IDs for vis')):
if actions_override is not None:
this_actions = actions_override
else:
_, _, this_actions, _, _ = (
neural_agent.create_balanced_eval_set(
cache, [task_id], store_vis_nsamples, cfg.tier))
actions.append(this_actions)
task_indices += [task_index] * len(this_actions)
task_indices = np.array(task_indices)
actions = np.concatenate(actions, axis=0)
else:
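        # No visualization: score every candidate action on every task;
        # task_indices[i] is the task that actions[i] is evaluated against.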
task_indices = np.repeat(np.arange(len(task_ids)),
len(actions_per_task))
actions = np.concatenate([actions_per_task] * len(task_ids),
axis=0)
logging.info('Ranking %d actions and simulating top %d',
len(actions) // len(task_ids), max_attempts_per_task)
assert len(task_indices) == len(actions)
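    # Optionally load the object-space forward model used by the data loader
    # (see cfg.train.data_loader.fwd_model); it is kept on the CPU.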
if cfg.train.data_loader.fwd_model.use_obj_fwd_model:
obj_fwd_model = obj_fwd_agent.ObjTrainer.gen_model(cfg)
if cfg.train.data_loader.fwd_model.weights is not None:
obj_fwd_model = trainer.load_agent_from_folder(
obj_fwd_model, cfg.train.data_loader.fwd_model.weights)
obj_fwd_model = obj_fwd_model.module.cpu()
else:
obj_fwd_model = None
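    # Build a test-mode dataset pairing each selected task with its candidate
    # actions.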
dataset = PhyreDataset(
tier,
task_ids,
task_indices,
        # This argument is unused in test mode; pass placeholder zeros.
torch.LongTensor([0] * len(task_indices)),
actions,
cfg.simulator,
mode='test',
balance_classes=False,
hard_negatives=False,
init_clip_ratio_to_sim=cfg.eval.init_clip_ratio_to_sim,
init_frames_to_sim=cfg.eval.init_frames_to_sim,
frames_per_clip=cfg.eval.frames_per_clip,
n_hist_frames=cfg.eval.n_hist_frames,
drop_objs=cfg.eval.drop_objs,
obj_fwd_model=obj_fwd_model,
)
# res_actions may be different from actions since the last batch
# might be smaller than the others, and we might end up dropping it
res_scores, res_actions, res_indices, res_pixel_accs = (
trainer.eval_actions(model, dataset, len(actions), eval_batch_size,
cfg))
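    # Feed each task's predicted scores and actions to the evaluator, which
    # simulates the top-ranked attempts (up to max_attempts_per_task).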
for task_index, _ in enumerate(task_ids):
mask = (res_indices == task_index)
# When store_vis, the actions are selected differently, so this
# assertion would not hold
assert (cfg.eval.store_vis
or (np.sum(mask) == (len(actions) // len(task_ids))))
if np.sum(mask) == 0:
logging.warning('Missing task %s from evaluation!',
task_ids[task_index])
continue
evaluator.wrapper_add_scores(task_index, res_scores[:, mask],
res_actions[mask])
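    # Summarize the pixel accuracies returned by eval_actions over the movable
    # channels.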
cls.print_pixel_accs_summary([res_pixel_accs],
cfg.phyre_movable_channels)
return evaluator