agents/offline_agents.py
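# Relies on module-level imports elsewhere in this file: logging, os, pickle,
# numpy as np, torch, tqdm, phyre, the project modules nets and
# neural_agent_contrastive, and the helpers DetailedEvaluator,
# ParallelPhyreSimulator, get_latest_eval_checkpoint.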
def real_eval(cls, cache, model, actions, task_ids, tier,
              max_attempts_per_task, eval_batch_size, finetune_iterations,
              refine_iterations, refine_loss, refine_lr, checkpoint_dir):
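    """Rank candidate actions with the model and simulate the top ones.

    Optionally refines actions per task and finetunes the model online on
    the resulting attempts; evaluator state is checkpointed periodically so
    an interrupted run can resume.
    """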
    # TODO: move to a flag.
    finetune_lr = 1e-4
    model = model.to(nets.DEVICE)
    simulator = phyre.initialize_simulator(task_ids, tier)
    observations = simulator.initial_scenes
    assert tuple(task_ids) == simulator.task_ids
    logging.info('Ranking %d actions and simulating top %d', len(actions),
                 max_attempts_per_task)
    if refine_iterations > 0:
        logging.info(
            'Will do refining for %d iterations with lr=%e and loss=%s',
            refine_iterations, refine_lr, refine_loss)
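    # Track per-task attempts and solutions; wrap the model for multi-GPU
    # scoring and start a pool of workers that run PHYRE simulations in
    # parallel.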
    evaluator = DetailedEvaluator(task_ids)
    start = -1
    model = torch.nn.DataParallel(model)
    psimulator = ParallelPhyreSimulator(task_ids,
                                        tier,
                                        num_workers=10,
                                        max_len=1,
                                        max_batch_size=1024,
                                        requires_imgs=True,
                                        requires_featurized=True)
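    # Resume from the most recent evaluation checkpoint, if one exists.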
    latest_ckpt = get_latest_eval_checkpoint(checkpoint_dir)
    if latest_ckpt is not None:
        with open(latest_ckpt, 'rb') as file_io:
            evaluator, start = pickle.load(file_io)
        logging.info('Resuming evaluation from %s', latest_ckpt)
    checkpoint_every = 20
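    # Main evaluation loop; tasks up to and including `start` were already
    # processed by the run that wrote the loaded checkpoint.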
    for task_index in tqdm.trange(len(task_ids)):
        if task_index <= start:
            continue
        task_id = simulator.task_ids[task_index]
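        # Optionally refine the candidate actions for this task with a few
        # gradient steps before scoring; otherwise rank the shared set as-is.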
        if refine_iterations > 0:
            refined_actions = neural_agent_contrastive.refine_actions(
                model, actions, observations[task_index], refine_lr,
                refine_iterations, eval_batch_size, refine_loss)
        else:
            refined_actions = actions
        scores = neural_agent_contrastive.eval_actions(model, refined_actions,
                                                       eval_batch_size,
                                                       task_index, psimulator)
        # Order of descending scores.
        action_order = np.argsort(-scores)
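        # Unrefined actions are exactly the cached candidate set, so their
        # simulation statuses can be looked up instead of re-simulated.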
        if refine_iterations <= 0:
            statuses = cache.load_simulation_states(task_id)
        finetune_data = []
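        # Attempt actions from highest to lowest score until the per-task
        # attempt budget is exhausted.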
        for action_id in action_order:
            if evaluator.get_attempts_for_task(
                    task_index) >= max_attempts_per_task:
                break
            action = refined_actions[action_id]
            if refine_iterations > 0:
                status = simulator.simulate_action(task_index,
                                                   action,
                                                   need_images=False).status
            else:
                status = phyre.SimulationStatus(statuses[action_id])
            finetune_data.append((task_index, status, action))
            evaluator.maybe_log_attempt(task_index, status, action)
        if evaluator.get_attempts_for_task(task_index) == 0:
            logging.warning('Made 0 attempts for task %s', task_id)
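        # Online adaptation: finetune the model on the attempts just made for
        # this task before moving on to the next one.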
        if finetune_iterations > 0:
            neural_agent_contrastive.finetune(model, finetune_data, simulator,
                                              finetune_lr, finetune_iterations)
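        # Periodically persist evaluator state so a crashed or preempted run
        # can resume from the last checkpointed task.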
        if task_index % checkpoint_every == 0:
            file_name = os.path.join(checkpoint_dir, 'eval.%04d' % task_index)
            with open(file_name, 'wb') as file_io:
                pickle.dump((evaluator, task_index), file_io)
            logging.info('Checkpointed evaluation to %s', file_name)
    return evaluator
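
# A minimal usage sketch (illustrative, not part of this file): assuming this
# method lives on an agent class `Agent` and the action cache comes from
# phyre.get_default_100k_cache; the argument values below are hypothetical.
#
#   cache = phyre.get_default_100k_cache('ball')
#   task_ids = ['00000:001', '00000:002']
#   evaluator = Agent.real_eval(
#       cache, model, cache.action_array[:10000], task_ids, 'ball',
#       max_attempts_per_task=100, eval_batch_size=512,
#       finetune_iterations=0, refine_iterations=0, refine_loss='ce',
#       refine_lr=1e-3, checkpoint_dir='/tmp/eval_ckpts')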