in data/envs/babyai/create_babyai_dataset.py [0:0]
def generate_episode(env, *, max_steps=1000, timeout_duration=0.02):
    """Roll out one episode in ``env`` and record it step by step.

    At every step, the current observation is recorded *before* acting:
    the ``"mission"`` entry goes to ``text_observations`` and the
    ``"direction"`` value followed by the flattened ``"image"`` array goes
    to ``discrete_observations``. The action is then obtained from
    ``policy.replan`` (invoked through ``call_with_timeout``) and applied
    with ``env.step``. The observation returned by the terminating step is
    not recorded.

    Args:
        env: Environment whose ``step(action)`` returns the 5-tuple
            ``(observation, reward, terminated, truncated, info)``.
            Observations must be indexable by ``"mission"``, ``"image"``
            and ``"direction"``.
        max_steps: Number of non-terminal steps tolerated before the
            episode is abandoned. The check is ``steps > max_steps``, so
            the error is raised after ``max_steps + 1`` non-terminal steps.
        timeout_duration: Value forwarded as ``timeout_duration`` to
            ``call_with_timeout`` when asking the policy for an action
            (presumably seconds -- confirm against ``call_with_timeout``).

    Returns:
        dict: Four lists of equal length, one entry per step taken, under
        the keys ``"text_observations"``, ``"discrete_observations"``,
        ``"discrete_actions"`` and ``"rewards"``.

    Raises:
        RuntimeError: If the episode neither terminates nor is truncated
            within the step budget.
    """
    episode = {
        "text_observations": [],
        "discrete_observations": [],
        "discrete_actions": [],
        "rewards": [],
    }
    # NOTE(review): reset_env_and_policy is defined elsewhere; it is
    # presumably responsible for resetting env and building the policy.
    observation, policy = reset_env_and_policy(env)

    steps = 0
    while True:
        # Record the observation the action will be chosen from.
        episode["text_observations"].append(observation["mission"])
        flattened_symbolic_obs = observation["image"].flatten()
        # Direction comes first, followed by the flattened image values.
        concatenated_discrete_obs = np.append(observation["direction"], flattened_symbolic_obs)
        episode["discrete_observations"].append(concatenated_discrete_obs)

        action = call_with_timeout(policy.replan, timeout_duration=timeout_duration)
        observation, reward, terminated, truncated, _ = env.step(action)

        # Store the action as a plain int rather than whatever type the
        # policy returned.
        episode["discrete_actions"].append(int(action))
        episode["rewards"].append(reward)

        if terminated or truncated:
            break

        steps += 1
        if steps > max_steps:
            # RuntimeError is still caught by ``except Exception`` handlers.
            raise RuntimeError("Episode too long")

    return episode