def generate_episode()

in data/envs/babyai/create_babyai_dataset.py [0:0]


def generate_episode(env):
    """Roll out a single episode in ``env`` and record it step by step.

    The environment and its policy are obtained from
    ``reset_env_and_policy(env)``. At every step the current observation is
    recorded, an action is requested through
    ``call_with_timeout(policy.replan, timeout_duration=0.02)``, and that
    action is applied with ``env.step``.

    Args:
        env: Environment object passed to ``reset_env_and_policy`` and
            stepped with ``env.step(action)``. Observations are indexed with
            the keys ``"mission"``, ``"image"`` and ``"direction"``.

    Returns:
        A dict with four parallel lists, one entry per step taken:
        ``"text_observations"`` (the observation's ``"mission"`` value),
        ``"discrete_observations"`` (``"direction"`` followed by the
        flattened ``"image"``), ``"discrete_actions"`` (actions cast to
        ``int``) and ``"rewards"``.

    Raises:
        Exception: If the episode has neither terminated nor been truncated
            after 1001 environment steps.
    """
    missions, symbolic_observations, actions, rewards = [], [], [], []
    observation, policy = reset_env_and_policy(env)

    # Hard cap on episode length: at most 1001 env steps are attempted.
    for _ in range(1001):
        missions.append(observation["mission"])

        # One flat vector per step: direction first, then the flattened image.
        image_flat = observation["image"].flatten()
        symbolic_observations.append(np.append(observation["direction"], image_flat))

        # NOTE(review): what call_with_timeout returns when the 0.02 timeout
        # is hit is not visible here -- confirm int(action) below is safe.
        action = call_with_timeout(policy.replan, timeout_duration=0.02)
        observation, reward, terminated, truncated, _ = env.step(action)
        actions.append(int(action))
        rewards.append(reward)

        if terminated or truncated:
            return {
                "text_observations": missions,
                "discrete_observations": symbolic_observations,
                "discrete_actions": actions,
                "rewards": rewards,
            }

    raise Exception("Episode too long")