in textworld/envs/wrappers/tw_inform7.py [0:0]
def _gather_infos(self):
    """Fill ``self.state`` with information about the current game state.

    Always written: ``_game_progression``, ``_facts``, ``won``, ``lost``,
    ``_winning_policy``, ``_last_action`` and ``_valid_actions``.

    Written only when the matching attribute of ``self.infos`` is truthy:
    ``policy_commands``, ``intermediate_reward``, ``facts``, ``last_action``
    (additionally requires a non-``None`` last action),
    ``admissible_commands`` and ``moves``.

    ``self.state["feedback"]`` must already be set; the ``won``/``lost``
    flags are derived from marker strings found in it.
    """
    state = self.state
    progression = self._game_progression
    current_policy = self._current_winning_policy

    state["_game_progression"] = progression
    state["_facts"] = list(progression.state.facts)

    # End-of-game detection relies on marker strings in the feedback text.
    state["won"] = '*** The End ***' in state["feedback"]
    state["lost"] = '*** You lost! ***' in state["feedback"]

    state["_winning_policy"] = current_policy
    if self.infos.policy_commands:
        if current_policy is None:
            state["policy_commands"] = []
        else:
            state["policy_commands"] = self._inform7.gen_commands_from_actions(current_policy)

    if self.infos.intermediate_reward:
        if state["won"]:
            # The last action led to winning the game.
            reward = 1
        elif state["lost"]:
            # The last action led to losing the game.
            reward = -1
        else:
            previous_policy = self._previous_winning_policy
            if previous_policy is None or current_policy is None:
                # Without both policies there is nothing to compare, so the
                # reward is neutral. Checking the current policy as well
                # avoids calling len() on None.
                reward = 0
            else:
                # +1 if the winning policy got shorter, -1 if it got longer,
                # 0 if its length is unchanged (sign function).
                diff = len(previous_policy) - len(current_policy)
                reward = int(diff > 0) - int(diff < 0)

        state["intermediate_reward"] = reward

    if self.infos.facts:
        state["facts"] = list(map(self._inform7.get_human_readable_fact, state["_facts"]))

    last_action = self._last_action
    state["_last_action"] = last_action
    if self.infos.last_action and last_action is not None:
        state["last_action"] = self._inform7.get_human_readable_action(last_action)

    valid_actions = progression.valid_actions
    state["_valid_actions"] = valid_actions
    if self.infos.admissible_commands:
        all_valid_commands = self._inform7.gen_commands_from_actions(valid_actions)
        # To guarantee the order from one execution to another, we sort the commands.
        # Remove any potential duplicate commands (they would lead to the same result anyway).
        state["admissible_commands"] = sorted(set(all_valid_commands))

    if self.infos.moves:
        state["moves"] = self._moves