in reagent/gym/envs/pomdp/pocman.py
def step(self, action):
    assert self.action_space.contains(action)
    assert self.done is False
    self.step_cnt += 1
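
    # Each step incurs a -1 living cost; move the agent before resolving
    # any collisions.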
    reward = -1
    next_pos = self.next_pos(self.internal_state.agent_pos, action)
    self.internal_state.agent_pos = next_pos
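
    # Tick down the power-pill timer, then note whether the agent is still
    # powered for this step's ghost movement.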
    if self.internal_state.power_duration > 0:
        self.internal_state.power_duration -= 1
    agent_in_power = self.internal_state.power_duration > 0

    hit_ghost = set()
    for g, ghost in enumerate(self.internal_state.ghosts):
        # check if the ghost hits the agent before and after it moves
        if ghost.pos == self.internal_state.agent_pos:
            hit_ghost.add(g)
        else:
            ghost.move(self.internal_state.agent_pos, agent_in_power)
            if ghost.pos == self.internal_state.agent_pos:
                hit_ghost.add(g)
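
    # Resolve collisions: a powered agent eats each colliding ghost (+25,
    # ghost respawns); otherwise the first contact is fatal (-100, done).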
    for g in hit_ghost:
        if self.internal_state.power_duration > 0:
            reward += 25
            self.internal_state.ghosts[g].reset()
        else:
            reward += -100
            self.done = True
            break
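
    # The episode also times out once the step budget is exceeded.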
    if self.step_cnt > self.board["_max_step"]:
        self.done = True
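
    # Eating a food pellet clears it from the maze; clearing the last
    # pellet ends the episode.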
    if self._agent_at_food():
        reward += 10
        self.maze[
            self.internal_state.agent_pos.x, self.internal_state.agent_pos.y
        ] = Element.CLEAR_WALK_WAY
        if self._food_left() == 0:
            self.done = True
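
    # Eating a power pill refreshes the power timer and also clears the cell.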
    if self._agent_at_power():
        self.internal_state.power_duration = self.board["_power_duration"]
        self.maze[
            self.internal_state.agent_pos.x, self.internal_state.agent_pos.y
        ] = Element.CLEAR_WALK_WAY
        reward += 10
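
    # Build the agent's (partial) observation of the new state.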
    ob = self._make_ob()
    return ob, reward, self.done, {"state": self.internal_state}
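
# A minimal interaction sketch under the classic Gym 4-tuple step API, which
# matches the return signature above. The class name PocManEnv is assumed to
# be the env class defined in this module; adjust if it differs:
#
#     env = PocManEnv()
#     ob = env.reset()
#     done = False
#     while not done:
#         action = env.action_space.sample()
#         ob, reward, done, info = env.step(action)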