in gym/gym/envs/board_game/go.py [0:0]
def _step(self, action):
    """Apply the agent's move, let the opponent answer, and report the outcome.

    Args:
        action: The move handed to ``self.state.act``. It is first compared
            against ``_resign_action(self.board_size)`` to detect a
            resignation by the agent.

    Returns:
        A 4-tuple ``(observation, reward, done, info)`` where ``observation``
        is ``self.state.board.encode()``, ``reward`` is a float, ``done`` is
        a bool and ``info`` is ``{'state': self.state}``. Rewards:

        * ``0.``  -- episode was already over, the game goes on, or the final
          score is exactly zero;
        * ``1.``  -- the opponent resigned or the final score favours the
          agent's color;
        * ``-1.`` -- the agent resigned, played an illegal move while
          ``illegal_move_mode == 'lose'``, or the final score favours the
          other color.

    Raises:
        pachi_py.IllegalMove: re-raised when ``illegal_move_mode == 'raise'``.
        error.Error: when ``illegal_move_mode`` is neither ``'raise'`` nor
            ``'lose'`` and an illegal move was played.
    """

    def snapshot(reward, terminal):
        # Package the *current* state into the step result tuple.
        return self.state.board.encode(), reward, terminal, {'state': self.state}

    def finish(reward):
        # Flag the episode as over before building the final result.
        self.done = True
        return snapshot(reward, True)

    # It must be the agent's turn whenever a step is requested.
    assert self.state.color == self.player_color

    # A finished episode is left untouched and yields no further reward.
    if self.done:
        return snapshot(0., True)

    # Resigning is an immediate loss for the agent.
    if action == _resign_action(self.board_size):
        return finish(-1.)

    # Agent's move. The pre-move state is kept for the opponent policy.
    state_before_move = self.state
    try:
        self.state = self.state.act(action)
    except pachi_py.IllegalMove:
        mode = self.illegal_move_mode
        if mode == 'lose':
            # Automatic loss on illegal move.
            return finish(-1.)
        if mode != 'raise':
            raise error.Error('Unsupported illegal move action: {}'.format(mode))
        six.reraise(*sys.exc_info())

    # Opponent's reply, only if the agent's move did not end the game.
    if not self.state.board.is_terminal:
        reply_state, opponent_resigned = self._exec_opponent_play(
            self.state, state_before_move, action)
        self.state = reply_state
        # Once the opponent has moved it must be the agent's turn again.
        assert self.state.color == self.player_color

        # An opponent resignation is a win for the agent.
        if opponent_resigned:
            return finish(1.)

    # Re-check after the opponent's move: a game still in progress pays 0.
    if not self.state.board.is_terminal:
        self.done = False
        return snapshot(0., False)

    # Terminal position: positive score means white won, negative means black.
    assert self.state.board.is_terminal
    self.done = True
    final_score = self.state.board.official_score
    if final_score > 0:
        reward = 1. if self.player_color == pachi_py.WHITE else -1.
    elif final_score < 0:
        reward = 1. if self.player_color == pachi_py.BLACK else -1.
    else:
        # Exact tie: nobody wins.
        reward = 0.
    return snapshot(reward, True)