in gym/envs/toy_text/blackjack.py [0:0]
def step(self, action):
    """Apply one player decision and report the outcome.

    A truthy ``action`` means "hit": one card is drawn into the player's
    hand, and the episode ends with reward ``-1.0`` only if that hand is
    now bust; otherwise play continues with reward ``0.0``.

    A falsy ``action`` means "stick": the dealer draws until the dealer's
    hand sums to at least 17, the episode ends, and the reward is the
    comparison of the two scores, possibly adjusted for a player natural
    (see the inline comments).

    Args:
        action: The chosen action; must be contained in
            ``self.action_space``.

    Returns:
        A 5-tuple ``(observation, reward, terminated, False, {})`` where
        ``observation`` comes from ``self._get_obs()``. The fourth element
        is always ``False`` and the fifth is always an empty dict.

    Raises:
        AssertionError: If ``action`` is not in ``self.action_space``.
    """
    assert self.action_space.contains(action)

    if not action:
        # Stick: the dealer plays out their hand, then the hands are scored.
        terminated = True
        while sum_hand(self.dealer) < 17:
            self.dealer.append(draw_card(self.np_random))
        reward = cmp(score(self.player), score(self.dealer))

        if self.sab:
            # S&B rules: a player natural wins outright unless the dealer
            # also holds a natural.
            if is_natural(self.player) and not is_natural(self.dealer):
                reward = 1.0
        elif self.natural and is_natural(self.player) and reward == 1.0:
            # Legacy rules: a natural never wins by itself, but a win
            # achieved with a natural pays a bonus.
            reward = 1.5
    else:
        # Hit: take one more card; going bust ends the episode with a loss.
        self.player.append(draw_card(self.np_random))
        terminated, reward = (
            (True, -1.0) if is_bust(self.player) else (False, 0.0)
        )

    if self.render_mode == "human":
        self.render()

    return self._get_obs(), reward, terminated, False, {}