in source/BattlesnakeGym/battlesnake_gym/snake_gym.py [0:0]
def step(self, actions, episodes=None):
    '''
    Advance the game by one turn.

    Inherited function of the openAI gym. The steps taken mimic the steps provided in
    https://docs.battlesnake.com/references/rules -> Programming Your Snake -> 3) Turn resolution.

    Parameters:
    ---------
    actions: np.array(number_of_snakes)
        Array of integers containing an action for each snake.
        The integers range from 0 to 3 corresponding to Snake.UP, Snake.DOWN, Snake.LEFT, Snake.RIGHT
        respectively
    episodes: optional
        Passed through unchanged to self.rewards.get_reward (defaults to None).

    Returns:
    -------
    observation:
        Result of self._get_observation() once the turn has been resolved
    reward: {}
        The rewards obtained by each snake.
        Dictionary is of length number_of_snakes
    done: {}
        Dictionary mapping each snake index to a bool that is True when
        that snake is dead at the end of the turn
    info: {}
        Dictionary with the keys 'current_turn', 'snake_health',
        'snake_info' and 'snake_max_len'

    Raises:
    ------
    RuntimeError
        If the snake map is inconsistent after the turn was resolved.
        The message contains the actions and the board before/after moving.
    '''
    # Maps the outcome string returned by self._did_snake_collide to the
    # reward name given to self.rewards.get_reward. Outcomes that are not
    # listed here (e.g. "Did not collide") earn no collision reward.
    collision_rewards = {
        "Snake hit wall": "hit_wall",
        "Snake was eaten - same tile": "was_eaten",
        "Snake was eaten - adjacent tile": "was_eaten",
        "Snake hit body - hit itself": "hit_self",
        "Snake hit body - hit other": "hit_other_snake",
        "Other snake hit body": "other_snake_hit_body",
        "Ate another snake": "ate_another_snake",
    }

    reward = {}
    snake_info = {}

    # Board snapshots are only used in the error raised by the consistency
    # check at the end of the turn.
    json_before_moving = self.get_json()

    # Phase 1: reduce health and move every living snake
    for i, snake in enumerate(self.snakes.get_snakes()):
        reward[i] = 0
        if not snake.is_alive():
            continue

        snake.health -= 1
        # "<= 0" rather than "== 0" so that a snake whose health is already
        # below zero still starves instead of living on forever.
        if snake.health <= 0:
            snake.kill_snake()
            reward[i] += self.rewards.get_reward("starved", i, episodes)
            snake_info[i] = "Starved"
            continue

        is_forbidden = snake.move(actions[i])
        if is_forbidden:
            snake.kill_snake()
            reward[i] += self.rewards.get_reward("forbidden_move", i, episodes)
            snake_info[i] = "Forbidden move"

    json_after_moving = self.get_json()

    # Phase 2: check for food and collisions. Colliding snakes are only
    # collected here and killed after the loop, so every snake is judged
    # while all snakes that moved this turn still count as alive.
    snakes_to_be_killed = []
    for i, snake in enumerate(self.snakes.get_snakes()):
        if not snake.is_alive():
            continue
        snake_head_location = snake.get_head()

        should_kill_snake, outcome = self._did_snake_collide(snake, snakes_to_be_killed)
        if should_kill_snake:
            snakes_to_be_killed.append(snake)
        snake_info[i] = outcome

        # Only snakes that survive the collision check may eat
        if not should_kill_snake and self.food.does_coord_have_food(snake_head_location):
            snake.set_ate_food()
            self.food.remove_food_from_coord(snake_head_location)
            reward[i] += self.rewards.get_reward("ate_food", i, episodes)

        reward_name = collision_rewards.get(outcome)
        if reward_name is not None:
            reward[i] += self.rewards.get_reward(reward_name, i, episodes)

    for snake_to_be_killed in snakes_to_be_killed:
        snake_to_be_killed.kill_snake()

    # Phase 3: reward every snake that is still alive
    snakes_alive = []
    for i, snake in enumerate(self.snakes.get_snakes()):
        is_snake_alive = snake.is_alive()
        snakes_alive.append(is_snake_alive)
        if is_snake_alive:
            reward[i] += self.rewards.get_reward("another_turn", i, episodes)

    self.food.end_of_turn(self.snakes.get_snake_51_map())

    # A multi-snake game is decided once at most one snake is left;
    # a single-snake game never hands out "won"/"died" rewards.
    if self.number_of_snakes > 1 and sum(snakes_alive) <= 1:
        for i, is_snake_alive in enumerate(snakes_alive):
            end_reward = "won" if is_snake_alive else "died"
            reward[i] += self.rewards.get_reward(end_reward, i, episodes)

    # Per-snake "done" flags: True means the snake is dead
    snake_dead_dict = {i: not is_snake_alive
                       for i, is_snake_alive in enumerate(snakes_alive)}
    self.turn_count += 1

    snakes_health = {}
    for i, snake in enumerate(self.snakes.get_snakes()):
        snakes_health[i] = snake.health
        if snake.is_alive():
            self.snake_max_len[i] += 1
        # Snakes that were already dead before this turn have no entry yet
        if i not in snake_info:
            snake_info[i] = "Dead"

    # Consistency check of the final board.
    # NOTE(review): presumably the map encodes heads as 5 and body segments
    # as 1, so a value above 5 or equal to 2 would mean overlapping snakes
    # - confirm against get_snake_51_map.
    sum_map = self.snakes.get_snake_51_map()
    if np.max(sum_map) > 5 or 2 in sum_map:
        raise RuntimeError(
            "Invalid board state after turn resolution\n"
            "snake info {}\n"
            "actions {}\n"
            "before moving json {}\n"
            "after moving json {}\n"
            "final json {}".format(snake_info, actions, json_before_moving,
                                   json_after_moving, self.get_json()))

    return self._get_observation(), reward, snake_dead_dict, {'current_turn': self.turn_count,
                                                              'snake_health': snakes_health,
                                                              'snake_info': snake_info,
                                                              'snake_max_len': self.snake_max_len}