in gym/envs/toy_text/taxi.py [0:0]
def __init__(self, render_mode: Optional[str] = None):
    """Set up the map, the tabular transition model, the spaces and render state.

    For every (row, col, passenger index, destination index) combination the
    constructor asks ``self.encode`` for the state id and records, for each
    of the six actions, exactly one outcome in ``self.P`` as a tuple
    ``(probability, next_state, reward, terminated)`` with probability 1.0.
    It also builds ``self.initial_state_distrib``, a normalised vector that
    is uniform over the states where the passenger is at one of the
    locations and that location differs from the destination.

    Args:
        render_mode: Stored unchanged on ``self.render_mode``; it is not
            validated in this method.
    """
    self.desc = np.asarray(MAP, dtype="c")

    # Pickup/dropoff cells as (row, col), plus one colour triple per cell
    # (presumably RGB, used by the renderer -- confirm against render code).
    self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)]
    self.locs_colors = [(255, 0, 0), (0, 255, 0), (255, 255, 0), (0, 0, 255)]

    n_states = 500
    n_rows = 5
    n_cols = 5
    last_row = n_rows - 1
    last_col = n_cols - 1
    n_actions = 6
    in_taxi = 4  # passenger index that means "riding inside the taxi"

    self.initial_state_distrib = np.zeros(n_states)

    # transitions[state][action] -> list of (prob, next_state, reward, terminated)
    transitions = {}
    for state_id in range(n_states):
        transitions[state_id] = {act: [] for act in range(n_actions)}
    self.P = transitions

    for row in range(n_rows):
        for col in range(n_cols):
            here = (row, col)
            # One extra passenger index beyond the locations: inside the taxi.
            for pass_idx in range(len(locs) + 1):
                waiting = pass_idx < in_taxi
                riding = pass_idx == in_taxi
                for dest_idx in range(len(locs)):
                    state = self.encode(row, col, pass_idx, dest_idx)

                    # Count this state as a possible start when the passenger
                    # is waiting at a location other than the destination.
                    if waiting and pass_idx != dest_idx:
                        self.initial_state_distrib[state] += 1

                    for action in range(n_actions):
                        # Unless the action says otherwise: nothing moves and
                        # the step costs -1.
                        next_row, next_col, next_pass = row, col, pass_idx
                        reward = -1
                        terminated = False

                        if action == 0:
                            # Row index goes up, clamped to the last row.
                            next_row = min(row + 1, last_row)
                        elif action == 1:
                            # Row index goes down, clamped to zero.
                            next_row = max(row - 1, 0)
                        elif action == 2:
                            # Column index goes up only if the map character
                            # just right of this cell is ":".
                            if self.desc[1 + row, 2 * col + 2] == b":":
                                next_col = min(col + 1, last_col)
                        elif action == 3:
                            # Column index goes down only if the map character
                            # just left of this cell is ":".
                            if self.desc[1 + row, 2 * col] == b":":
                                next_col = max(col - 1, 0)
                        elif action == 4:
                            # Pickup: succeeds only when the passenger waits
                            # on the taxi's cell; otherwise it is penalised.
                            if waiting and here == locs[pass_idx]:
                                next_pass = in_taxi
                            else:
                                reward = -10
                        elif action == 5:
                            # Dropoff: needs the passenger to be in the taxi.
                            if not riding:
                                reward = -10
                            elif here == locs[dest_idx]:
                                # Delivered at the destination: episode ends.
                                next_pass = dest_idx
                                terminated = True
                                reward = 20
                            elif here in locs:
                                # Left at another location: default reward.
                                next_pass = locs.index(here)
                            else:
                                reward = -10

                        next_state = self.encode(
                            next_row, next_col, next_pass, dest_idx
                        )
                        transitions[state][action].append(
                            (1.0, next_state, reward, terminated)
                        )

    # Turn the start-state counts into a probability distribution (in place).
    start_total = self.initial_state_distrib.sum()
    self.initial_state_distrib /= start_total

    self.action_space = spaces.Discrete(n_actions)
    self.observation_space = spaces.Discrete(n_states)

    self.render_mode = render_mode

    # Rendering state: everything starts empty (None) or at zero here.
    self.window = None
    self.clock = None
    map_rows = self.desc.shape[0]
    map_cols = self.desc.shape[1]
    # Size of one map character: window width / columns, height / rows.
    self.cell_size = (
        WINDOW_SIZE[0] / map_cols,
        WINDOW_SIZE[1] / map_rows,
    )
    self.taxi_imgs = None
    self.taxi_orientation = 0
    self.passenger_img = None
    self.destination_img = None
    self.median_horiz = None
    self.median_vert = None
    self.background_img = None