in jat/eval/rl/wrappers.py [0:0]
def step(self, action: int):
    """
    Step the environment with the given action.

    Repeat the action for up to ``self.skip`` inner steps, sum the rewards,
    and take the element-wise max over the two frames held in
    ``self._obs_buffer``. The loop stops early as soon as an inner step
    reports ``terminated`` or ``truncated``.

    :param action: the action, forwarded unchanged to every inner
        ``self.env.step`` call
    :return: observation (max over the buffered frames), reward (sum over
        the executed inner steps), terminated, truncated, information --
        the last three are those of the final inner step that was executed
    """
    total_reward = 0.0
    # Defaults keep the return statement well-defined even when the loop
    # body never runs (``self.skip == 0``).
    info = {}
    terminated = truncated = False
    for i in range(self.skip):
        obs, reward, terminated, truncated, info = self.env.step(action)
        # Only the last two frames of a complete skip window are buffered.
        if i == self.skip - 2:
            self._obs_buffer[0] = obs
        if i == self.skip - 1:
            self._obs_buffer[1] = obs
        total_reward += reward
        # Logical ``or`` rather than bitwise ``|``: the flags are evaluated
        # by truthiness, so non-bool flag values do not raise here.
        if terminated or truncated:
            break
    # Note that the observation on the done=True frame doesn't matter.
    # On an early break the buffer slots above may not have been written
    # during this call, so the max is taken over whatever they already held.
    max_frame = self._obs_buffer.max(axis=0)
    return max_frame, total_reward, terminated, truncated, info