in mujoco_worldgen/env.py [0:0]
def step(self, action):
    """Advance the environment by one timestep.

    Clips ``action`` into the action space, applies it to the MuJoCo
    simulation, steps the physics, and computes the standard Gym 4-tuple.

    Args:
        action: array-like action; values outside the action space bounds
            are clipped to ``action_space.low`` / ``action_space.high``.

    Returns:
        Tuple ``(obs, reward, done, info)`` as required by the Gym API.

    Raises:
        TypeError: if ``get_reward`` does not return a ``float``, or
            ``get_diverged`` does not return a ``(bool, float)`` pair.
    """
    action = np.asarray(action)
    # Clip out-of-range actions into the action space instead of rejecting.
    action = np.minimum(action, self.action_space.high)
    action = np.maximum(action, self.action_space.low)
    # NOTE(review): `assert` is stripped under `python -O`; kept (rather than
    # an explicit raise) so callers that catch AssertionError still work.
    assert self.action_space.contains(action), (
        'Action should be in action_space:\nSPACE=%s\nACTION=%s' %
        (self.action_space, action))
    self.set_action(self.sim, action)
    self.sim.step()
    # Need to call forward() so that sites etc are updated,
    # since they're used in the reward computations.
    self.sim.forward()
    self.t += 1
    reward = self.get_reward(self.sim)
    if not isinstance(reward, float):
        raise TypeError("The return value of get_reward must be a float")
    obs = self.get_obs(self.sim)
    diverged, divergence_reward = self.get_diverged(self.sim)
    if not isinstance(diverged, bool):
        raise TypeError(
            "The first return value of get_diverged must be boolean")
    if not isinstance(divergence_reward, float):
        raise TypeError(
            "The second return value of get_diverged must be float")
    if diverged:
        done = True
        # divergence_reward is guaranteed to be a float by the type check
        # above, so the previous `if divergence_reward is not None` guard was
        # dead code; the reward is always overridden on divergence.
        reward = divergence_reward
    elif self.horizon is not None:
        # Finite-horizon episode: terminate once the step count is reached.
        done = (self.t >= self.horizon)
    else:
        done = False
    info = self.get_info(self.sim)
    # NOTE(review): this stores the divergence *reward* (a float), not the
    # boolean `diverged` flag, under the "diverged" key. Looks like a bug,
    # but preserved as-is for downstream compatibility -- confirm against
    # consumers of `info` before changing.
    info["diverged"] = divergence_reward
    # Return value as required by Gym
    return obs, reward, done, info