Excerpt: the `step()` method of the environment class defined in mujoco_worldgen/env.py.

    def step(self, action):
        """Advance the simulation by one timestep using the given action.

        The action is clipped elementwise into the action space bounds
        before being applied, then the MuJoCo sim is stepped and the
        reward / observation / termination hooks are queried.

        Args:
            action: array-like action; clipped elementwise into
                [action_space.low, action_space.high].

        Returns:
            Tuple of (obs, reward, done, info) as required by the Gym API.

        Raises:
            TypeError: if get_reward does not return a float, or if
                get_diverged does not return a (bool, float-or-None) pair.
        """
        action = np.asarray(action)
        # Clip into the action space bounds (equivalent to the
        # minimum/maximum pair, in one call).
        action = np.clip(action, self.action_space.low, self.action_space.high)
        assert self.action_space.contains(action), (
            'Action should be in action_space:\nSPACE=%s\nACTION=%s' %
            (self.action_space, action))
        self.set_action(self.sim, action)
        self.sim.step()
        # Need to call forward() so that sites etc are updated,
        # since they're used in the reward computations.
        self.sim.forward()
        self.t += 1

        reward = self.get_reward(self.sim)
        if not isinstance(reward, float):
            raise TypeError("The return value of get_reward must be a float")

        obs = self.get_obs(self.sim)
        diverged, divergence_reward = self.get_diverged(self.sim)

        if not isinstance(diverged, bool):
            raise TypeError(
                "The first return value of get_diverged must be boolean")
        # BUG FIX: a divergence reward of None is meaningful (it means
        # "keep the ordinary reward" -- see the `is not None` branch below),
        # but the original check rejected None with a TypeError, making that
        # branch unreachable. Accept None explicitly.
        if divergence_reward is not None and not isinstance(divergence_reward, float):
            raise TypeError(
                "The second return value of get_diverged must be float or None")

        if diverged:
            done = True
            # None means "no special reward on divergence": keep get_reward's.
            if divergence_reward is not None:
                reward = divergence_reward
        elif self.horizon is not None:
            done = (self.t >= self.horizon)
        else:
            done = False

        info = self.get_info(self.sim)
        # NOTE(review): this stores the divergence *reward* under the key
        # "diverged", not the boolean flag. Preserved as-is for backward
        # compatibility; callers reading info["diverged"] should be aware.
        info["diverged"] = divergence_reward
        # Return value as required by Gym
        return obs, reward, done, info