def step()

in bisk/tasks/polebalance.py [0:0]


    def step(self, action):
        """Advance one step and score whether the pole tower is still upright.

        The reward computed by the parent class is discarded. Instead, a
        score of 1 is given while the episode is running and 0 once it is
        done; this score is stored in ``info['score']`` and used as the
        reward. If the parent reports ``info['fell_over']``, the episode is
        terminated and the reward is overridden with -1 (``info['score']``
        is left as computed).

        Args:
            action: Action forwarded unchanged to the parent ``step``.

        Returns:
            The ``(obs, reward, done, info)`` tuple, with ``reward``,
            ``done`` and ``info['score']`` updated as described above.
        """
        # The base-class reward is never used; it is always replaced by the
        # score computed below.
        obs, _, done, info = super().step(action)

        # Failure is defined as the z range between the bottom of the first
        # pole and the top of the last pole falling below 80% of the total
        # tower length (i.e. the tower has lost more than 20% of its height).
        xpos = self.p.named.data.xpos
        xquat = self.p.named.data.xquat
        first_pole = 'robot/pole-0'
        last_pole = f'robot/pole-{self.n_poles-1}'

        # mju_rotVecQuat writes the rotated vector into `t`.
        # NOTE(review): this assumes each pole body's origin sits at the
        # pole's midpoint with the pole along its local z axis -- confirm
        # against the model definition.
        t = np.zeros(3)
        mjlib.mju_rotVecQuat(
            t,
            np.array([0.0, 0.0, -self.pole_length / 2]),
            xquat[first_pole],
        )
        bottom_z = xpos[first_pole][2] + t[2]
        mjlib.mju_rotVecQuat(
            t,
            np.array([0.0, 0.0, self.pole_length / 2]),
            xquat[last_pole],
        )
        top_z = xpos[last_pole][2] + t[2]

        zthresh = 0.8 * self.n_poles * self.pole_length
        if top_z - bottom_z < zthresh:
            done = True

        # `done` may already have been set by the parent step; in either
        # case a finished episode scores 0.
        score = 0 if done else 1
        info['score'] = score
        reward = score

        if info.get('fell_over', False):
            done = True
            reward = -1
        return obs, reward, done, info