in bisk/tasks/polebalance.py [0:0]
def step(self, action):
    """Advance one step and apply the pole-balance termination and reward.

    The parent ``step`` is run first; its reward is discarded and replaced
    by the rule below.

    The episode is flagged done when the vertical extent of the pole tower
    (top end of the last pole minus bottom end of the first pole, in world
    z) drops below 80% of the tower's total length
    ``n_poles * pole_length``.

    Args:
        action: Passed through unchanged to the parent ``step``.

    Returns:
        ``(obs, reward, done, info)`` where

        * ``info['score']`` is 1 while the episode is not done (neither the
          parent nor the tower check flagged it) and 0 otherwise;
        * ``reward`` equals that score, except it is -1 when
          ``info['fell_over']`` is truthy, in which case ``done`` is also
          forced to ``True`` (``info['score']`` is left as computed).
    """
    # The parent's reward is always overwritten below, so it is not kept.
    obs, _, done, info = super().step(action)

    xpos = self.p.named.data.xpos
    xquat = self.p.named.data.xquat
    half_length = self.pole_length / 2
    bottom_pole = 'robot/pole-0'
    top_pole = f'robot/pole-{self.n_poles-1}'

    # Scratch output buffer: mju_rotVecQuat writes the rotated vector here.
    # NOTE(review): the +/- half_length offsets along local z assume each
    # pole body's origin sits at its midpoint with the pole along its local
    # z axis -- confirm against the model definition.
    offset = np.zeros(3)

    mjlib.mju_rotVecQuat(
        offset,
        np.array([0.0, 0.0, -half_length]),
        xquat[bottom_pole],
    )
    bottom_z = xpos[bottom_pole][2] + offset[2]

    mjlib.mju_rotVecQuat(
        offset,
        np.array([0.0, 0.0, half_length]),
        xquat[top_pole],
    )
    top_z = xpos[top_pole][2] + offset[2]

    # Failure: the tower's z extent has shrunk below 80% of its full length
    # (i.e. it has lost more than 20% of its height).
    zthresh = 0.8 * self.n_poles * self.pole_length
    if top_z - bottom_z < zthresh:
        done = True

    score = 0 if done else 1
    info['score'] = score
    reward = score

    # A fall reported by the parent overrides the reward with a penalty.
    if info.get('fell_over', False):
        done = True
        reward = -1
    return obs, reward, done, info