in DownstreamRL/TrainZPolicyRL.py [0:0]
def reorder_actions(self, actions):
    """Reorder a bimanual action array and rescale its gripper channels.

    The last axis of ``actions`` is assumed to hold 16 values laid out as:
    7 DoF for the left arm, 7 DoF for the right arm, 1 left-gripper value and
    1 right-gripper value (assumption carried over from the original notes;
    confirm against the caller). The output swaps the two arms and the two
    grippers, i.e. right arm, left arm, right gripper, left gripper, because
    Mujoco needs them flipped.

    Gripper values are assumed to lie in [0, 1] with 0 = closed and 1 = open
    (MIME stores 0..100, so the caller is expected to have normalised them).
    They are mapped with ``1 - 2 * g`` so that closed -> 1 and open -> -1,
    which is the convention noted for Mujoco. Joint channels are passed
    through unchanged.

    Args:
        actions: Array indexable with NumPy-style advanced indexing whose
            last axis has length 16. Any number of leading (batch/time)
            axes is accepted, including none (a single action vector).

    Returns:
        A new array of the same shape as ``actions`` with the channels
        reordered and the two gripper channels rescaled. For NumPy input the
        argument itself is not modified.
    """
    # Destination order: right arm (7..13), left arm (0..6), then the two
    # gripper channels swapped (15, 14).
    indices = np.array([7, 8, 9, 10, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, 15, 14])

    # Index only the last axis so both a (T, 16) batch and a single (16,)
    # action work. Advanced indexing yields a copy, so the in-place gripper
    # rescale below does not touch the caller's array.
    reordered_actions = actions[..., indices]

    # [0 (closed), 1 (open)] -> [1 (closed), -1 (open)].
    reordered_actions[..., 14:] = 1 - 2 * reordered_actions[..., 14:]
    return reordered_actions