in env_humanoid_base.py
def compute_reward(self, error, fn_def):
    '''
    Computes a scalar reward by recursively evaluating the reward
    definition tree (fn_def) against the task-specific errors, and
    returns both the reward and a per-node breakdown (rew_info).
    '''
    op = fn_def['op']
    n = fn_def.get('name', 'noname')
    w = fn_def.get('weight', 1.0)
    rew_info = {'name': n, 'value': 0.0, 'op': op, 'weight': w, 'child_nodes': []}
    if op in ['add', 'sum']:
        # Sum node: the reward is the sum of its children's rewards
        rew = 0.0
        for child in fn_def['child_nodes']:
            r, rd = self.compute_reward(error, child)
            rew += r
            rew_info['child_nodes'].append(rd)
    elif op in ['mul', 'multiply']:
        # Product node: the reward is the product of its children's rewards
        rew = 1.0
        for child in fn_def['child_nodes']:
            r, rd = self.compute_reward(error, child)
            rew *= r
            rew_info['child_nodes'].append(rd)
    elif op == 'leaf':
        # Leaf node: turn one raw error value into a weighted reward term
        kernel = fn_def.get('kernel')
        if 'weight_schedule' in fn_def:
            # Anneal the term's weight over training time; `math` here is the
            # project's utility module (the stdlib math has no lerp_from_paired_list)
            timesteps_total = self._learning_info['timesteps_total']
            w *= math.lerp_from_paired_list(
                timesteps_total, fn_def['weight_schedule'])
        if kernel is None or kernel['type'] == "none":
            e = error[n]
        elif kernel['type'] == "gaussian":
            # Gaussian (RBF) kernel maps an error in [0, inf) to a reward in (0, 1]
            e = np.exp(-kernel['scale']*error[n])
        else:
            raise NotImplementedError
        rew = w*e
    else:
        raise NotImplementedError
    rew_info['value'] = rew
    return rew, rew_info
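
Note that `math.lerp_from_paired_list` is not Python's standard-library math; it is a project utility whose name and call site suggest piecewise-linear interpolation over a list of (timestep, value) pairs. A minimal sketch of that assumed behavior (the real utility may differ in signature and edge-case handling):

import numpy as np

def lerp_from_paired_list(x, pairs):
    # Hypothetical re-implementation: piecewise-linear interpolation
    # over (x, y) pairs, clamped at both ends (np.interp semantics).
    xs, ys = zip(*pairs)
    return float(np.interp(x, xs, ys))

# e.g. a weight schedule that anneals a reward term's weight
# from 1.0 to 0.2 over the first 10M training timesteps:
schedule = [(0, 1.0), (10_000_000, 0.2)]
print(lerp_from_paired_list(5_000_000, schedule))  # 0.6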
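
For reference, here is a self-contained sketch of what a reward definition tree might look like and what compute_reward would produce for it. The node names, kernel scales, and error values are illustrative, not taken from the project's actual configs:

import numpy as np

# Hypothetical tree: the total reward is the product of a pose term and
# a velocity term, each shaped by a Gaussian kernel over its error.
fn_def = {
    'op': 'mul',
    'name': 'total',
    'child_nodes': [
        {'op': 'leaf', 'name': 'pose',
         'kernel': {'type': 'gaussian', 'scale': 5.0}},
        {'op': 'leaf', 'name': 'vel',
         'kernel': {'type': 'gaussian', 'scale': 0.1}},
    ],
}

# Errors are looked up by each leaf's 'name' key.
error = {'pose': 0.02, 'vel': 1.5}

# Inside the environment this would be evaluated as:
#   rew, rew_info = self.compute_reward(error, fn_def)
# By hand: exp(-5.0*0.02) * exp(-0.1*1.5) = exp(-0.25) ~= 0.779
print(np.exp(-5.0*0.02) * np.exp(-0.1*1.5))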