def compute_reward()

in env_humanoid_base.py


    def compute_reward(self, error, fn_def):
        '''
        Recursively evaluates the reward definition tree `fn_def` against
        the task-specific error terms in `error`. Returns the scalar
        reward together with an info tree mirroring `fn_def` that records
        each node's computed value for logging.
        '''
        op = fn_def['op']
        n = fn_def.get('name', 'noname')
        w = fn_def.get('weight', 1.0)

        rew_info = {'name': n, 'value': 0.0, 'op': op, 'weight': w, 'child_nodes': []}

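        # Interior nodes combine child rewards: 'add'/'sum' sums them,
        # 'mul'/'multiply' multiplies them. Note that a node's own weight
        # is recorded in rew_info but only applied at leaves.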
        if op in ['add', 'sum']:
            rew = 0.0
            for child in fn_def['child_nodes']:
                r, rd = self.compute_reward(error, child)
                rew += r
                rew_info['child_nodes'].append(rd)
        elif op in ['mul', 'multiply']:
            rew = 1.0
            for child in fn_def['child_nodes']:
                r, rd = self.compute_reward(error, child)
                rew *= r
                rew_info['child_nodes'].append(rd)
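        # Leaf nodes map a single named error term to a reward through an
        # optional kernel and a (possibly scheduled) weight.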
        elif op == 'leaf':
            kernel = fn_def.get('kernel')

            if 'weight_schedule' in fn_def:
                # `math` here is the project's math utility module, not
                # Python's builtin; lerp_from_paired_list presumably
                # interpolates the weight from (timestep, weight) pairs.
                timesteps_total = self._learning_info['timesteps_total']
                w *= math.lerp_from_paired_list(
                    timesteps_total, fn_def['weight_schedule'])
            
            if kernel is None or kernel['type'] == "none":
                e = error[n]  # pass the raw error through unchanged
            elif kernel['type'] == "gaussian":
                # Gaussian kernel turns a non-negative error into a
                # reward in (0, 1]: e = exp(-scale * error)
                e = np.exp(-kernel['scale']*error[n])
            else:
                raise NotImplementedError(
                    "unknown kernel type: %s" % kernel['type'])
            
            rew = w*e
        else:
            raise NotImplementedError('unknown op: %s' % op)

        rew_info['value'] = rew

        return rew, rew_info
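
Below is a minimal usage sketch. The tree layout, node names, and error
values are illustrative (real trees are typically loaded from a task
config), and `env` is assumed to be an instance of the environment class
defined in env_humanoid_base.py:

    # Hypothetical reward tree: total = pose_reward * velocity_reward
    fn_def = {
        'op': 'mul',
        'name': 'total',
        'child_nodes': [
            {'op': 'leaf', 'name': 'pose',
             'kernel': {'type': 'gaussian', 'scale': 2.0}},
            {'op': 'leaf', 'name': 'vel',
             'kernel': {'type': 'gaussian', 'scale': 0.1}},
        ],
    }
    # Task-specific errors keyed by leaf name
    error = {'pose': 0.05, 'vel': 0.8}

    rew, rew_info = env.compute_reward(error, fn_def)
    # rew = exp(-2.0*0.05) * exp(-0.1*0.8) ≈ 0.835
    # rew_info mirrors fn_def, with each node's computed 'value' for logging

The `weight_schedule` branch relies on `math.lerp_from_paired_list`, which
is presumably a piecewise-linear interpolation helper from the project's
math utilities. A sketch of the assumed semantics, not the project's
implementation:

    def lerp_from_paired_list(x, pairs):
        '''Linearly interpolate y at x from sorted (x, y) pairs,
        clamping outside the covered range.'''
        if x <= pairs[0][0]:
            return pairs[0][1]
        if x >= pairs[-1][0]:
            return pairs[-1][1]
        for (x0, y0), (x1, y1) in zip(pairs, pairs[1:]):
            if x0 <= x <= x1:
                t = (x - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)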