MTRF/algorithms/softlearning/algorithms/multi_sac.py [1179:1208]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return paths

    def _evaluate_rollouts(self, episodes_per_policy, env):
        """Compute per-policy evaluation metrics for the given rollouts."""
        diagnostics_per_policy = []
        for i, episodes in enumerate(episodes_per_policy):
            # Per-episode reward arrays, total returns, and episode lengths.
            episodes_rewards = [episode['rewards'] for episode in episodes]
            episodes_reward = [np.sum(episode_rewards)
                               for episode_rewards in episodes_rewards]
            episodes_length = [episode_rewards.shape[0]
                               for episode_rewards in episodes_rewards]

            diagnostics = OrderedDict((
                ('episode-reward-mean', np.mean(episodes_reward)),
                ('episode-reward-min', np.min(episodes_reward)),
                ('episode-reward-max', np.max(episodes_reward)),
                ('episode-reward-std', np.std(episodes_reward)),
                ('episode-length-mean', np.mean(episodes_length)),
                ('episode-length-min', np.min(episodes_length)),
                ('episode-length-max', np.max(episodes_length)),
                ('episode-length-std', np.std(episodes_length)),
            ))

            # Environment-specific statistics, namespaced under 'env_infos/'.
            env_infos = env.get_path_infos(episodes)
            for key, value in env_infos.items():
                diagnostics[f'env_infos/{key}'] = value

            diagnostics_per_policy.append(diagnostics)

        return diagnostics_per_policy
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
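
For orientation, here is a minimal, self-contained sketch of the data layout `_evaluate_rollouts` expects: `episodes_per_policy` is a list with one entry per policy, each entry a list of episode dicts whose `'rewards'` field is a NumPy array, and the environment only needs to expose `get_path_infos`. The `DummyEnv` class, episode counts, and reward shapes below are illustrative assumptions, not code from the repository.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
from collections import OrderedDict

import numpy as np


class DummyEnv:
    """Hypothetical stand-in for the softlearning env wrapper, which returns
    a flat dict of per-path statistics from get_path_infos()."""

    def get_path_infos(self, episodes):
        return {'num-episodes': len(episodes)}


# Two policies, each evaluated on three synthetic 10-step episodes.
episodes_per_policy = [
    [{'rewards': np.random.uniform(size=(10, 1))} for _ in range(3)]
    for _ in range(2)
]
env = DummyEnv()

# Reproduce the aggregation performed above on the synthetic rollouts.
for i, episodes in enumerate(episodes_per_policy):
    returns = [np.sum(ep['rewards']) for ep in episodes]
    lengths = [ep['rewards'].shape[0] for ep in episodes]
    diagnostics = OrderedDict((
        ('episode-reward-mean', np.mean(returns)),
        ('episode-length-mean', np.mean(lengths)),
    ))
    for key, value in env.get_path_infos(episodes).items():
        diagnostics[f'env_infos/{key}'] = value
    print(f'policy {i}:', dict(diagnostics))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -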



MTRF/algorithms/softlearning/algorithms/phased_sac.py [962:990]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        return paths

    def _evaluate_rollouts(self, episodes_per_policy, env):
        """Compute per-policy evaluation metrics for the given rollouts."""
        diagnostics_per_policy = []
        for i, episodes in enumerate(episodes_per_policy):
            # Per-episode reward arrays, total returns, and episode lengths.
            episodes_rewards = [episode['rewards'] for episode in episodes]
            episodes_reward = [np.sum(episode_rewards)
                               for episode_rewards in episodes_rewards]
            episodes_length = [episode_rewards.shape[0]
                               for episode_rewards in episodes_rewards]

            diagnostics = OrderedDict((
                ('episode-reward-mean', np.mean(episodes_reward)),
                ('episode-reward-min', np.min(episodes_reward)),
                ('episode-reward-max', np.max(episodes_reward)),
                ('episode-reward-std', np.std(episodes_reward)),
                ('episode-length-mean', np.mean(episodes_length)),
                ('episode-length-min', np.min(episodes_length)),
                ('episode-length-max', np.max(episodes_length)),
                ('episode-length-std', np.std(episodes_length)),
            ))

            # Environment-specific statistics, namespaced under 'env_infos/'.
            env_infos = env.get_path_infos(episodes)
            for key, value in env_infos.items():
                diagnostics[f'env_infos/{key}'] = value

            diagnostics_per_policy.append(diagnostics)
        return diagnostics_per_policy
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
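
Both files return `diagnostics_per_policy`: a list of `OrderedDict`s, one per policy, all with the same keys. If a downstream logger expects a single flat dictionary, one way to merge them is to prefix each key with its policy index. The helper below is a hypothetical illustration of that pattern, not code from the repository.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def flatten_diagnostics_per_policy(diagnostics_per_policy):
    """Hypothetical helper: merge per-policy diagnostics into one flat dict
    by prefixing each key with its policy index."""
    flat = {}
    for i, diagnostics in enumerate(diagnostics_per_policy):
        for key, value in diagnostics.items():
            flat[f'policy_{i}/{key}'] = value
    return flat

# For two policies this yields keys such as 'policy_0/episode-reward-mean'
# and 'policy_1/episode-reward-mean' in a single dictionary.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -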



