MTRF/algorithms/softlearning/algorithms/multi_sac.py [35:66]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
            self,
            training_environment,
            evaluation_environment,
            policies,
            Qs_per_policy,
            Q_targets_per_policy,
            pools,
            samplers,
            # goals=(),
            num_goals,
            plotter=None,

            # hyperparams shared across policies
            lr=3e-4,
            reward_scale=1.0,
            target_entropy='auto',
            discount=0.99,
            tau=5e-3,
            target_update_interval=1,
            action_prior='uniform',
            reparameterize=False,
            her_iters=0,
            save_full_state=False,
            save_eval_paths=False,
            per_alpha=1,
            normalize_ext_reward_gamma=1,
            ext_reward_coeffs=[],
            rnd_networks=(),
            rnd_lr=1e-4,
            rnd_int_rew_coeffs=[],
            rnd_gamma=0.99,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


MTRF/algorithms/softlearning/algorithms/phased_sac.py [35:65]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(
            self,
            training_environment,
            evaluation_environment,
            policies,
            Qs_per_policy,
            Q_targets_per_policy,
            pools,
            samplers,
            num_goals,
            plotter=None,

            # hyperparams shared across policies
            lr=3e-4,
            reward_scale=1.0,
            target_entropy='auto',
            discount=0.99,
            tau=5e-3,
            target_update_interval=1,
            action_prior='uniform',
            reparameterize=False,
            her_iters=0,
            save_full_state=False,
            save_eval_paths=False,
            per_alpha=1,
            normalize_ext_reward_gamma=1,
            ext_reward_coeffs=[],
            rnd_networks=(),
            rnd_lr=1e-4,
            rnd_int_rew_coeffs=[],
            rnd_gamma=0.99,
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -