def sweep()

in svg/sweeper.py [0:0]


    def sweep(self, arguments):
        """Sample a random hyperparameter sweep and launch it as one batch.

        For every environment associated with ``self.config.experiment``,
        ``self.config.sweep.n_sample`` hyperparameter settings (planning
        horizon, initial/final target entropy and entropy decay factor) are
        drawn with ``npr`` after seeding it from ``self.config.seed``. Each
        setting is replicated for seeds ``1..self.config.sweep.n_seed``.
        The resulting ``key=value`` override lists are shuffled and passed
        to ``self.launcher.launch``; ``self.job_idx`` is then advanced by
        the number of results the launcher returned.

        Args:
            arguments: Not used by this method.

        Raises:
            AssertionError: If ``self.config.experiment`` is not one of the
                known sweep names.
        """
        MBPO_ENVS = ['mbpo_cheetah', 'mbpo_hopper', 'mbpo_walker2d',
                     'mbpo_humanoid', 'mbpo_ant']
        POPLIN_ENVS = ['poplin_ant', 'poplin_cheetah', 'poplin_pets_cheetah',
                       'poplin_swimmer', 'poplin_walker2d', 'poplin_hopper']
        ENV_DEFAULTS = {
            'full_poplin_sweep': POPLIN_ENVS,
            'full_mbpo_sweep': MBPO_ENVS,
            'mbpo_sac_baseline': MBPO_ENVS,
        }
        HORIZON_DEFAULTS = {
            'full_poplin_sweep': [0, 2, 3, 4, 5, 6, 11],
            'full_mbpo_sweep': [0, 2, 3, 4],
            'mbpo_sac_baseline': [0],
        }

        experiment = self.config.experiment
        assert experiment in ENV_DEFAULTS, \
          "experiment not recognized"
        envs = ENV_DEFAULTS[experiment]
        horizons = HORIZON_DEFAULTS[experiment]

        n_sample = self.config.sweep.n_sample
        n_seed = self.config.sweep.n_seed

        # Candidate sets that do not depend on the sampled values are built
        # once instead of on every sample.
        init_targ_entr_choices = [1, 0, -1, -2]
        # -8, -16, -32: extra final targets below the linear range.
        low_final_targ_entr = [-2**i for i in range(3, 5 + 1)]
        # 2**-6 ... 2**-1 followed by 2**0 ... 2**6, in increasing order.
        gamma_choices = list(reversed(
            [2**(-i) for i in range(1, 7)])) + \
            [2**i for i in range(0, 7)]

        npr.seed(self.config.seed)
        overrides = []
        for env in envs:
            for _ in range(n_sample):
                # The order of the draws below fixes the random stream for a
                # given seed; keep it stable.
                horizon = npr.choice(horizons)
                init_targ_entr = npr.choice(init_targ_entr_choices)
                # The final target never exceeds the initial one: it is
                # drawn from init_targ_entr down to -4, plus the low tail.
                final_targ_entr_choices = list(
                    range(init_targ_entr, -5, -1)) + low_final_targ_entr
                final_targ_entr = npr.choice(final_targ_entr_choices)
                gamma = npr.choice(gamma_choices)
                # Every sampled setting is repeated across all seeds.
                for seed in range(1, n_seed + 1):
                    overrides_i = {
                        'experiment': experiment,
                        'env': env,
                        'seed': seed,
                        'agent.horizon': horizon,
                        'learn_temp.init_targ_entr': init_targ_entr,
                        'learn_temp.final_targ_entr': final_targ_entr,
                        'learn_temp.entr_decay_factor': gamma
                    }
                    overrides.append(
                        [f'{k}={v}' for k, v in overrides_i.items()])

        # Shuffle with the numpy generator seeded above so that the launch
        # order is reproducible for a given config seed; the stdlib `random`
        # module is never seeded in this method.
        npr.shuffle(overrides)
        # self.validate_batch_is_legal(overrides) # Can take a long time
        returns = self.launcher.launch(overrides, initial_job_idx=self.job_idx)
        self.job_idx += len(returns)