rlalgos/deprecated/dqn/duelling_dqn.py [133:181]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            n_slots=self.config["n_envs"] * self.config["n_threads"],
            create_agent=self._create_agent,
            create_env=self._create_env,
            env_args={
                "mode": "train",
                "n_envs": self.config["n_envs"],
                "max_episode_steps": self.config["max_episode_steps"],
                **{
                    k: self.config[k]
                    for k in self.config
                    if k.startswith("environment/")
                },
            },
            agent_args={"n_actions": self.n_actions, "model": model},
            n_threads=self.config["n_threads"],
            seeds=self.config["env_seed"],
        )

        model = copy.deepcopy(self.learning_model)
        self.evaluation_batcher = EpisodeBatcher(
            n_timesteps=self.config["max_episode_steps"],
            n_slots=self.config["n_evaluation_rollouts"],
            create_agent=self._create_agent,
            create_env=self._create_env,
            env_args={
                "mode": "evaluation",
                "max_episode_steps": self.config["max_episode_steps"],
                "n_envs": self.config["n_envs"],
                **{
                    k: self.config[k]
                    for k in self.config
                    if k.startswith("environment/")
                },
            },
            agent_args={"n_actions": self.n_actions, "model": model},
            n_threads=self.config["n_evaluation_threads"],
            seeds=self.config["env_seed"] * 10,
        )

        self.register_batcher(self.train_batcher)
        self.register_batcher(self.evaluation_batcher)

    def _state_dict(self, model, device):
        sd = model.state_dict()
        for k, v in sd.items():
            sd[k] = v.to(device)
        return sd

    def run(self):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


rlalgos/deprecated/ppo/discrete_ppo.py [54:102]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            n_slots=self.config["n_envs"] * self.config["n_threads"],
            create_agent=self._create_agent,
            create_env=self._create_env,
            env_args={
                "mode": "train",
                "n_envs": self.config["n_envs"],
                "max_episode_steps": self.config["max_episode_steps"],
                **{
                    k: self.config[k]
                    for k in self.config
                    if k.startswith("environment/")
                },
            },
            agent_args={"n_actions": self.n_actions, "model": model},
            n_threads=self.config["n_threads"],
            seeds=self.config["env_seed"],
        )

        model = copy.deepcopy(self.learning_model)
        self.evaluation_batcher = EpisodeBatcher(
            n_timesteps=self.config["max_episode_steps"],
            n_slots=self.config["n_evaluation_rollouts"],
            create_agent=self._create_agent,
            create_env=self._create_env,
            env_args={
                "mode": "evaluation",
                "max_episode_steps": self.config["max_episode_steps"],
                "n_envs": self.config["n_envs"],
                **{
                    k: self.config[k]
                    for k in self.config
                    if k.startswith("environment/")
                },
            },
            agent_args={"n_actions": self.n_actions, "model": model},
            n_threads=self.config["n_evaluation_threads"],
            seeds=self.config["env_seed"] * 10,
        )

        self.register_batcher(self.train_batcher)
        self.register_batcher(self.evaluation_batcher)

    def _state_dict(self, model, device):
        sd = model.state_dict()
        for k, v in sd.items():
            sd[k] = v.to(device)
        return sd

    def run(self):
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -