in fairdiplomacy/selfplay/search_data_loader.py [0:0]
def _assign_devices(self):
"""Sets what CPUs and GPUs to use.
Sets
self.cores
self._rollout_devices
self._sitcheck_device
"""
self.cores: Optional[Tuple[int, ...]]
self._rollout_devices: List[str]
self._sitcheck_device: Optional[str]
self._eval_sp_device: Optional[str]
self._eval_h2h_devices: Optional[List[str]]
self._num_eval_procs: int
if self.rollout_cfg.num_cores_to_reserve and self._ectx.is_training_master:
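        # Reserve the last num_cores_to_reserve cores of an (assumed) 80-core
        # machine; the 80 below is a hard-coded machine size.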
self.cores = tuple(range(80 - self.rollout_cfg.num_cores_to_reserve, 80))
else:
self.cores = None
if not torch.cuda.is_available():
            # No GPUs available (probably CI, e.g. CircleCI); fall back to CPU everywhere.
self._rollout_devices = ["cpu"]
self._sitcheck_device = "cpu"
self._eval_sp_device = "cpu"
self._eval_h2h_devices = ["cpu"] * len(self.h2h_evals)
self._num_eval_procs = 1
else:
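        # GPUs are available: partition them between the trainer, the eval
        # jobs, and the rollout workers.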
devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
if (
len(devices) > 1
and self._ectx.is_training_master
and not self.rollout_cfg.benchmark_only
):
                # On the training master with more than one GPU, skip the GPUs
                # used by the trainer.
devices = devices[self._num_train_gpus :]
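        # On a full 8-GPU machine each eval job below gets a dedicated GPU
        # (devices.pop); on smaller machines they all share devices[0], which
        # then also remains available to rollouts.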
full_machine = torch.cuda.device_count() == 8
if self.rollout_cfg.test_situation_eval.do_eval and self._ectx.is_training_master:
self._sitcheck_device = devices.pop(0) if full_machine else devices[0]
else:
self._sitcheck_device = None
if self._ectx.is_training_master:
self._eval_sp_device = devices.pop(0) if full_machine else devices[0]
else:
self._eval_sp_device = None
if self._ectx.is_training_master:
self._eval_h2h_devices = [
devices.pop(0) if full_machine else devices[0] for _ in self.h2h_evals
]
else:
self._eval_h2h_devices = None
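        # All remaining devices are used for rollouts.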
self._rollout_devices = devices
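        # Use more eval worker processes when running on a full 8-GPU machine.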
self._num_eval_procs = 5 if full_machine else 1
logging.info(f"Sit check device {self._sitcheck_device}")
logging.info(f"Eval SelfPlay device {self._eval_sp_device}")
logging.info(f"Eval H2H devices {self._eval_h2h_devices}")
logging.info(f"Procs to use for evals: {self._num_eval_procs}")
logging.info(f"Rollout devices {self._rollout_devices}")