in fairdiplomacy/selfplay/search_data_loader.py [0:0]
def _assign_devices(self):
"""Sets what CPUs and GPUs to use.
Sets
self.cores
self._rollout_devices
self._sitcheck_device
"""
self.cores: Optional[Tuple[int, ...]]
self._rollout_devices: List[str]
self._sitcheck_device: Optional[str]
self._eval_sp_device: Optional[str]
self._eval_h2h_devices: Optional[List[str]]
self._num_eval_procs: int
if self.rollout_cfg.num_cores_to_reserve and self._ectx.is_training_master:
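        # Reserve the last num_cores_to_reserve cores of an (assumed) 80-core
        # machine; the 80 below is a hard-coded machine size.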
self.cores = tuple(range(80 - self.rollout_cfg.num_cores_to_reserve, 80))
else:
self.cores = None
if not torch.cuda.is_available():
            # No GPUs available (probably CI, e.g. CircleCI); fall back to CPU everywhere.
self._rollout_devices = ["cpu"]
self._sitcheck_device = "cpu"
self._eval_sp_device = "cpu"
self._eval_h2h_devices = ["cpu"] * len(self.h2h_evals)
self._num_eval_procs = 1
else:
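        # GPUs are available: partition them between the trainer, the eval
        # jobs, and the rollout workers.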
devices = [f"cuda:{i}" for i in range(torch.cuda.device_count())]
if (
len(devices) > 1
and self._ectx.is_training_master
and not self.rollout_cfg.benchmark_only
):
                # On the training master with more than one GPU, skip the GPUs
                # used by the trainer.
devices = devices[self._num_train_gpus :]
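        # On a full 8-GPU machine each eval job below gets a dedicated GPU
        # (devices.pop); on smaller machines they all share devices[0], which
        # then also remains available to rollouts.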
full_machine = torch.cuda.device_count() == 8
if self.rollout_cfg.test_situation_eval.do_eval and self._ectx.is_training_master:
self._sitcheck_device = devices.pop(0) if full_machine else devices[0]
else:
self._sitcheck_device = None
if self._ectx.is_training_master:
self._eval_sp_device = devices.pop(0) if full_machine else devices[0]
else:
self._eval_sp_device = None
if self._ectx.is_training_master:
self._eval_h2h_devices = [
devices.pop(0) if full_machine else devices[0] for _ in self.h2h_evals
]
else:
self._eval_h2h_devices = None
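        # All remaining devices are used for rollouts.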
self._rollout_devices = devices
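        # Use more eval worker processes when running on a full 8-GPU machine.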
self._num_eval_procs = 5 if full_machine else 1
logging.info(f"Sit check device {self._sitcheck_device}")
logging.info(f"Eval SelfPlay device {self._eval_sp_device}")
logging.info(f"Eval H2H devices {self._eval_h2h_devices}")
logging.info(f"Procs to use for evals: {self._num_eval_procs}")
logging.info(f"Rollout devices {self._rollout_devices}")