in optimum_benchmark/launchers/base.py [0:0]
def device_isolation(self, pid: int, device_ids: Optional[str] = None):
    """Run a watcher process enforcing device isolation while the caller works.

    This is a generator with a single ``yield``: everything before the
    ``yield`` is setup, everything after is teardown. It is presumably
    wrapped with ``contextlib.contextmanager`` where it is defined (the
    decorator is outside this view -- confirm).

    Args:
        pid: Identifier of the process to isolate; forwarded unchanged to
            ``assert_device_isolation``.
        device_ids: Device identifiers to isolate. When ``None``, the value
            is read from ``ROCR_VISIBLE_DEVICES`` on ROCm systems or
            ``CUDA_VISIBLE_DEVICES`` on NVIDIA systems; it stays ``None``
            if neither system is detected or the variable is unset.

    Yields:
        None. Control returns to the caller while the watcher is running.
    """
    if device_ids is None:
        if is_rocm_system():
            device_ids = os.environ.get("ROCR_VISIBLE_DEVICES", None)
        elif is_nvidia_system():
            device_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None)

    self.device_isolation_process = Process(
        target=assert_device_isolation,
        kwargs={"action": self.config.device_isolation_action, "device_ids": device_ids, "pid": pid},
        daemon=True,
    )
    self.device_isolation_process.start()

    # Everything after a successful start() is guarded so the watcher is
    # always stopped, even if the code running during the `yield` raises or
    # the generator is closed early. Without this, the teardown below was
    # skipped on error and the watcher kept running.
    try:
        self.logger.info(f"\t+ Isolating device(s) [{device_ids}] for process [{pid}] and its children")
        self.logger.info(f"\t+ Executing action [{self.config.device_isolation_action}] in case of violation")
        yield
    finally:
        self.logger.info("\t+ Stopping device isolation process")
        self.device_isolation_process.terminate()
        # join() before close(): close() raises ValueError if the process
        # is still running.
        self.device_isolation_process.join()
        self.device_isolation_process.close()