in optimum_benchmark/trackers/memory.py [0:0]
def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, int, List[int]]] = None):
    self.device = device
    self.backend = backend
    self.device_ids = device_ids
    self.monitored_pid = os.getpid()
    self.is_gpu = device == "cuda"
    self.is_pytorch_cuda = (self.backend, self.device) == ("pytorch", "cuda")

    LOGGER.info(f"\t\t+ Tracking RAM memory of process {self.monitored_pid}")

    if self.is_gpu:
        # Normalize device_ids to a list of integers, whatever form it was given in.
        if isinstance(self.device_ids, str):
            self.device_ids = list(map(int, self.device_ids.split(",")))
        elif isinstance(self.device_ids, int):
            self.device_ids = [self.device_ids]
        elif isinstance(self.device_ids, list):
            pass  # already a list of integers
        elif self.device_ids is None:
            raise ValueError("GPU device IDs must be provided for memory tracking on GPUs")
        else:
            raise ValueError("GPU device IDs must be a string, an integer, or a list of integers")

        LOGGER.info(f"\t\t+ Tracking GPU memory of devices {self.device_ids}")

        if self.is_pytorch_cuda:
            # PyTorch must see exactly the devices we intend to track.
            self.num_pytorch_devices = torch.cuda.device_count()
            if len(self.device_ids) != self.num_pytorch_devices:
                raise ValueError(
                    "The number of target GPU devices and PyTorch's GPU device count do not match. "
                    f"Got {len(self.device_ids)} and {self.num_pytorch_devices} respectively."
                )
            LOGGER.info(f"\t\t+ Tracking Allocated/Reserved memory of {self.num_pytorch_devices} PyTorch CUDA devices")

    # Peak-memory results, populated once tracking has run.
    self.max_ram_memory = None
    self.max_global_vram_memory = None
    self.max_process_vram_memory = None
    self.max_reserved_memory = None
    self.max_allocated_memory = None
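
A minimal usage sketch, assuming this __init__ belongs to a MemoryTracker class exported from the same module (the class name and the example arguments are assumptions, not confirmed by this excerpt):

# Hypothetical usage; MemoryTracker is the assumed class name for this __init__.
from optimum_benchmark.trackers.memory import MemoryTracker

# CPU-only tracking: device_ids can stay None because is_gpu is False.
cpu_tracker = MemoryTracker(device="cpu", backend="pytorch")

# CUDA tracking: device_ids may be "0,1", 0, or [0, 1]; all forms are normalized to a list of ints.
# With the PyTorch CUDA backend, __init__ raises unless the number of ids equals torch.cuda.device_count().
gpu_tracker = MemoryTracker(device="cuda", backend="pytorch", device_ids=[0, 1])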