in optimum_benchmark/backends/pytorch/backend.py [0:0]
def __init__(self, config: PyTorchConfig):
    super().__init__(config)
    # Threads
    if self.config.inter_op_num_threads is not None:
        self.logger.info(f"\t+ Setting pytorch inter_op_num_threads({self.config.inter_op_num_threads})")
        # inter-op parallelism (across independent ops) is controlled by set_num_interop_threads
        torch.set_num_interop_threads(self.config.inter_op_num_threads)
    if self.config.intra_op_num_threads is not None:
        self.logger.info(f"\t+ Setting pytorch intra_op_num_threads({self.config.intra_op_num_threads})")
        # intra-op parallelism (within a single op) is controlled by set_num_threads
        torch.set_num_threads(self.config.intra_op_num_threads)
    # TF32
    if self.config.allow_tf32:
        self.logger.info("\t+ Enabling TF32")
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    # Autocast
    if self.config.autocast_enabled:
        self.logger.info("\t+ Enabling automatic mixed precision")
        torch.set_autocast_enabled(True)
    if self.config.autocast_dtype is not None:
        if self.config.device == "cpu":
            self.logger.info(f"\t+ Setting autocast cpu dtype to {self.config.autocast_dtype}")
            torch.set_autocast_cpu_dtype(getattr(torch, self.config.autocast_dtype))
        elif self.config.device == "cuda":
            self.logger.info(f"\t+ Setting autocast gpu dtype to {self.config.autocast_dtype}")
            torch.set_autocast_gpu_dtype(getattr(torch, self.config.autocast_dtype))
        else:
            raise ValueError(f"Device {self.config.device} not supported for autocast")
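For reference, a minimal standalone sketch of the two thread knobs the backend sets above; the values 8 and 4 are illustrative, not defaults from optimum-benchmark.

import torch

# Intra-op pool: threads used inside a single op (e.g. one large matmul).
torch.set_num_threads(8)
# Inter-op pool: threads used to run independent ops concurrently.
# Must be set early, before any inter-op parallel work has started.
torch.set_num_interop_threads(4)
print(torch.get_num_threads(), torch.get_num_interop_threads())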
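A hedged sketch of what the allow_tf32 flags change: on Ampere-or-newer GPUs, float32 matmuls and convolutions may execute in TensorFloat-32 internally for extra throughput at slightly reduced precision; tensors still report float32. The CUDA-availability guard is only so the snippet runs anywhere.

import torch

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

if torch.cuda.is_available():
    a = torch.randn(1024, 1024, device="cuda")
    b = torch.randn(1024, 1024, device="cuda")
    c = a @ b  # may execute as TF32 on Ampere+ GPUs when the flag is enabled
    print(c.dtype)  # torch.float32: TF32 is an internal compute mode, not a dtype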
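And a self-contained autocast sketch. The backend toggles the process-wide autocast state through torch.set_autocast_enabled and the per-device dtype setters; the torch.autocast context manager below manages the same state for a single region, and is used here with CPU bfloat16 so the example runs without a GPU.

import torch

a = torch.randn(64, 64)
b = torch.randn(64, 64)

# Inside the context, eligible ops (matmul among them) run in bfloat16 on CPU.
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    c = a @ b
print(c.dtype)  # torch.bfloat16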