in bench_cluster/report.py [0:0]
def get_promised_flop_per_sec(dtype: torch.dtype) -> float:
    """Return the advertised peak dense throughput of the local GPU for `dtype`.

    The GPU model is detected by asking ``nvidia-smi`` for the device name(s)
    and substring-matching the raw output against the known models below.

    NOTE: despite the function name, the returned number is expressed in
    TFLOP/s, i.e. it has to be multiplied by 1e12 to obtain FLOP/s.

    Args:
        dtype: ``torch.float32``, ``torch.bfloat16`` or ``torch.float16``.

    Returns:
        The peak throughput in TFLOP/s, always as a ``float``.

    Raises:
        RuntimeError: if ``nvidia-smi`` cannot be found or exits with an
            error. The underlying exception is attached as ``__cause__``.
        ValueError: if the GPU model is not one of the known models, or if
            `dtype` has no entry for the detected model.
    """
    half_precision = (torch.bfloat16, torch.float16)
    # (name markers, fp32 peak, fp16/bf16 peak) -- every figure is in TFLOP/s.
    # Rows are tried in order, so the A100 row wins if several markers match.
    peak_tflops = (
        (("A100",), 19.5, 312.0),
        # 1979 TFLOP/s is halved to obtain the figure for dense operations.
        (("H100", "GH200"), 67.5, 1979 / 2),
    )

    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as err:
        # Chain the original error so exit code / stderr stay inspectable.
        raise RuntimeError(
            "Failed to run nvidia-smi. Make sure it's installed and accessible."
        ) from err
    except FileNotFoundError as err:
        raise RuntimeError(
            "nvidia-smi command not found. Make sure NVIDIA drivers are installed."
        ) from err

    # Matched by substring below, so the raw output is used as-is.
    gpu_name = result.stdout.strip()

    for markers, fp32_peak, half_peak in peak_tflops:
        if not any(marker in gpu_name for marker in markers):
            continue
        if dtype == torch.float32:
            return fp32_peak
        if dtype in half_precision:
            return half_peak
        # Known GPU, but no published figure for this dtype.
        raise ValueError(f"Unknown dtype: {dtype}")

    raise ValueError(f"Unsupported GPU model: {gpu_name}")