def get_promised_flop_per_sec()

in bench_cluster/report.py [0:0]


def get_promised_flop_per_sec(dtype: torch.dtype) -> float:
    """Return the promised (peak) throughput of the local GPU for `dtype`.

    NOTE: despite the function name, the returned number is expressed in
    TFLOP/s (i.e. units of 1e12 FLOP/s) -- for example ``312`` for bf16 on
    an A100. Multiply by 1e12 to obtain raw FLOP/s.

    The GPU model is detected by querying ``nvidia-smi`` for the device
    name and substring-matching its output against the supported models
    (A100, H100, GH200).

    Args:
        dtype: Torch dtype the workload runs in. ``torch.float32``,
            ``torch.bfloat16`` and ``torch.float16`` are recognised.

    Returns:
        Peak throughput in TFLOP/s for the detected GPU and `dtype`.

    Raises:
        RuntimeError: If ``nvidia-smi`` is missing or exits with a non-zero
            status. The underlying exception is attached as ``__cause__``.
        ValueError: If the GPU model is not supported, or if `dtype` has no
            known peak figure for the detected GPU.
    """
    query = ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader']
    # Keep the try body limited to the one call that can actually fail.
    try:
        result = subprocess.run(query, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as err:
        raise RuntimeError(
            "Failed to run nvidia-smi. Make sure it's installed and accessible."
        ) from err
    except FileNotFoundError as err:
        raise RuntimeError(
            "nvidia-smi command not found. Make sure NVIDIA drivers are installed."
        ) from err

    # NOTE(review): the whole stdout is matched as one string; presumably
    # nvidia-smi prints one name per device, so on a multi-GPU node a match
    # on any listed device selects the branch -- confirm nodes are homogeneous.
    gpu_name = result.stdout.strip()

    half_precision = (torch.bfloat16, torch.float16)
    # Each entry pairs a group of dtypes with its peak figure in TFLOP/s.
    if "A100" in gpu_name:
        peak_tflops = (
            ((torch.float32,), 19.5),
            (half_precision, 312),
        )
    elif "H100" in gpu_name or "GH200" in gpu_name:
        peak_tflops = (
            ((torch.float32,), 67.5),
            # 989.5 TFLOP/s: half of 1979, the figure used for dense operations.
            (half_precision, 1979 / 2),
        )
    else:
        raise ValueError(f"Unsupported GPU model: {gpu_name}")

    for dtypes, tflops in peak_tflops:
        if dtype in dtypes:
            return tflops

    raise ValueError(f"Unknown dtype: {dtype}")