in llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py [0:0]
def is_benchmark_supported(self, **kwargs) -> bool:
    """Return whether this benchmark configuration can run.

    The only rejected combination is ``attn_implementation ==
    "flash_attention_2"`` together with ``weights_config == "float32"``:
    FlashAttention-2 kernels do not operate in full fp32 precision.

    Expects ``attn_implementation`` and ``weights_config`` keys in
    ``kwargs``; raises ``KeyError`` if either is absent (same as the
    original).
    """
    is_fa2_fp32 = (
        kwargs["attn_implementation"] == "flash_attention_2"
        and kwargs["weights_config"] == "float32"
    )
    return not is_fa2_fp32