in optimum_benchmark/backends/pytorch/backend.py [0:0]
def is_exllamav2(self) -> bool:
    """Return True when the model is GPTQ-quantized with the ExLlamaV2 kernel (version 2).

    Two places may declare the exllama config, and either one enables it:
    - ``self.pretrained_config.quantization_config`` — a transformers config
      object, probed via attribute access; its ``exllama_config`` is a dict.
    - ``self.config.quantization_config`` — the backend config, a plain
      mapping, probed via key membership.
    """
    # Both quantization flags must hold before inspecting any config.
    if not (self.is_quantized and self.is_gptq_quantized):
        return False

    # First source: the model's pretrained config (attribute-style access).
    pretrained_quant = getattr(self.pretrained_config, "quantization_config", None)
    if hasattr(pretrained_quant, "exllama_config"):
        if pretrained_quant.exllama_config.get("version") == 2:
            return True

    # Second source: the backend config (dict-style access).
    backend_quant = self.config.quantization_config
    if "exllama_config" in backend_quant:
        if backend_quant["exllama_config"].get("version") == 2:
            return True

    return False