llm_perf/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py
def get_benchmark_config(self, model: str, **kwargs) -> BenchmarkConfig:
    weights_config = kwargs["weights_config"]
    attn_implementation = kwargs["attn_implementation"]

    assert (
        weights_config in self.weights_configs
    ), f"your config does not contain {weights_config}, adjust your _get_weights_configs to fix this issue"

    torch_dtype = self.weights_configs[weights_config]["torch_dtype"]
    quant_scheme = self.weights_configs[weights_config]["quant_scheme"]
    quant_config = self.weights_configs[weights_config]["quant_config"]

    # Run the benchmark in a separate process with device isolation,
    # killing it if the isolation is violated.
    launcher_config = ProcessConfig(
        device_isolation=True, device_isolation_action="kill"
    )

    # Inference scenario: track memory, energy and latency,
    # with 10 warmup runs and 10 iterations over a 10-second window.
    scenario_config = InferenceConfig(
        memory=True,
        energy=True,
        latency=True,
        duration=10,
        iterations=10,
        warmup_runs=10,
        input_shapes=INPUT_SHAPES,
        generate_kwargs=GENERATE_KWARGS,
    )

    # PyTorch backend on CUDA device 0; no_weights=True benchmarks with
    # randomly initialized weights instead of downloading a checkpoint.
    backend_config = PyTorchConfig(
        model=model,
        device="cuda",
        device_ids="0",
        no_weights=True,
        library="transformers",
        task="text-generation",
        torch_dtype=torch_dtype,
        quantization_scheme=quant_scheme,
        quantization_config=quant_config,
        attn_implementation=attn_implementation,
        model_kwargs={"trust_remote_code": True},
    )

    return BenchmarkConfig(
        name=f"{weights_config}-{attn_implementation}",
        scenario=scenario_config,
        launcher=launcher_config,
        backend=backend_config,
    )
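
A minimal sketch of how the returned config could be consumed, assuming optimum-benchmark's Benchmark.launch API; the runner class name, model id, weights config key and attention implementation below are illustrative assumptions, not values taken from this file.

# Sketch only: CUDAPyTorchBenchmarkRunner is a hypothetical runner exposing
# get_benchmark_config; "float16" must be a key of its weights_configs.
from optimum_benchmark import Benchmark

runner = CUDAPyTorchBenchmarkRunner()
benchmark_config = runner.get_benchmark_config(
    model="meta-llama/Llama-2-7b-hf",         # any text-generation model id
    weights_config="float16",                 # assumed weights config key
    attn_implementation="flash_attention_2",  # e.g. "eager", "sdpa", "flash_attention_2"
)
benchmark_report = Benchmark.launch(benchmark_config)  # runs launcher + scenario + backend
benchmark_report.save_json("benchmark_report.json")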