def __lazy_load_client()

in src/lighteval/metrics/llm_as_judge.py


    def __lazy_load_client(self):  # noqa: C901
        match self.backend:
            # Both "openai" and "tgi" backends use the OpenAI-compatible API
            # They are handled separately to allow for backend-specific validation and setup
            case "openai" | "tgi":
                if not is_openai_available():
                    raise RuntimeError("OpenAI backend is not available.")
                if self.client is None:
                    from openai import OpenAI

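                    # The API key is only passed when no custom base_url (e.g. a TGI endpoint) is configured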
                    self.client = OpenAI(
                        api_key=self.api_key if self.url is None else None, base_url=self.url if self.url else None
                    )
                return self.__call_api_parallel

            case "litellm":
                if not is_litellm_available():
                    raise RuntimeError("litellm is not available.")
                return self.__call_litellm

            case "vllm":
                if not is_vllm_available():
                    raise RuntimeError("vllm is not available.")
                if self.pipe is None:
                    from vllm import LLM, SamplingParams
                    from vllm.transformers_utils.tokenizer import get_tokenizer

                    self.sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=self.max_tokens)
                    self.tokenizer = get_tokenizer(self.model, tokenizer_mode="auto")
                    self.pipe = LLM(model=self.model, max_model_len=2048, gpu_memory_utilization=0.5, dtype="float16")
                return self.__call_vllm

            case "transformers":
                if self.pipe is None:
                    import torch
                    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

                    transformers_model = AutoModelForCausalLM.from_pretrained(
                        self.model, torch_dtype=torch.float16, trust_remote_code=False, device_map="cuda"
                    )
                    tokenizer = AutoTokenizer.from_pretrained(self.model)
                    self.pipe = pipeline(
                        "text-generation",
                        model=transformers_model,
                        tokenizer=tokenizer,
                        max_new_tokens=self.max_tokens,
                    )
                return self.__call_transformers

            case "inference-providers":
                from huggingface_hub import AsyncInferenceClient

                self.client = AsyncInferenceClient(token=self.api_key, base_url=self.url, provider=self.hf_provider)
                return self.__call_hf_inference_async

            case _:
                raise ValueError(f"Unsupported backend: {self.backend}")
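Every branch returns a bound method rather than the client itself, so callers only ever hold a single callable and the heavy imports (openai, vllm, transformers) run at most once, on first use. Below is a minimal sketch of that pattern reduced to one backend; LazyJudgeBackend, _lazy_load_client and _call_api are illustrative names rather than lighteval's API, and the sketch assumes the openai package is installed and OPENAI_API_KEY is set in the environment.

    # Illustrative sketch of the lazy-loading pattern above, reduced to one backend.
    class LazyJudgeBackend:
        def __init__(self, backend: str, model: str = "gpt-4o-mini"):
            self.backend = backend
            self.model = model
            self.client = None  # heavy client is only built on first use

        def _lazy_load_client(self):
            match self.backend:
                case "openai":
                    if self.client is None:
                        from openai import OpenAI  # deferred import, as in the method above

                        self.client = OpenAI()
                    return self._call_api
                case _:
                    raise ValueError(f"Unsupported backend: {self.backend}")

        def _call_api(self, prompts: list[list[dict]]) -> list[str]:
            # By the time this runs, _lazy_load_client() has guaranteed self.client exists.
            return [
                self.client.chat.completions.create(model=self.model, messages=p).choices[0].message.content
                for p in prompts
            ]

    judge_fn = LazyJudgeBackend("openai")._lazy_load_client()  # import and client creation happen here, once
    answers = judge_fn([[{"role": "user", "content": "Score this answer from 1 to 10."}]])

Returning the backend-specific callable also keeps request logic out of the caller, which can dispatch prompts through one uniform interface regardless of which backend was configured.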