src/optimum/nvidia/runtime.py [139:162]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        self,
        inputs: MaybeBatchedToken,
        generation_config: Optional["GenerationConfig"] = None,
        **kwargs,
    ) -> MaybeBatchedToken:
        if not self._executor:
            self._executor = LLM(
                str(self._engines_path),
                skip_tokenizer_init=True,
            )

        if generation_config is None and kwargs:
            generation_config = deepcopy(self._generation_config)
            generation_config.update(**kwargs)

        # Retrieve the sampling config
        sampling = (
            convert_generation_config(generation_config)
            if generation_config
            else self._sampling_config
        )

        if isinstance(inputs, torch.Tensor):
            inputs = inputs.tolist()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/optimum/nvidia/runtime.py [174:197]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        self,
        inputs: MaybeBatchedToken,
        generation_config: Optional["GenerationConfig"] = None,
        **kwargs,
    ) -> MaybeBatchedToken:
        if not self._executor:
            self._executor = LLM(
                str(self._engines_path),
                skip_tokenizer_init=True,
            )

        if generation_config is None and kwargs:
            generation_config = deepcopy(self._generation_config)
            generation_config.update(**kwargs)

        # Retrieve the sampling config
        sampling = (
            convert_generation_config(generation_config)
            if generation_config
            else self._sampling_config
        )

        if isinstance(inputs, torch.Tensor):
            inputs = inputs.tolist()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



