def trtllm_kwargs()

in optimum_benchmark/backends/tensorrt_llm/backend.py

Collects the TensorRT-LLM options that were explicitly set on the backend config into a kwargs dict; options left at None are omitted so that downstream defaults apply.

    def trtllm_kwargs(self):
        kwargs = {}

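        # Only forward options that were explicitly set on the config;
        # anything left at None is omitted so downstream defaults apply.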
        if self.config.tp is not None:
            kwargs["tp"] = self.config.tp

        if self.config.pp is not None:
            kwargs["pp"] = self.config.pp

        if self.config.dtype is not None:
            kwargs["dtype"] = self.config.dtype

        if self.config.use_fp8 is not None:
            kwargs["use_fp8"] = self.config.use_fp8

        if self.config.world_size is not None:
            kwargs["world_size"] = self.config.world_size

        if self.config.gpus_per_node is not None:
            kwargs["gpus_per_node"] = self.config.gpus_per_node

        if self.config.max_input_len is not None:
            kwargs["max_input_len"] = self.config.max_input_len

        if self.config.max_output_len is not None:
            kwargs["max_output_len"] = self.config.max_output_len

        if self.config.max_batch_size is not None:
            kwargs["max_batch_size"] = self.config.max_batch_size

        if self.config.max_new_tokens is not None:
            kwargs["max_new_tokens"] = self.config.max_new_tokens

        if self.config.max_prompt_length is not None:
            kwargs["max_prompt_length"] = self.config.max_prompt_length

        if self.config.optimization_level is not None:
            kwargs["optimization_level"] = self.config.optimization_level

        if self.config.use_cuda_graph is not None:
            kwargs["use_cuda_graph"] = self.config.use_cuda_graph

        return kwargs
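
A minimal sketch of how this None-filtering pattern behaves, using a hypothetical DummyTRTLLMConfig / DummyBackend pair (both class names, the field subset, and the loop-based variant are illustrative assumptions, not the actual backend classes):

    from dataclasses import dataclass
    from typing import Optional


    # Hypothetical stand-in for the backend config; attribute names are
    # assumed to mirror the ones read above, and only a subset is shown.
    @dataclass
    class DummyTRTLLMConfig:
        tp: Optional[int] = None
        pp: Optional[int] = None
        dtype: Optional[str] = None
        max_batch_size: Optional[int] = None


    @dataclass
    class DummyBackend:
        config: DummyTRTLLMConfig

        def trtllm_kwargs(self):
            # Same filtering behavior as above: unset (None) options are skipped.
            kwargs = {}
            for key in ("tp", "pp", "dtype", "max_batch_size"):
                value = getattr(self.config, key)
                if value is not None:
                    kwargs[key] = value
            return kwargs


    backend = DummyBackend(DummyTRTLLMConfig(tp=2, dtype="float16"))
    print(backend.trtllm_kwargs())  # -> {'tp': 2, 'dtype': 'float16'}

Because unset options never appear in the returned dict, the caller receiving these kwargs can keep its own defaults for anything the benchmark config did not override.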