in scripts/benchmark_pipelines.py [0:0]
def get_trtllm_pipeline(args: Namespace):
    """Build a pipeline from parsed benchmark arguments.

    Each keyword handed to ``pipeline`` is read from an attribute of
    ``args``. Most keywords share the attribute's name; the exceptions are
    ``max_batch_size`` (from ``args.batch_size``) and ``max_prompt_length``
    (from ``args.prompt_length``).

    Args:
        args: Namespace exposing ``model``, ``use_fp8``, ``use_cuda_graph``,
            ``batch_size``, ``prompt_length``, ``max_new_tokens``, ``tp``,
            ``pp``, ``gpus_per_node``, ``world_size`` and ``dtype``.

    Returns:
        Whatever ``pipeline`` returns for these keyword arguments.
    """
    # NOTE(review): presumably `pipeline` is the TensorRT-LLM pipeline factory
    # imported elsewhere in this file -- confirm against the module imports.
    build = pipeline

    # Insertion order mirrors the keyword order the callee receives.
    options = {
        "model": args.model,
        "use_fp8": args.use_fp8,
        "use_cuda_graph": args.use_cuda_graph,
        "max_batch_size": args.batch_size,
        "max_prompt_length": args.prompt_length,
        "max_new_tokens": args.max_new_tokens,
        "tp": args.tp,
        "pp": args.pp,
        "gpus_per_node": args.gpus_per_node,
        "world_size": args.world_size,
        "dtype": args.dtype,
    }
    return build(**options)