def common_trtllm_export_args(parser: "ArgumentParser")

in src/optimum/nvidia/export/cli.py


from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from argparse import ArgumentParser


def common_trtllm_export_args(parser: "ArgumentParser"):
    """Register the command-line arguments shared by the TensorRT-LLM export commands."""
    parser.add_argument("model", type=str, help="Model to export.")

    required_group = parser.add_argument_group("Required arguments")
    required_group.add_argument(
        "--max-input-length",
        type=int,
        default=-1,
        help="Maximum sequence length, in number of tokens, the prompt can be. The maximum number of potential tokens "
        "generated will be <max-output-length> - <max-input-length>.",
    )
    required_group.add_argument(
        "--max-output-length",
        type=int,
        default=-1,
        help="Maximum sequence length, in number of tokens, the model supports.",
    )
    required_group.add_argument(
        "--max-new-tokens",
        type=int,
        default=-1,
        help="Maximum number of new tokens that can be generated for a single request.",
    )

    multi_gpu_group = parser.add_argument_group("Multi-GPU support arguments")
    multi_gpu_group.add_argument(
        "--tp", type=int, default=1, help="Tensor Parallel degree"
    )
    multi_gpu_group.add_argument(
        "--pp", type=int, default=1, help="Pipeline Parallel degree"
    )

    optional_group = parser.add_argument_group("Optional arguments")
    optional_group.add_argument(
        "-d",
        "--dtype",
        type=str,
        default="auto",
        help="Computational data type used for the model. Default to 'auto' matching model's data type.",
    )
    optional_group.add_argument(
        "--max-batch-size",
        type=int,
        default=1,
        help="Maximum number of concurrent requests the model can process. Default to 1.",
    )
    optional_group.add_argument(
        "--max-beams-width",
        type=int,
        default=1,
        help='Maximum number of sampling paths ("beams") to evaluate when decoding a new token. Defaults to 1.',
    )
    optional_group.add_argument(
        "-q", "--quantization", type=str, help="Path to a quantization recipe file."
    )
    optional_group.add_argument(
        "--destination",
        type=str,
        default=None,
        help="Folder where the resulting exported engines will be stored. Default to Hugging Face Hub cache.",
    )
    optional_group.add_argument(
        "--push-to-hub", type=str, help="Repository to push generated engines to."
    )
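
For reference, here is a minimal sketch of how this helper could be wired into a CLI entry point. The module path and function name come from the file above, but the "trtllm-export" program name, the example model ID, and the argument values are purely illustrative assumptions.

from argparse import ArgumentParser

from optimum.nvidia.export.cli import common_trtllm_export_args

# Hypothetical entry point: the program name and example values are assumptions.
parser = ArgumentParser("trtllm-export", description="Export a model to TensorRT-LLM engines.")
common_trtllm_export_args(parser)

# argparse maps "--max-input-length" to args.max_input_length, and so on.
args = parser.parse_args(
    ["meta-llama/Llama-2-7b-hf", "--max-input-length", "1024", "--max-output-length", "2048"]
)
print(args.model, args.max_input_length, args.max_output_length, args.tp, args.pp)

Note that the three flags in the "Required arguments" group use default=-1 rather than required=True, so -1 presumably acts as a sentinel for "not provided" that is validated further downstream.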