def register_optimization_profiles_args()

in src/optimum/nvidia/utils/cli.py [0:0]


def register_optimization_profiles_args(parser: ArgumentParser) -> ArgumentParser:
    """Attach the optimization-profile size limits to ``parser``.

    Four optional integer flags are registered, in this order:

    * ``--max-batch-size`` (default ``1``)
    * ``--max-prompt-length`` (default ``128``)
    * ``--max-new-tokens`` (default ``1024``)
    * ``--max-beam-width`` (default ``1``)

    Args:
        parser: The argument parser to extend; it is modified in place.

    Returns:
        The same ``parser`` instance, so calls can be chained.
    """
    # (flag, default value, help text) -- every option is parsed as an int.
    profile_options = (
        ("--max-batch-size", 1, "Maximum batch size for the model."),
        ("--max-prompt-length", 128, "Maximum prompt a user can give."),
        ("--max-new-tokens", 1024, "Maximum number of tokens to generate"),
        ("--max-beam-width", 1, "Maximum number of beams for sampling"),
    )

    for flag, default_value, description in profile_options:
        parser.add_argument(flag, type=int, default=default_value, help=description)

    return parser