in src/optimum/nvidia/utils/cli.py [0:0]
def register_optimization_profiles_args(parser: ArgumentParser) -> ArgumentParser:
    """Register the optimization-profile command-line options on ``parser``.

    Four integer options are added, in this order:

    * ``--max-batch-size`` (default ``1``)
    * ``--max-prompt-length`` (default ``128``)
    * ``--max-new-tokens`` (default ``1024``)
    * ``--max-beam-width`` (default ``1``)

    Args:
        parser: The parser that receives the options; it is modified in place.

    Returns:
        The same ``parser`` object that was passed in.
    """
    # (flag, default, help text) triples, in the order they are registered.
    profile_options = (
        ("--max-batch-size", 1, "Maximum batch size for the model."),
        ("--max-prompt-length", 128, "Maximum prompt a user can give."),
        ("--max-new-tokens", 1024, "Maximum number of tokens to generate"),
        ("--max-beam-width", 1, "Maximum number of beams for sampling"),
    )

    for flag, default_value, description in profile_options:
        parser.add_argument(
            flag,
            type=int,
            default=default_value,
            help=description,
        )

    return parser