def main()

in bench/kernels/benchmark_w4a16.py [0:0]


def main():
    """Command-line entry point for the W4A16 matmul kernel benchmark.

    Options:
        --model:  a single model configuration to benchmark. When omitted,
                  every entry of ``MODELS`` is benchmarked in turn.
        --tokens: an integer forwarded unchanged to ``run_benchmark``. When
                  omitted, ``None`` is forwarded instead.

    ``run_benchmark`` is invoked once per selected model, and an empty line
    is printed after each invocation.
    """
    cli = argparse.ArgumentParser(description="W4A16 Matrix Multiplication Kernel benchmark")
    cli.add_argument(
        "--model", type=str, default=None, help="The model configuration to benchmark. None to test all of them."
    )
    cli.add_argument(
        "--tokens", type=int, default=None,
        help="The numbers of input tokens used to benchmark. None to test a predefined range.",
    )
    options = cli.parse_args()

    # No explicit --model means "run the full predefined set".
    if options.model is None:
        selected = MODELS
    else:
        selected = [options.model]

    for name in selected:
        run_benchmark(name, options.tokens)
        # Emit an empty line after each model's run.
        print()