in bench/kernels/benchmark_w4a16.py [0:0]
def main():
    """Command-line entry point for the W4A16 matmul kernel benchmark.

    Two optional flags are recognised:

    * ``--model``: a single model configuration to benchmark. When it is
      omitted, every entry yielded by iterating ``MODELS`` is benchmarked.
    * ``--tokens``: an integer forwarded unchanged to ``run_benchmark``;
      when omitted, ``None`` is forwarded instead.

    ``run_benchmark`` is invoked once per selected model, and an empty line
    is printed after each invocation.
    """
    cli = argparse.ArgumentParser(description="W4A16 Matrix Multiplication Kernel benchmark")
    cli.add_argument(
        "--model",
        type=str,
        default=None,
        help="The model configuration to benchmark. None to test all of them.",
    )
    cli.add_argument(
        "--tokens",
        type=int,
        default=None,
        help="The numbers of input tokens used to benchmark. None to test a predefined range.",
    )
    options = cli.parse_args()

    # No explicit --model means "benchmark everything in MODELS".
    if options.model is None:
        selected = MODELS
    else:
        selected = [options.model]

    for name in selected:
        run_benchmark(name, options.tokens)
        # Blank line to separate this model's output from the next one.
        print()