def run_benchmark()

in bench/kernels/benchmark_w4a16.py [0:0]


def _accumulate_result(totals, result, dense_seconds):
    """Fold one layer's kernel measurement into the running totals.

    Sets ``result["speedup"]`` relative to the dense run, adds ``result["s"]``
    to ``totals["s"]``, and adds every other metric weighted by ``result["s"]``
    so that it can later be turned into a weighted average.
    """
    result["speedup"] = dense_seconds / result["s"]
    totals["s"] += result["s"]
    for key in totals:
        if key != "s":
            totals[key] += result[key] * result["s"]


def _finalize_totals(totals):
    """Turn the weighted metric sums in ``totals`` into weighted averages in place."""
    if totals["s"] == 0:
        # Nothing was accumulated (e.g. the model has no layers): keep the
        # zero-initialised metrics instead of dividing by zero.
        return
    for key in totals:
        if key != "s":
            totals[key] /= totals["s"]


def run_benchmark(model, tokens=None):
    """Benchmark the AWQ and Marlin kernels on every layer of ``model``.

    For each token count, every ``(k, n)`` layer listed in ``MODELS[model]`` is
    run through the dense, AWQ and Marlin benchmarks. The ``"s"`` values are
    summed over the layers; the other metrics ("TFLOP/s", "GB/s", "speedup")
    are averaged over the layers, weighted by each layer's ``"s"`` value.
    One summary line per kernel is printed for each token count.

    Args:
        model: Key into the module-level ``MODELS`` mapping.
        tokens: A single token count, a list/tuple of token counts, or None
            to use the default sweep of powers of two from 1 to 2048.

    Raises:
        KeyError: If ``model`` is not a key of ``MODELS``.
    """
    if tokens is None:
        tokens = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    elif not isinstance(tokens, (list, tuple)):
        # A lone value is treated as a one-element sweep.
        tokens = [tokens]
    groupsize = 128
    layers = MODELS[model]
    print(model)
    for m in tokens:
        tot_awq = {"s": 0, "TFLOP/s": 0, "GB/s": 0, "speedup": 0}
        tot_marlin = {"s": 0, "TFLOP/s": 0, "GB/s": 0, "speedup": 0}
        for k, n in layers:
            A, B_ref, B_awq, B_marlin, s, s_marlin, sz, sz_marlin = get_problem(m, n, k, groupsize)
            # The dense run is the baseline both speedups are computed against.
            res_d = benchmark_dense(A, B_ref, m, n, k)
            res_awq = benchmark_awq(A, B_awq, s, sz, m, n, k)
            _accumulate_result(tot_awq, res_awq, res_d["s"])
            res_marlin = benchmark_marlin(A, B_marlin, s_marlin, sz_marlin, m, n, k)
            _accumulate_result(tot_marlin, res_marlin, res_d["s"])
        _finalize_totals(tot_awq)
        _finalize_totals(tot_marlin)
        print(
            "AWQ, tokens=%04d: s=%.5f, TFLOP/s=%07.3f, GB/s=%08.3f, speedup=%.2f"
            % (m, tot_awq["s"], tot_awq["TFLOP/s"], tot_awq["GB/s"], tot_awq["speedup"])
        )
        # NOTE(review): this line labels the same token count as "batch" while
        # the AWQ line uses "tokens"; kept unchanged so existing output stays
        # byte-compatible.
        print(
            "Marlin, batch=%04d: s=%.5f, TFLOP/s=%07.3f, GB/s=%08.3f, speedup=%.2f"
            % (m, tot_marlin["s"], tot_marlin["TFLOP/s"], tot_marlin["GB/s"], tot_marlin["speedup"])
        )