def main()

in load_tests/benchmarks.py
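
For context, the function relies on module-level setup that is not part of this excerpt. The standard-library, pandas, and logger pieces follow directly from the code body; TGIDockerRunner, BenchmarkRunner, build_df, and get_gpu_name are project helpers assumed to be defined elsewhere under load_tests, so they are only noted in a comment.

import logging
import os
import sys
import traceback

import pandas as pd

# TGIDockerRunner, BenchmarkRunner, build_df and get_gpu_name are project helpers;
# their actual import lines are not shown in this excerpt.

logger = logging.getLogger(__name__)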


def main(sha, results_file):
    results_dir = "results"
    # resolve the results directory relative to this file rather than the current working directory
    results_dir = os.path.join(os.path.dirname(__file__), results_dir)
    logger.info("Starting benchmark")
    models = [
        ("meta-llama/Llama-3.1-8B-Instruct", 1),
        # ('meta-llama/Llama-3.1-70B-Instruct', 4),
        # ('mistralai/Mixtral-8x7B-Instruct-v0.1', 2),
    ]
    success = True
    for model in models:
        tgi_runner = TGIDockerRunner(model[0])
        # create a per-model results directory
        model_dir = os.path.join(
            results_dir, model[0].replace("/", "_").replace(".", "_")
        )
        os.makedirs(model_dir, exist_ok=True)
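        # mount the per-model directory into the benchmark container so the result
        # files it writes under /opt/text-generation-inference-benchmark/results
        # end up in model_dir on the host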
        runner = BenchmarkRunner(
            volumes=[(model_dir, "/opt/text-generation-inference-benchmark/results")]
        )
        try:
            tgi_runner.run([("max-concurrent-requests", 512)], gpus=model[1])
            logger.info(f"TGI started for model {model[0]}")
            parameters = [
                ("tokenizer-name", model[0]),
                ("max-vus", 800),
                ("url", "http://localhost:8080"),
                ("duration", "120s"),
                ("warmup", "30s"),
                ("benchmark-kind", "rate"),
                (
                    "prompt-options",
                    "num_tokens=200,max_tokens=220,min_tokens=180,variance=10",
                ),
                (
                    "decode-options",
                    "num_tokens=200,max_tokens=220,min_tokens=180,variance=10",
                ),
                (
                    "extra-meta",
                    f'"engine=TGI,tp={model[1]},version={sha},gpu={get_gpu_name()}"',
                ),
                ("no-console", None),
            ]
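            # sweep request rates from 0.8 to 24.0 requests/s in steps of 0.8 (30 rate levels)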
            rates = [("rates", f"{r / 10.}") for r in range(8, 248, 8)]
            parameters.extend(rates)
            runner.run(parameters, f"container:{tgi_runner.container.id}")
        except Exception as e:
            logger.error(f"Error running benchmark for model {model[0]}: {e}")
            # print the stack trace
            print(traceback.format_exc())
            success = False
        finally:
            tgi_runner.stop()
            runner.stop()
    if not success:
        logger.error("Some benchmarks failed")
        sys.exit(1)

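    # aggregate every per-model result JSON into a single dataframe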
    df = pd.DataFrame()
    # list the per-model result directories (immediate subdirectories only)
    directories = [
        f"{results_dir}/{d}"
        for d in os.listdir(results_dir)
        if os.path.isdir(f"{results_dir}/{d}")
    ]
    logger.info(f"Found result directories: {directories}")
    for directory in directories:
        data_files = {}
        for filename in os.listdir(directory):
            if filename.endswith(".json"):
                data_files[filename.split(".")[-2]] = f"{directory}/{filename}"
        logger.info(f"Processing directory {directory}")
        df = pd.concat([df, build_df(directory.split("/")[-1], data_files)])
    df["device"] = get_gpu_name()
    df["error_rate"] = (
        df["failed_requests"]
        / (df["failed_requests"] + df["successful_requests"])
        * 100.0
    )
    df.to_parquet(results_file)
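
The command-line entry point is not included in this excerpt. A minimal sketch of how main() could be wired up is shown below; the argparse flags --sha and --results-file are assumptions for illustration, not necessarily the repository's actual interface.

if __name__ == "__main__":
    import argparse

    # hypothetical CLI wrapper; the real entry point may differ
    parser = argparse.ArgumentParser(description="Run TGI load benchmarks")
    parser.add_argument("--sha", required=True, help="commit SHA recorded in extra-meta")
    parser.add_argument("--results-file", required=True, help="path of the output parquet file")
    args = parser.parse_args()
    main(args.sha, args.results_file)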