llm_perf/update_llm_perf_leaderboard.py (98 lines of code) (raw):

"""Collect per-machine LLM benchmark results and publish them to the leaderboard dataset.

For every (hardware, subset, backend) combination declared in
``llm_perf/hardware.yaml`` the matching benchmark dataset is downloaded from the
Hugging Face Hub, flattened into a CSV and uploaded to ``MAIN_REPO_ID``.
"""

import json
import os
import shutil
import subprocess
from glob import glob

import pandas as pd
from huggingface_hub import create_repo, repo_exists, snapshot_download, upload_file
from huggingface_hub.utils import disable_progress_bars
from optimum_benchmark import Benchmark

from llm_perf.common.hardware_config import load_hardware_configs

disable_progress_bars()

REPO_TYPE = "dataset"
MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"

DATA_DIR = "data"
PERF_DF = os.path.join(DATA_DIR, "perf-df-{backend}-{hardware}-{subset}-{machine}.csv")
LLM_DF = os.path.join(DATA_DIR, "llm-df.csv")

# Create data directory if it doesn't exist
os.makedirs(DATA_DIR, exist_ok=True)


def _add_stdev_alias(obj) -> bool:
    """Recursively add a ``stdev_`` key next to every ``stdev`` key that lacks one.

    Args:
        obj: A decoded JSON value (dict, list or scalar). Dicts are modified in place.

    Returns:
        True if at least one ``stdev_`` key was added anywhere in ``obj``.
    """
    changed = False
    if isinstance(obj, dict):
        # Recurse first: nested containers (including the value of "stdev"
        # itself) may hold further dicts that need patching.
        for value in obj.values():
            if isinstance(value, (dict, list)):
                changed |= _add_stdev_alias(value)
        # Add the alias only after iterating, so the dict is never resized
        # while it is being traversed.
        if "stdev" in obj and "stdev_" not in obj:
            obj["stdev_"] = obj["stdev"]
            changed = True
    elif isinstance(obj, list):
        for item in obj:
            changed |= _add_stdev_alias(item)
    return changed


def patch_json(file):
    """
    Patch a JSON file by adding a 'stdev_' key with the same value as 'stdev' for all occurrences,
    but only if 'stdev_' doesn't already exist at the same level.
    This is to make the old optimum benchmark compatible with the new one.

    The file is read, its data structure is traversed recursively, and every
    dictionary that contains a 'stdev' key without a corresponding 'stdev_' key
    gets a 'stdev_' key with the same value. The file is rewritten only when at
    least one key was added, so files that are already compatible are left
    untouched.

    Args:
        file (str): The path to the JSON file to be patched.

    Returns:
        None
    """
    with open(file, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not _add_stdev_alias(data):
        # Nothing to patch: avoid rewriting (and reformatting) the cached file.
        return

    with open(file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
    """
    Gather the benchmarks for a given machine and upload them as one CSV.

    Downloads every ``benchmark.json`` of the matching per-machine dataset,
    patches each file for compatibility, concatenates the resulting dataframes,
    writes them to the local ``PERF_DF`` path and uploads that CSV to
    ``MAIN_REPO_ID``.

    Args:
        subset (str): Benchmark subset, used to build the repo id and CSV name.
        machine (str): Machine name, used to build the repo id and CSV name.
        backend (str): Backend name, used to build the repo id and CSV name.
        hardware (str): Hardware type, used to build the repo id and CSV name.

    Raises:
        ValueError: If the downloaded snapshot contains no ``benchmark.json``.
    """
    perf_repo_id = PERF_REPO_ID.format(
        subset=subset, machine=machine, backend=backend, hardware=hardware
    )
    snapshot = snapshot_download(
        repo_type=REPO_TYPE,
        repo_id=perf_repo_id,
        allow_patterns=["**/benchmark.json"],
    )

    # Sorted so the row order of the generated CSV is stable between runs.
    benchmark_files = sorted(glob(f"{snapshot}/**/benchmark.json", recursive=True))
    if not benchmark_files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No benchmark.json files found in {perf_repo_id}")

    dfs = []
    for file in benchmark_files:
        patch_json(file)
        dfs.append(Benchmark.from_json(file).to_dataframe())
    benchmarks = pd.concat(dfs, ignore_index=True)

    perf_df = PERF_DF.format(
        subset=subset, machine=machine, backend=backend, hardware=hardware
    )
    benchmarks.to_csv(perf_df, index=False)

    create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, private=False, exist_ok=True)
    upload_file(
        repo_id=MAIN_REPO_ID,
        repo_type=REPO_TYPE,
        path_in_repo=perf_df,
        path_or_fileobj=perf_df,
    )
    print(f"Uploaded {perf_df} to {MAIN_REPO_ID}")


def _report_gather_failure(subset, machine, backend, hardware, error):
    """Print why one (subset, machine, backend, hardware) combination was skipped."""
    print("Dataset not found for:")
    print(f" • Backend: {backend}")
    print(f" • Subset: {subset}")
    print(f" • Machine: {machine}")
    print(f" • Hardware Type: {hardware}")
    # Surface the real cause: the failure is not necessarily a missing dataset.
    print(f" • Error: {type(error).__name__}: {error}")

    perf_repo_id = PERF_REPO_ID.format(
        subset=subset, machine=machine, backend=backend, hardware=hardware
    )
    if repo_exists(perf_repo_id, repo_type="dataset"):
        print(f"Dataset exists: {perf_repo_id} but could not be processed")


def update_perf_dfs():
    """
    Update the performance dataframes for all machines

    Iterates over every hardware config, subset and backend declared in
    ``llm_perf/hardware.yaml``. A failure for one combination is reported and
    skipped so the remaining combinations are still processed.
    """
    hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")

    for hardware_config in hardware_configs:
        for subset in hardware_config.subsets:
            for backend in hardware_config.backends:
                try:
                    gather_benchmarks(
                        subset,
                        hardware_config.machine,
                        backend,
                        hardware_config.hardware,
                    )
                except Exception as error:
                    # Deliberately broad: this is the best-effort boundary of
                    # the batch job, and the error is reported, not swallowed.
                    _report_gather_failure(
                        subset,
                        hardware_config.machine,
                        backend,
                        hardware_config.hardware,
                        error,
                    )


def update_llm_df():
    """
    Scrape the open-llm-leaderboard and update the leaderboard dataframe

    Clones the scraper repository, installs its requirements, runs it and
    uploads the ``open-llm-leaderboard.csv`` file to ``MAIN_REPO_ID`` as
    ``LLM_DF``. The cloned directory is removed afterwards, even on failure.

    Raises:
        subprocess.CalledProcessError: If cloning, installing or scraping fails.
    """
    scraper_dir = "scrape-open-llm-leaderboard"
    commands = [
        [
            "git",
            "clone",
            "https://github.com/Weyaxi/scrape-open-llm-leaderboard.git",
            scraper_dir,
        ],
        ["pip", "install", "-r", f"{scraper_dir}/requirements.txt", "-q"],
        ["python", f"{scraper_dir}/main.py"],
    ]

    try:
        for command in commands:
            # check=True: stop at the first failing step instead of uploading
            # a CSV that was never produced.
            subprocess.run(command, check=True)
    finally:
        shutil.rmtree(scraper_dir, ignore_errors=True)

    create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
    upload_file(
        repo_id=MAIN_REPO_ID,
        repo_type=REPO_TYPE,
        path_in_repo=LLM_DF,
        path_or_fileobj="open-llm-leaderboard.csv",
    )


def update_llm_perf_leaderboard():
    """Entry point: refresh every dataset that backs the LLM-Perf leaderboard."""
    # update_llm_df() # TO FIX: open-llm scraper is broken otherwise use https://huggingface.co/datasets/open-llm-leaderboard/contents directly
    update_perf_dfs()


if __name__ == "__main__":
    update_llm_perf_leaderboard()