# llm_perf/common/get_top_model_from_hub.py
def main(n: int = 100, top_k_orgs: int = 10) -> None:
    """Fetch the top ``n`` text-generation models from the Hugging Face Hub,
    print them, upload the list as a dataset, and summarize downloads per org.

    Args:
        n: Number of top models to fetch and report (default 100).
        top_k_orgs: Number of organizations to show in the per-organization
            downloads summary (default 10).

    Side effects:
        Reads ``HUGGINGFACE_TOKEN`` from the environment and, when present,
        mirrors it into ``HUGGINGFACE_HUB_TOKEN``; prints to stdout; uploads
        the model list to a Hugging Face dataset.
    """
    # Set up authentication (optional, but recommended). The token is copied
    # into HUGGINGFACE_HUB_TOKEN, the variable the hub client reads.
    huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
    if huggingface_token:
        os.environ["HUGGINGFACE_HUB_TOKEN"] = huggingface_token
    else:
        print(
            "Warning: HUGGINGFACE_TOKEN not found in environment variables. Running without authentication."
        )

    # Each entry is a dict with 'organization', 'model_name' and 'downloads'
    # keys (as consumed by the f-string below).
    top_models = get_top_text_generation_models(n)
    print(f"\nTop {n} text generation models on Hugging Face Hub:")
    for i, model in enumerate(top_models, 1):
        print(
            f"{i}. {model['organization']}/{model['model_name']}: {model['downloads']:,} downloads"
        )

    # Upload the collected list to a Hugging Face dataset.
    dataset_name = "optimum-benchmark/top-text-generation-models"
    upload_to_hf_dataset(top_models, dataset_name)

    # Display the leading organizations ranked by total downloads.
    print(f"\nTop {top_k_orgs} organizations by total downloads:")
    org_downloads = compute_org_downloads(top_models)
    sorted_orgs = sorted(
        org_downloads.items(), key=lambda item: item[1], reverse=True
    )[:top_k_orgs]
    for i, (org, downloads) in enumerate(sorted_orgs, 1):
        print(f"{i}. {org}: {downloads:,} downloads")