def get_top_llm_list()

in llm_perf/common/utils.py [0:0]


def get_top_llm_list(n: int = 10) -> list[str]:
    """
    Fetches the top n text generation models from the Hugging Face dataset.

    The "train" split of the ``optimum-benchmark/top-text-generation-models``
    dataset is loaded, its rows are ordered by their ``downloads`` field
    (highest first), and the first ``n`` rows are formatted as model ids.

    This is a best-effort helper: any failure while importing ``datasets``,
    loading the dataset, or reading the expected fields is reported on stdout
    and results in an empty list rather than an exception.

    Args:
        n (int): Number of top models to retrieve. Defaults to 10. Values
            less than or equal to zero yield an empty list without loading
            the dataset.

    Returns:
        list: A list of strings representing the top n models in the format
            "organization/model_name", most downloaded first. Empty if
            ``n <= 0`` or if the dataset could not be fetched or parsed.
    """
    # A negative n used as a slice bound would drop models from the tail
    # instead of selecting none, so non-positive requests short-circuit here
    # (this also skips the dataset load). n=None is deliberately not caught:
    # it keeps slicing to the full list, as it did before this guard existed.
    if n is not None and n <= 0:
        return []

    try:
        # Imported lazily so a missing `datasets` package is handled by the
        # same best-effort fallback as a failed load.
        from datasets import load_dataset

        # Download the dataset from the Hugging Face Hub
        ds = load_dataset("optimum-benchmark/top-text-generation-models")

        # Get the data from the dataset as a list of per-row dicts
        models_data = ds["train"].to_pandas().to_dict("records")

        # Sort by downloads, most downloaded first
        models_data = sorted(models_data, key=lambda x: x["downloads"], reverse=True)

        # Create the list of top models
        return [
            f"{model['organization']}/{model['model_name']}"
            for model in models_data[:n]
        ]
    except Exception as e:
        print(f"Error fetching top LLM list: {e}")
        return []