def fetch_multilingual_vulnerability_dataset()

in pyrit/datasets/multilingual_vulnerability_dataset.py [0:0]


def fetch_multilingual_vulnerability_dataset() -> SeedPromptDataset:
    """
    Load the prompts published with "A Framework to Assess Multilingual
    Vulnerabilities of LLMs" and wrap them in a SeedPromptDataset.

    The CSV at the hard-coded GitHub raw URL is read with pandas, and one
    SeedPrompt is built per row: the ``en`` column supplies the prompt text,
    ``id`` (converted to ``str``) supplies the name, and ``type`` becomes the
    single entry in ``harm_categories``. The remaining metadata (dataset name,
    description, authors, source) is the same for every prompt.

    Returns:
        SeedPromptDataset: A dataset with one SeedPrompt per CSV row, in the
        order the rows appear in the file.
    """
    csv_url = "https://raw.githubusercontent.com/CarsonDon/Multilingual-Vuln-LLMs/main/prompts/allprompt.csv"

    # Metadata shared by every prompt; built once instead of once per row.
    description = (
        "Dataset from 'A Framework to Assess Multilingual Vulnerabilities of LLMs'. "
        "Multilingual prompts demonstrating LLM vulnerabilities, labeled by type. "
        "Paper: https://arxiv.org/pdf/2503.13081"
    )
    authors = (
        "Likai Tang, Niruth Bogahawatta, Yasod Ginige, "
        "Jiarui Xu, Shixuan Sun, Surangika Ranathunga, Suranga Seneviratne"
    )

    frame = pd.read_csv(csv_url)

    prompts = []
    for _, record in frame.iterrows():
        prompts.append(
            SeedPrompt(
                value=record["en"],
                data_type="text",
                name=str(record["id"]),
                dataset_name="Multilingual-Vulnerability",
                harm_categories=[record["type"]],
                description=description,
                authors=authors,
                source="https://github.com/CarsonDon/Multilingual-Vuln-LLMs",
            )
        )

    return SeedPromptDataset(prompts=prompts)