def run_trainer()

in pyrit/auxiliary_attacks/gcg/experiments/run.py [0:0]


def run_trainer(*, model_name: str, setup: str = "single", **extra_config_parameters) -> None:
    """
    Build the GCG configuration for a model and run adversarial suffix generation.

    The configuration is assembled in layers, later layers overriding earlier ones:
    the YAML file ``configs/{individual|transfer}_{model_name}.yaml``, then the
    runtime defaults built here (training data URL, result prefix, Hugging Face
    token, and the multi-prompt flags), then ``extra_config_parameters``, and
    finally ``model_name`` itself.

    Args:
        model_name (str): The name of the model; must be contained in MODEL_NAMES
            (e.g. "mistral", "llama_2", "llama_3", "vicuna", "phi_3_mini").
            NOTE(review): "all_models" was previously documented here but is not
            mentioned in the error message below - confirm whether MODEL_NAMES
            actually contains it.
        setup (str): Identifier for the setup, currently supports
            - "single": one prompt one model (uses the "individual" config)
            - "multiple": multiple prompts one model or multiple prompts multiple models
              (uses the "transfer" config and enables ``progressive_goals`` and
              ``stop_on_success``)
            Any value other than "single" is treated like "multiple".
        **extra_config_parameters: Additional configuration entries. They override
            both the YAML values and the runtime defaults, except for "model_name",
            which is always set from the ``model_name`` argument.

    Raises:
        ValueError: If ``model_name`` is not in MODEL_NAMES, or if the
            HUGGINGFACE_TOKEN environment variable is unset or empty.
    """

    if model_name not in MODEL_NAMES:
        raise ValueError(
            "Model name not supported. Currently supports 'mistral', 'llama_2', 'llama_3', 'vicuna', and 'phi_3_mini'"
        )

    # Load environment files first so that HUGGINGFACE_TOKEN may be provided by them.
    _load_environment_files()
    hf_token = os.environ.get("HUGGINGFACE_TOKEN")
    if not hf_token:
        raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")

    runtime_config: Dict[str, Union[str, bool, Any]] = {
        "train_data": (
            "https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv"
        ),
        "result_prefix": f"results/individual_behaviors_{model_name}_gcg",
        "token": hf_token,
    }
    if setup != "single":
        # Every non-"single" setup uses the transfer configuration.
        runtime_config["progressive_goals"] = True
        runtime_config["stop_on_success"] = True
        config_name = "transfer"
    else:
        config_name = "individual"

    config = _load_yaml_to_dict(f"configs/{config_name}_{model_name}.yaml")

    # Precedence (lowest to highest): YAML file, runtime defaults, caller overrides.
    config.update(runtime_config)
    config.update(extra_config_parameters)
    # Assigned last so that caller overrides cannot change the validated model name.
    config["model_name"] = model_name

    trainer = GreedyCoordinateGradientAdversarialSuffixGenerator()

    # result_prefix points into "results/", so make sure the directory exists.
    # exist_ok=True avoids the race inherent in a separate exists()-then-create check.
    os.makedirs("results", exist_ok=True)

    trainer.generate_suffix(**config)