# evaluations/safety_eval.py
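"""Adversarial safety evaluation for the chat endpoint.

Uses the promptflow-evals AdversarialSimulator to generate adversarial
queries (with and without jailbreak injection), routes them through
get_response, and scores the answers with the built-in content-safety
evaluators (sexual, violence, self-harm, hate/unfairness).
"""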
import asyncio
import os
from datetime import datetime
from typing import Any, Dict
from azure.identity import DefaultAzureCredential
from promptflow.evals.evaluate import evaluate
from promptflow.evals.evaluators import (
    HateUnfairnessEvaluator,
    SelfHarmEvaluator,
    SexualEvaluator,
    ViolenceEvaluator,
)
from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator
from chat_request import get_response

async def callback(
    messages: Dict[str, Any],
    stream: bool = False,
    session_state: Any = None,
) -> dict:
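    """Target callback for the AdversarialSimulator.

    Receives the simulator's message payload, forwards the (optionally
    file-augmented) query to the chat app via get_response, and returns
    the conversation in the OpenAI message protocol.
    """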
    query = messages["messages"][0]["content"]
    # Append file contents for summarization or rewrite scenarios
    if "file_content" in messages["template_parameters"]:
        query += messages["template_parameters"]["file_content"]
    response = get_response(query, [])["answer"]
    # Format the response in the OpenAI message protocol
    formatted_response = {
        "content": response,
        "role": "assistant",
        "context": {},
    }
    messages["messages"].append(formatted_response)
    return {
        "messages": messages["messages"],
        "stream": stream,
        "session_state": session_state,
    }

async def main():
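    """Run the adversarial simulations and safety evaluations end to end."""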
    # Read environment variables
    azure_location = os.getenv("AZURE_LOCATION")
    azure_subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
    azure_resource_group = os.getenv("AZURE_RESOURCE_GROUP")
    azure_project_name = os.getenv("AZUREAI_PROJECT_NAME")
    prefix = os.getenv("PREFIX", datetime.now().strftime("%y%m%d%H%M%S"))[:14]
    print("AZURE_LOCATION=", azure_location)
    print("AZURE_SUBSCRIPTION_ID=", azure_subscription_id)
    print("AZURE_RESOURCE_GROUP=", azure_resource_group)
    print("AZUREAI_PROJECT_NAME=", azure_project_name)
    print("PREFIX=", prefix)
    # The safety evaluation service is only available in a few regions
    valid_locations = ["eastus2", "francecentral", "uksouth", "swedencentral"]
    if azure_location not in valid_locations:
        print(f"Invalid AZURE_LOCATION: {azure_location}. Must be one of {valid_locations}.")
        return
    # Instantiate the built-in content-safety evaluators
    azure_ai_project = {
        "subscription_id": azure_subscription_id,
        "resource_group_name": azure_resource_group,
        "project_name": azure_project_name,
    }
    sexual_evaluator = SexualEvaluator(azure_ai_project)
    self_harm_evaluator = SelfHarmEvaluator(azure_ai_project)
    hate_unfairness_evaluator = HateUnfairnessEvaluator(azure_ai_project)
    violence_evaluator = ViolenceEvaluator(azure_ai_project)
    scenario = AdversarialScenario.ADVERSARIAL_QA
    # The simulator needs a live credential to reach the Azure AI project
    azure_ai_project["credential"] = DefaultAzureCredential()
    simulator = AdversarialSimulator(azure_ai_project=azure_ai_project)
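    # Run the adversarial simulation: ADVERSARIAL_QA generates single-turn
    # adversarial questions and sends each one to the callback target above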
    outputs = await simulator(
        scenario=scenario,
        target=callback,
        max_conversation_turns=1,
        max_simulation_results=10,
        jailbreak=False,
    )
    adversarial_conversation_result = outputs.to_eval_qa_json_lines()
    print(f"Adversarial conversation results: {adversarial_conversation_result}")
    # evaluate() expects a path to a .jsonl file, so persist the results first
    adversarial_data_path = "./adversarial_conversation.jsonl"
    with open(adversarial_data_path, "w") as f:
        f.write(adversarial_conversation_result)
    try:
        # Blank out the live credential before passing the project dict to evaluate()
        azure_ai_project["credential"] = ""
        adversarial_eval_result = evaluate(
            evaluation_name=f"{prefix} Adversarial Tests",
            data=adversarial_data_path,
            evaluators={
                "sexual": sexual_evaluator,
                "self_harm": self_harm_evaluator,
                "hate_unfairness": hate_unfairness_evaluator,
                "violence": violence_evaluator,
            },
            azure_ai_project=azure_ai_project,
            output_path="./adversarial_test.json",
        )
    except Exception as e:
        print(f"An error occurred during evaluation: {e}\nRetrying without reporting results to the Azure AI project.")
        adversarial_eval_result = evaluate(
            evaluation_name=f"{prefix} Adversarial Tests",
            data=adversarial_data_path,
            evaluators={
                "sexual": sexual_evaluator,
                "self_harm": self_harm_evaluator,
                "hate_unfairness": hate_unfairness_evaluator,
                "violence": violence_evaluator,
            },
            output_path="./adversarial_test.json",
        )
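    # Repeat the simulation with jailbreak=True, which (per promptflow-evals)
    # injects prompt-injection attack strings into the baseline queries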
    jb_outputs = await simulator(
        scenario=scenario,
        target=callback,
        max_simulation_results=10,
        jailbreak=True,
    )
    adversarial_conversation_result_w_jailbreak = jb_outputs.to_eval_qa_json_lines()
    print(f"Adversarial conversation w/ jailbreak results: {adversarial_conversation_result_w_jailbreak}")
    adversarial_jb_data_path = "./adversarial_conversation_w_jailbreak.jsonl"
    with open(adversarial_jb_data_path, "w") as f:
        f.write(adversarial_conversation_result_w_jailbreak)
    try:
        adversarial_eval_w_jailbreak_result = evaluate(
            evaluation_name=f"{prefix} Adversarial Tests w/ Jailbreak",
            data=adversarial_jb_data_path,
            evaluators={
                "sexual": sexual_evaluator,
                "self_harm": self_harm_evaluator,
                "hate_unfairness": hate_unfairness_evaluator,
                "violence": violence_evaluator,
            },
            azure_ai_project=azure_ai_project,
            output_path="./adversarial_test_w_jailbreak.json",
        )
    except Exception as e:
        print(f"An error occurred during evaluation: {e}\nRetrying without reporting results to the Azure AI project.")
        adversarial_eval_w_jailbreak_result = evaluate(
            evaluation_name=f"{prefix} Adversarial Tests w/ Jailbreak",
            data=adversarial_jb_data_path,
            evaluators={
                "sexual": sexual_evaluator,
                "self_harm": self_harm_evaluator,
                "hate_unfairness": hate_unfairness_evaluator,
                "violence": violence_evaluator,
            },
            output_path="./adversarial_test_w_jailbreak.json",
        )

if __name__ == "__main__":
    asyncio.run(main())
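# Usage (assumed invocation): set AZURE_LOCATION, AZURE_SUBSCRIPTION_ID,
# AZURE_RESOURCE_GROUP and AZUREAI_PROJECT_NAME, sign in so that
# DefaultAzureCredential can authenticate (e.g. `az login`), then run:
#   python evaluations/safety_eval.py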