evaluations/qa_quality_eval.py
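"""Run the QA flow over a test dataset and score the answers for quality.

The script executes the flow in ./src against ./evaluations/test-dataset.jsonl,
writes the flow outputs to responses.jsonl, and then evaluates each response
with the promptflow quality evaluators (fluency, groundedness, relevance,
coherence), optionally reporting results to an Azure AI project.
"""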
import os
import json
from datetime import datetime
from promptflow.client import PFClient
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluate import evaluate
from promptflow.evals.evaluators import (
    RelevanceEvaluator,
    FluencyEvaluator,
    GroundednessEvaluator,
    CoherenceEvaluator,
)
def main():
    # Read environment variables
    azure_location = os.getenv("AZURE_LOCATION")
    azure_subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
    azure_resource_group = os.getenv("AZURE_RESOURCE_GROUP")
    azure_project_name = os.getenv("AZUREAI_PROJECT_NAME")
    prefix = os.getenv("PREFIX", datetime.now().strftime("%y%m%d%H%M%S"))[:14]

    print("AZURE_LOCATION =", azure_location)
    print("AZURE_SUBSCRIPTION_ID =", azure_subscription_id)
    print("AZURE_RESOURCE_GROUP =", azure_resource_group)
    print("AZUREAI_PROJECT_NAME =", azure_project_name)
    print("PREFIX =", prefix)
    ##################################
    ## Base Run
    ##################################
    pf = PFClient()

    flow = "./src"  # path to the flow
    data = "./evaluations/test-dataset.jsonl"  # path to the data file

    # base run
    base_run = pf.run(
        flow=flow,
        data=data,
        column_mapping={
            "question": "${data.question}",
            "chat_history": []
        },
        stream=True,
    )
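    # get_details returns a pandas DataFrame with one row per dataset record,
    # holding the flow inputs (inputs.*) and outputs (outputs.*) of the base run.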
    responses = pf.get_details(base_run)
    print(responses.head(10))

    # Convert to jsonl
    # .copy() avoids pandas chained-assignment warnings when renaming the columns below
    relevant_columns = responses[['inputs.question', 'inputs.chat_history', 'outputs.answer', 'outputs.context']].copy()
    relevant_columns.columns = ['question', 'chat_history', 'answer', 'context']
    data_list = relevant_columns.to_dict(orient='records')
    with open('responses.jsonl', 'w') as f:
        for item in data_list:
            f.write(json.dumps(item) + '\n')
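    # responses.jsonl now contains one JSON object per line with the question,
    # chat_history, answer, and context fields that the evaluators below consume.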
    ##################################
    ## Evaluation
    ##################################
    # Initialize Azure OpenAI Connection with your environment variables
    model_config = AzureOpenAIModelConfiguration(
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
        azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
        api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    )
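    # This Azure OpenAI deployment acts as the judge model: the AI-assisted
    # evaluators below call it to grade each generated answer.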
    # Target project for publishing evaluation results
    azure_ai_project = {
        "subscription_id": azure_subscription_id,
        "resource_group_name": azure_resource_group,
        "project_name": azure_project_name,
    }
    # https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/flow-evaluate-sdk
    fluency_evaluator = FluencyEvaluator(model_config=model_config)
    groundedness_evaluator = GroundednessEvaluator(model_config=model_config)
    relevance_evaluator = RelevanceEvaluator(model_config=model_config)
    coherence_evaluator = CoherenceEvaluator(model_config=model_config)

    data = "./responses.jsonl"  # path to the data file
    evaluators = {
        "Fluency": fluency_evaluator,
        "Groundedness": groundedness_evaluator,
        "Relevance": relevance_evaluator,
        "Coherence": coherence_evaluator,
    }
    try:
        result = evaluate(
            evaluation_name=f"{prefix} Quality Evaluation",
            data=data,
            evaluators=evaluators,
            azure_ai_project=azure_ai_project,
            output_path="./qa_flow_quality_eval.json",
        )
    except Exception as e:
        print(f"An error occurred during evaluation: {e}\nRetrying without reporting results to the Azure AI project.")
        result = evaluate(
            evaluation_name=f"{prefix} Quality Evaluation",
            data=data,
            evaluators=evaluators,
            output_path="./qa_flow_quality_eval.json",
        )
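    # Surface the aggregate metrics in the logs. The "metrics" key is assumed from
    # the promptflow.evals evaluate() return value; .get() keeps this safe if the
    # result shape differs.
    print(json.dumps(result.get("metrics", {}), indent=2))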
if __name__ == '__main__':
    main()