Lab1_ai_evaluation/ai_evaluation.ipynb (186 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1733769770945
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"#%pip install azure-ai-evaluation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1733769772370
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"import os\n",
"from pprint import pprint\n",
"from dotenv import load_dotenv\n",
"load_dotenv(\"../.credentials.env\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1733769774341
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# Sanity check: echo two of the loaded settings (prints None for any that is unset).\n",
"for setting_name in (\"AZURE_OPENAI_DEPLOYMENT\", \"AZURE_PROJECT_NAME\"):\n",
"    print(os.environ.get(setting_name))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1733769792289
}
},
"outputs": [],
"source": [
"# Builds the AI-assisted quality evaluators from environment settings and scores one sample record.\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.ai.evaluation import (\n",
"    CoherenceEvaluator,\n",
"    FluencyEvaluator,\n",
"    GroundednessEvaluator,\n",
"    GroundednessProEvaluator,\n",
"    RelevanceEvaluator,\n",
"    SimilarityEvaluator,\n",
")\n",
"\n",
"# Only the commented-out Groundedness Pro evaluator below consumes this credential.\n",
"credential = DefaultAzureCredential()\n",
"\n",
"# Fail fast with a readable message: os.environ.get() returns None for unset variables,\n",
"# which would otherwise only surface later as an opaque error inside an evaluator.\n",
"required_settings = (\"AZURE_OPENAI_ENDPOINT\", \"AZURE_OPENAI_DEPLOYMENT\")\n",
"missing_settings = [name for name in required_settings if not os.environ.get(name)]\n",
"if missing_settings:\n",
"    raise RuntimeError(\n",
"        \"Missing required environment variables: \" + \", \".join(missing_settings)\n",
"    )\n",
"\n",
"# Initialize the Azure AI project and Azure OpenAI connection from your environment variables\n",
"azure_ai_project = {\n",
"    \"subscription_id\": os.environ.get(\"AZURE_SUBSCRIPTION_ID\"),\n",
"    \"resource_group_name\": os.environ.get(\"AZURE_RESOURCE_GROUP\"),\n",
"    \"project_name\": os.environ.get(\"AZURE_PROJECT_NAME\"),\n",
"}\n",
"\n",
"model_config = {\n",
"    \"azure_endpoint\": os.environ.get(\"AZURE_OPENAI_ENDPOINT\"),\n",
"    \"api_key\": os.environ.get(\"AZURE_OPENAI_API_KEY\"),\n",
"    \"azure_deployment\": os.environ.get(\"AZURE_OPENAI_DEPLOYMENT\"),\n",
"    \"api_version\": os.environ.get(\"AZURE_OPENAI_API_VERSION\"),\n",
"}\n",
"\n",
"# Initialize the evaluators that share model_config (Groundedness Pro is left disabled)\n",
"groundedness_eval = GroundednessEvaluator(model_config)\n",
"#groundedness_pro_eval = GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential)\n",
"relevance_eval = RelevanceEvaluator(model_config)\n",
"coherence_eval = CoherenceEvaluator(model_config)\n",
"fluency_eval = FluencyEvaluator(model_config)\n",
"# Constructed for completeness; this cell never calls it.\n",
"similarity_eval = SimilarityEvaluator(model_config)\n",
"\n",
"# Sample record: every evaluator call below receives these same query/context/response fields\n",
"query_response = dict(\n",
"    query=\"Which tent is the most waterproof?\",\n",
"    context=\"The Mountain Warehouse Tent is the most water-proof of all tents available.\",\n",
"    response=\"The Mountain Warehouse Tent is the most waterproof.\",\n",
")\n",
"\n",
"# Run the Groundedness evaluator on the sample record\n",
"groundedness_score = groundedness_eval(\n",
"    **query_response\n",
")\n",
"pprint(groundedness_score)\n",
"\n",
"\n",
"# Run the Relevance evaluator on the sample record\n",
"relevance_score = relevance_eval(\n",
"    **query_response\n",
")\n",
"pprint(relevance_score)\n",
"\n",
"# Run the Fluency evaluator on the sample record\n",
"fluency_score = fluency_eval(\n",
"    **query_response\n",
")\n",
"pprint(fluency_score)\n",
"\n",
"# Run the Coherence evaluator on the sample record\n",
"coherence_score = coherence_eval(\n",
"    **query_response\n",
")\n",
"pprint(coherence_score)\n"
]
}
],
"metadata": {
"kernel_info": {
"name": "python310-sdkv2"
},
"kernelspec": {
"display_name": "aoai_eval",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
},
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}