In [None]:
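# Uncomment and run once if the azure-ai-evaluation package is not yet installed in this kernel's environment.
#%pip install azure-ai-evaluation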

In [None]:
import os
from pprint import pprint
from dotenv import load_dotenv
# Read the credentials file so the os.environ lookups in later cells can find their values.
# The path is relative, so it is resolved against the kernel's current working directory.
load_dotenv(dotenv_path="../.credentials.env")

In [None]:
# Quick check that the expected settings are present in the environment;
# each value goes on its own line, and an unset variable shows up as None.
print(
    os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    os.environ.get("AZURE_PROJECT_NAME"),
    sep="\n",
)

In [None]:
from azure.identity import DefaultAzureCredential
# Azure credential object. In this cell it is only referenced by the commented-out
# GroundednessProEvaluator line further down; the active evaluators are built from model_config.
credential = DefaultAzureCredential()

from azure.ai.evaluation import GroundednessProEvaluator, GroundednessEvaluator
from azure.ai.evaluation import (
    RelevanceEvaluator,
    CoherenceEvaluator,
    FluencyEvaluator,
    SimilarityEvaluator,
)

# Azure AI project coordinates and Azure OpenAI connection settings, all taken from
# environment variables. A variable that is not set yields None here rather than raising.
azure_ai_project = dict(
    subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("AZURE_RESOURCE_GROUP"),
    project_name=os.getenv("AZURE_PROJECT_NAME"),
)

# Connection settings passed to every evaluator constructor in this cell.
model_config = dict(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

# Instantiate the evaluators; every active one is built from the same model_config.
groundedness_eval = GroundednessEvaluator(model_config=model_config)
# Groundedness Pro is currently disabled. Unlike the others it is constructed from
# azure_ai_project and credential rather than model_config.
#groundedness_pro_eval = GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential)
relevance_eval = RelevanceEvaluator(model_config=model_config)
coherence_eval = CoherenceEvaluator(model_config=model_config)
fluency_eval = FluencyEvaluator(model_config=model_config)
# NOTE(review): similarity_eval is created but not called anywhere in this cell.
similarity_eval = SimilarityEvaluator(model_config=model_config)

# One sample (query, context, response) that is unpacked into every evaluator call below.
query_response = {
    "query": "Which tent is the most waterproof?",
    "context": "The  Mountain Warehouse Tent is the most water-proof of all tents available.",
    "response": "The Mountain Warehouse Tent is the most waterproof.",
}

# Score the same sample with each evaluator in turn, printing every result as soon as
# it is available so earlier scores stay visible if a later call fails.
# NOTE(review): the full dict (query, context, response) is unpacked into every call;
# confirm against the installed SDK version which of these keys each evaluator uses.

# Groundedness
groundedness_score = groundedness_eval(**query_response)
pprint(groundedness_score)

# Relevance
relevance_score = relevance_eval(**query_response)
pprint(relevance_score)

# Fluency
fluency_score = fluency_eval(**query_response)
pprint(fluency_score)

# Coherence
coherence_score = coherence_eval(**query_response)
pprint(coherence_score)
