code/embedding-function/utilities/helpers/env_helper.py (297 lines of code) (raw):
import json
import os
import logging
import threading
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.keyvault.secrets import SecretClient
# Module-level logger named after this module; EnvHelper uses it to report
# configuration loading progress and settings that resolved to an empty string.
logger = logging.getLogger(__name__)
class EnvHelper:
    """Singleton that exposes application settings as instance attributes.

    The first ``EnvHelper()`` call builds one instance, populates it from the
    process environment (after calling ``load_dotenv()``; secrets are resolved
    through ``SecretHelper``) and caches it on the class. Every later call
    returns that cached instance until ``clear_instance()`` resets it.
    """

    _instance = None
    # Guards creation and reset of the cached instance.
    _lock = threading.Lock()

    def __new__(cls):
        """Return the cached instance, creating and loading it on first use."""
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                instance.__load_config()
                # Cache only after loading succeeded, so a failed load is
                # retried on the next call instead of caching a half-built
                # object.
                cls._instance = instance
            return cls._instance

    def __load_config(self, **kwargs) -> None:
        """Populate every configuration attribute on this instance.

        ``**kwargs`` is accepted for signature compatibility and is not used.

        The section helpers are invoked in a fixed order: later sections read
        attributes set by earlier ones (``AZURE_AUTH_TYPE``,
        ``AZURE_OPENAI_RESOURCE``, ``AZURE_FORM_RECOGNIZER_ENDPOINT``), and the
        order also determines the iteration order seen by ``check_env()``.
        """
        load_dotenv()
        logger.info("Initializing EnvHelper")

        # Wrapper for Azure Key Vault
        self.secretHelper = SecretHelper()

        self.LOGLEVEL = os.environ.get("LOGLEVEL", "INFO").upper()

        # Azure
        self.AZURE_SUBSCRIPTION_ID = os.getenv("AZURE_SUBSCRIPTION_ID", "")
        self.AZURE_RESOURCE_GROUP = os.getenv("AZURE_RESOURCE_GROUP", "")

        self._load_search_settings()

        self.AZURE_AUTH_TYPE = os.getenv("AZURE_AUTH_TYPE", "keys")

        self._load_openai_settings()

        self.AZURE_TOKEN_PROVIDER = get_bearer_token_provider(
            DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
        )

        self._load_computer_vision_settings()
        self._load_service_keys()
        self._configure_openai_sdk()
        self._load_functions_settings()
        self._load_blob_storage_settings()
        self._load_form_recognizer_settings()

        # Azure App Insights
        # APPLICATIONINSIGHTS_ENABLED will be True when the application runs in App Service
        self.APPLICATIONINSIGHTS_ENABLED = self.get_env_var_bool(
            "APPLICATIONINSIGHTS_ENABLED", "False"
        )

        self._load_content_safety_settings()
        self._load_speech_settings()
        self._load_orchestration_settings()

        logger.info("Initializing EnvHelper completed")

    def _load_search_settings(self) -> None:
        """Read Azure Search and integrated-vectorization settings."""
        # Azure Search
        self.AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "")
        self.AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX", "")
        self.AZURE_SEARCH_USE_SEMANTIC_SEARCH = self.get_env_var_bool(
            "AZURE_SEARCH_USE_SEMANTIC_SEARCH", "False"
        )
        self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.getenv(
            "AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default"
        )
        self.AZURE_SEARCH_INDEX_IS_PRECHUNKED = os.getenv(
            "AZURE_SEARCH_INDEX_IS_PRECHUNKED", ""
        )
        self.AZURE_SEARCH_FILTER = os.getenv("AZURE_SEARCH_FILTER", "")
        self.AZURE_SEARCH_TOP_K = self.get_env_var_int("AZURE_SEARCH_TOP_K", 5)
        self.AZURE_SEARCH_ENABLE_IN_DOMAIN = self.get_env_var_bool(
            "AZURE_SEARCH_ENABLE_IN_DOMAIN", "true"
        )
        self.AZURE_SEARCH_FIELDS_ID = os.getenv("AZURE_SEARCH_FIELDS_ID", "id")
        self.AZURE_SEARCH_CONTENT_COLUMN = os.getenv(
            "AZURE_SEARCH_CONTENT_COLUMN", "content"
        )
        self.AZURE_SEARCH_CONTENT_VECTOR_COLUMN = os.getenv(
            "AZURE_SEARCH_CONTENT_VECTOR_COLUMN", "content_vector"
        )
        self.AZURE_SEARCH_DIMENSIONS = os.getenv("AZURE_SEARCH_DIMENSIONS", "1536")
        self.AZURE_SEARCH_FILENAME_COLUMN = os.getenv(
            "AZURE_SEARCH_FILENAME_COLUMN", "filepath"
        )
        self.AZURE_SEARCH_TITLE_COLUMN = os.getenv("AZURE_SEARCH_TITLE_COLUMN", "title")
        self.AZURE_SEARCH_URL_COLUMN = os.getenv("AZURE_SEARCH_URL_COLUMN", "url")
        self.AZURE_SEARCH_FIELDS_TAG = os.getenv("AZURE_SEARCH_FIELDS_TAG", "tag")
        self.AZURE_SEARCH_FIELDS_METADATA = os.getenv(
            "AZURE_SEARCH_FIELDS_METADATA", "metadata"
        )
        self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv(
            "AZURE_SEARCH_SOURCE_COLUMN", "source"
        )
        self.AZURE_SEARCH_CHUNK_COLUMN = os.getenv("AZURE_SEARCH_CHUNK_COLUMN", "chunk")
        self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv(
            "AZURE_SEARCH_OFFSET_COLUMN", "offset"
        )
        self.AZURE_SEARCH_SHAREPOINT_FILE_ID_COLUMN = os.getenv(
            "AZURE_SEARCH_SHAREPOINT_FILE_ID_COLUMN", "sharepoint_file_id"
        )
        self.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = os.getenv(
            "AZURE_SEARCH_CONVERSATIONS_LOG_INDEX", "conversations"
        )
        # Parsed as an int so the attribute has one type whether or not the
        # variable is set (a plain os.getenv would yield a str when set and
        # the int default otherwise).
        self.AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE = self.get_env_var_int(
            "AZURE_SEARCH_DOC_UPLOAD_BATCH_SIZE", 100
        )

        # Integrated Vectorization
        self.AZURE_SEARCH_DATASOURCE_NAME = os.getenv(
            "AZURE_SEARCH_DATASOURCE_NAME", ""
        )
        self.AZURE_SEARCH_INDEXER_NAME = os.getenv("AZURE_SEARCH_INDEXER_NAME", "")
        self.USE_ADVANCED_IMAGE_PROCESSING = self.get_env_var_bool(
            "USE_ADVANCED_IMAGE_PROCESSING", "False"
        )
        self.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = self.get_env_var_bool(
            "AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION", "False"
        )

    def _load_openai_settings(self) -> None:
        """Read Azure OpenAI chat, vision and embedding model settings."""
        # Azure OpenAI
        self.AZURE_OPENAI_RESOURCE = os.getenv("AZURE_OPENAI_RESOURCE", "")

        # Prefer the JSON bundle AZURE_OPENAI_MODEL_INFO; otherwise fall back
        # to the individual environment variables.
        model_info = self.get_info_from_env("AZURE_OPENAI_MODEL_INFO", "")
        if model_info:
            self.AZURE_OPENAI_MODEL = model_info.get("model", "")
            self.AZURE_OPENAI_MODEL_NAME = model_info.get("modelName", "")
        else:
            self.AZURE_OPENAI_MODEL = os.getenv("AZURE_OPENAI_MODEL", "gpt-4o")
            self.AZURE_OPENAI_MODEL_NAME = os.getenv(
                "AZURE_OPENAI_MODEL_NAME", "gpt-4o"
            )

        self.AZURE_OPENAI_VISION_MODEL = os.getenv("AZURE_OPENAI_VISION_MODEL", "gpt-4")
        self.AZURE_OPENAI_TEMPERATURE = os.getenv("AZURE_OPENAI_TEMPERATURE", "0")
        self.AZURE_OPENAI_TOP_P = os.getenv("AZURE_OPENAI_TOP_P", "1.0")
        self.AZURE_OPENAI_MAX_TOKENS = os.getenv("AZURE_OPENAI_MAX_TOKENS", "1000")
        self.AZURE_OPENAI_STOP_SEQUENCE = os.getenv("AZURE_OPENAI_STOP_SEQUENCE", "")
        self.AZURE_OPENAI_SYSTEM_MESSAGE = os.getenv(
            "AZURE_OPENAI_SYSTEM_MESSAGE",
            "You are an AI assistant that helps people find information.",
        )
        self.AZURE_OPENAI_API_VERSION = os.getenv(
            "AZURE_OPENAI_API_VERSION", "2024-02-01"
        )
        self.AZURE_OPENAI_STREAM = os.getenv("AZURE_OPENAI_STREAM", "true")

        # Prefer the JSON bundle AZURE_OPENAI_EMBEDDING_MODEL_INFO; otherwise
        # fall back to the individual environment variable.
        embedding_info = self.get_info_from_env(
            "AZURE_OPENAI_EMBEDDING_MODEL_INFO", ""
        )
        if embedding_info:
            self.AZURE_OPENAI_EMBEDDING_MODEL = embedding_info.get("model", "")
        else:
            self.AZURE_OPENAI_EMBEDDING_MODEL = os.getenv(
                "AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002"
            )

        self.SHOULD_STREAM = self.AZURE_OPENAI_STREAM.lower() == "true"

    def _load_computer_vision_settings(self) -> None:
        """Read advanced image processing / Azure Computer Vision settings."""
        self.ADVANCED_IMAGE_PROCESSING_MAX_IMAGES = self.get_env_var_int(
            "ADVANCED_IMAGE_PROCESSING_MAX_IMAGES", 1
        )
        self.AZURE_COMPUTER_VISION_ENDPOINT = os.getenv(
            "AZURE_COMPUTER_VISION_ENDPOINT"
        )
        self.AZURE_COMPUTER_VISION_TIMEOUT = self.get_env_var_float(
            "AZURE_COMPUTER_VISION_TIMEOUT", 30
        )
        self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION = os.getenv(
            "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_API_VERSION", "2024-02-01"
        )
        self.AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION = os.getenv(
            "AZURE_COMPUTER_VISION_VECTORIZE_IMAGE_MODEL_VERSION", "2023-04-15"
        )

    def _load_service_keys(self) -> None:
        """Resolve service API keys according to ``AZURE_AUTH_TYPE``."""
        # Initialize Azure keys based on authentication type and environment settings.
        # When AZURE_AUTH_TYPE is "rbac", azure keys are None or an empty string.
        if self.AZURE_AUTH_TYPE == "rbac":
            self.AZURE_SEARCH_KEY = None
            # Kept as "" (not None) because it is later written to os.environ,
            # which only accepts strings.
            self.AZURE_OPENAI_API_KEY = ""
            self.AZURE_SPEECH_KEY = None
            self.AZURE_COMPUTER_VISION_KEY = None
        else:
            self.AZURE_SEARCH_KEY = self.secretHelper.get_secret("AZURE_SEARCH_KEY")
            self.AZURE_OPENAI_API_KEY = self.secretHelper.get_secret(
                "AZURE_OPENAI_API_KEY"
            )
            self.AZURE_SPEECH_KEY = self.secretHelper.get_secret(
                "AZURE_SPEECH_SERVICE_KEY"
            )
            self.AZURE_COMPUTER_VISION_KEY = self.secretHelper.get_secret(
                "AZURE_COMPUTER_VISION_KEY"
            )

    def _configure_openai_sdk(self) -> None:
        """Derive the OpenAI endpoint and export the OPENAI_* variables."""
        # Set env for Azure OpenAI
        self.AZURE_OPENAI_ENDPOINT = os.environ.get(
            "AZURE_OPENAI_ENDPOINT",
            f"https://{self.AZURE_OPENAI_RESOURCE}.openai.azure.com/",
        )

        # Set env for OpenAI SDK
        self.OPENAI_API_TYPE = "azure" if self.AZURE_AUTH_TYPE == "keys" else "azure_ad"
        self.OPENAI_API_KEY = self.AZURE_OPENAI_API_KEY
        self.OPENAI_API_VERSION = self.AZURE_OPENAI_API_VERSION
        os.environ["OPENAI_API_TYPE"] = self.OPENAI_API_TYPE
        os.environ["OPENAI_API_KEY"] = self.OPENAI_API_KEY
        os.environ["OPENAI_API_VERSION"] = self.OPENAI_API_VERSION

    def _load_functions_settings(self) -> None:
        """Read Azure Functions / batch processing settings."""
        # Azure Functions - Batch processing
        self.BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:7071")
        self.FUNCTION_KEY = os.getenv("FUNCTION_KEY")
        self.AzureWebJobsStorage = os.getenv("AzureWebJobsStorage", "")
        self.DOCUMENT_PROCESSING_QUEUE_NAME = os.getenv(
            "DOCUMENT_PROCESSING_QUEUE_NAME", "doc-processing"
        )

    def _load_blob_storage_settings(self) -> None:
        """Read Azure Blob Storage account, key, container and endpoint."""
        # Azure Blob Storage: prefer the JSON bundle AZURE_BLOB_STORAGE_INFO;
        # otherwise fall back to the individual environment variables.
        blob_info = self.get_info_from_env("AZURE_BLOB_STORAGE_INFO", "")
        if blob_info:
            self.AZURE_BLOB_ACCOUNT_NAME = blob_info.get("accountName", "")
            self.AZURE_BLOB_ACCOUNT_KEY = self.secretHelper.get_secret_from_json(
                blob_info.get("accountKey", "")
            )
            self.AZURE_BLOB_CONTAINER_NAME = blob_info.get("containerName", "")
        else:
            self.AZURE_BLOB_ACCOUNT_NAME = os.getenv("AZURE_BLOB_ACCOUNT_NAME", "")
            self.AZURE_BLOB_ACCOUNT_KEY = self.secretHelper.get_secret(
                "AZURE_BLOB_ACCOUNT_KEY"
            )
            self.AZURE_BLOB_CONTAINER_NAME = os.getenv("AZURE_BLOB_CONTAINER_NAME", "")

        self.AZURE_STORAGE_ACCOUNT_ENDPOINT = os.getenv(
            "AZURE_STORAGE_ACCOUNT_ENDPOINT",
            f"https://{self.AZURE_BLOB_ACCOUNT_NAME}.blob.core.windows.net/",
        )

    def _load_form_recognizer_settings(self) -> None:
        """Read the Azure Form Recognizer endpoint and key."""
        # Azure Form Recognizer: prefer the JSON bundle
        # AZURE_FORM_RECOGNIZER_INFO; otherwise fall back to the individual
        # environment variables.
        form_recognizer_info = self.get_info_from_env(
            "AZURE_FORM_RECOGNIZER_INFO", ""
        )
        if form_recognizer_info:
            self.AZURE_FORM_RECOGNIZER_ENDPOINT = form_recognizer_info.get(
                "endpoint", ""
            )
            self.AZURE_FORM_RECOGNIZER_KEY = self.secretHelper.get_secret_from_json(
                form_recognizer_info.get("key", "")
            )
        else:
            self.AZURE_FORM_RECOGNIZER_ENDPOINT = os.getenv(
                "AZURE_FORM_RECOGNIZER_ENDPOINT", ""
            )
            self.AZURE_FORM_RECOGNIZER_KEY = self.secretHelper.get_secret(
                "AZURE_FORM_RECOGNIZER_KEY"
            )

    def _load_content_safety_settings(self) -> None:
        """Read the Azure AI Content Safety endpoint and key."""
        # Azure AI Content Safety
        self.AZURE_CONTENT_SAFETY_ENDPOINT = os.getenv(
            "AZURE_CONTENT_SAFETY_ENDPOINT", ""
        )
        # When the configured value contains neither "https" nor the cognitive
        # services host name, reuse the Form Recognizer endpoint instead.
        if (
            "https" not in self.AZURE_CONTENT_SAFETY_ENDPOINT
            and "api.cognitive.microsoft.com" not in self.AZURE_CONTENT_SAFETY_ENDPOINT
        ):
            self.AZURE_CONTENT_SAFETY_ENDPOINT = self.AZURE_FORM_RECOGNIZER_ENDPOINT
        self.AZURE_CONTENT_SAFETY_KEY = self.secretHelper.get_secret(
            "AZURE_CONTENT_SAFETY_KEY"
        )

    def _load_speech_settings(self) -> None:
        """Read Azure Speech Service settings."""
        # Speech Service
        self.AZURE_SPEECH_SERVICE_NAME = os.getenv("AZURE_SPEECH_SERVICE_NAME", "")
        self.AZURE_SPEECH_SERVICE_REGION = os.getenv("AZURE_SPEECH_SERVICE_REGION")
        self.AZURE_SPEECH_RECOGNIZER_LANGUAGES = self.get_env_var_array(
            "AZURE_SPEECH_RECOGNIZER_LANGUAGES", "en-US"
        )
        self.AZURE_SPEECH_REGION_ENDPOINT = os.environ.get(
            "AZURE_SPEECH_REGION_ENDPOINT",
            f"https://{self.AZURE_SPEECH_SERVICE_REGION}.api.cognitive.microsoft.com/",
        )

    def _load_orchestration_settings(self) -> None:
        """Read config-source, Azure ML / Prompt Flow and system prompt settings."""
        self.LOAD_CONFIG_FROM_BLOB_STORAGE = self.get_env_var_bool(
            "LOAD_CONFIG_FROM_BLOB_STORAGE"
        )

        self.AZURE_ML_WORKSPACE_NAME = os.getenv("AZURE_ML_WORKSPACE_NAME", "")

        self.PROMPT_FLOW_ENDPOINT_NAME = os.getenv("PROMPT_FLOW_ENDPOINT_NAME", "")
        self.PROMPT_FLOW_DEPLOYMENT_NAME = os.getenv("PROMPT_FLOW_DEPLOYMENT_NAME", "")

        self.OPEN_AI_FUNCTIONS_SYSTEM_PROMPT = os.getenv(
            "OPEN_AI_FUNCTIONS_SYSTEM_PROMPT", ""
        )
        # NOTE(review): "SEMENTIC" looks like a typo of "SEMANTIC"; kept as-is
        # because renaming would change both the attribute and the environment
        # variable name that existing callers/deployments use.
        self.SEMENTIC_KERNEL_SYSTEM_PROMPT = os.getenv(
            "SEMENTIC_KERNEL_SYSTEM_PROMPT", ""
        )

    def is_chat_model(self):
        """Return True when the configured model name contains ``"gpt-4"``."""
        return "gpt-4" in self.AZURE_OPENAI_MODEL_NAME.lower()

    def get_env_var_bool(self, var_name: str, default: str = "True") -> bool:
        """Return True when the variable (or ``default``) equals "true", ignoring case."""
        return os.getenv(var_name, default).lower() == "true"

    def get_env_var_array(self, var_name: str, default: str = ""):
        """Return the variable (or ``default``) split on commas as a list of strings."""
        return os.getenv(var_name, default).split(",")

    def get_env_var_int(self, var_name: str, default: int):
        """Return the variable parsed as ``int``.

        ``default`` is used when the variable is unset or blank (empty or
        whitespace only), so a declared-but-empty variable does not raise.

        Raises:
            ValueError: If the variable holds non-blank, non-integer text.
        """
        raw_value = os.getenv(var_name)
        if raw_value is None or not raw_value.strip():
            return int(default)
        return int(raw_value)

    def get_env_var_float(self, var_name: str, default: float):
        """Return the variable parsed as ``float``.

        ``default`` is used when the variable is unset or blank (empty or
        whitespace only), so a declared-but-empty variable does not raise.

        Raises:
            ValueError: If the variable holds non-blank, non-numeric text.
        """
        raw_value = os.getenv(var_name)
        if raw_value is None or not raw_value.strip():
            return float(default)
        return float(raw_value)

    def is_auth_type_keys(self):
        """Return True when ``AZURE_AUTH_TYPE`` is ``"keys"``."""
        return self.AZURE_AUTH_TYPE == "keys"

    def get_info_from_env(self, env_var: str, default_info: str) -> dict:
        """Parse a JSON object stored in an environment variable.

        Args:
            env_var: Name of the environment variable to read.
            default_info: JSON text used when the variable is not set.

        Returns:
            The parsed JSON value, or ``{}`` when the text is empty.

        Raises:
            json.JSONDecodeError: If the text is not valid JSON.
        """
        info_str = os.getenv(env_var, default_info)
        if not info_str:
            return {}

        # A backslash means the JSON arrived escaped (e.g. {\"model\": ...}).
        # Decoding it as the contents of a JSON string literal removes one
        # level of escaping before the real parse.
        if "\\" in info_str:
            info_str = json.loads(f'"{info_str}"')

        return json.loads(info_str) if info_str else {}

    @staticmethod
    def check_env():
        """Log a warning for every attribute whose value is an empty string."""
        for attr, value in EnvHelper().__dict__.items():
            if value == "":
                logger.warning(f"{attr} is not set in the environment variables.")

    @classmethod
    def clear_instance(cls):
        """Drop the cached instance so the next ``EnvHelper()`` reloads the config."""
        # Take the same lock as __new__ so a reset cannot land between its
        # "is None" check and its return, which would hand the caller None.
        with cls._lock:
            cls._instance = None
class SecretHelper:
    """Resolve secret values from environment variables or Azure Key Vault."""

    def __init__(self) -> None:
        """
        Configure the helper from the environment.

        USE_KEY_VAULT is True only when the USE_KEY_VAULT environment variable
        equals "true" (case-insensitive). In that case a SecretClient is created
        from the AZURE_KEY_VAULT_ENDPOINT environment variable and a
        DefaultAzureCredential; otherwise secret_client stays None.
        """
        vault_flag = os.getenv("USE_KEY_VAULT", "")
        self.USE_KEY_VAULT = vault_flag.lower() == "true"
        self.secret_client = None

        if not self.USE_KEY_VAULT:
            return

        vault_url = os.environ.get("AZURE_KEY_VAULT_ENDPOINT")
        self.secret_client = SecretClient(vault_url, DefaultAzureCredential())

    def get_secret(self, secret_name: str) -> str:
        """
        Look up a secret through the environment variable named `secret_name`.

        Args:
            secret_name (str): Name of the environment variable to read.

        Returns:
            str: With Key Vault enabled and a non-empty variable, the variable's
            value is used as the Key Vault secret name and that secret's value
            is returned. Otherwise the variable's own value is returned, or ""
            when it is not set.
        """
        env_value = os.getenv(secret_name, "")
        if self.USE_KEY_VAULT and env_value:
            return self.secret_client.get_secret(env_value).value
        return env_value

    def get_secret_from_json(self, secret_name: str) -> str:
        """
        Resolve a secret reference taken from a JSON settings bundle.

        Args:
            secret_name (str): With Key Vault enabled, the Key Vault secret
                name; otherwise the secret value itself.

        Returns:
            str: The Key Vault secret value when Key Vault is enabled and
            `secret_name` is non-empty; otherwise `secret_name` unchanged.
        """
        if self.USE_KEY_VAULT and secret_name:
            return self.secret_client.get_secret(secret_name).value
        return secret_name