util/deploy_moe.py (168 lines of code) (raw):

"""Deploy the promptflow in ../dist as an Azure ML managed online endpoint.

Creates (or reuses) a managed online endpoint, builds a deployment from the
flow's build context, grants the endpoint's managed identity access to the
Azure OpenAI, Azure AI Search, and workspace resources, then routes 100% of
traffic to the new deployment.
"""

import os, uuid

# Load .env so environment variables are set before importing any other code
# that may read them at import time.
from dotenv import load_dotenv

load_dotenv()

from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    BuildContext,
    Environment,
    ManagedOnlineDeployment,
    ManagedOnlineEndpoint,
    Model,
)
from azure.core.exceptions import ResourceExistsError
from azure.identity import DefaultAzureCredential
from azure.mgmt.authorization import AuthorizationManagementClient
from azure.mgmt.authorization.models import RoleAssignmentCreateParameters
from uuid import uuid4

from azure_config import AzureConfig

# Read configuration
azure_config = AzureConfig()

print("Initializing MLClient...")
client = MLClient(
    DefaultAzureCredential(),
    azure_config.subscription_id,
    azure_config.resource_group,
    azure_config.workspace_name,
)


def get_ai_studio_url_for_deploy(
    client: MLClient, endpoint_name: str, deployment_name: str
) -> str:
    """Return the Azure AI Studio detail-page URL for a deployment."""
    studio_base_url = "https://ai.azure.com"
    deployment_url = f"{studio_base_url}/projectdeployments/realtime/{endpoint_name}/{deployment_name}/detail?wsid=/subscriptions/{client.subscription_id}/resourceGroups/{client.resource_group_name}/providers/Microsoft.MachineLearningServices/workspaces/{client.workspace_name}&deploymentName={deployment_name}"
    return deployment_url


def output_deployment_details(client, endpoint_name, deployment_name) -> None:
    """Print the endpoint/deployment names and the Azure AI Studio link."""
    print("\n ~~~Deployment details~~~")
    print(f"Your online endpoint name is: {endpoint_name}")
    print(f"Your deployment name is: {deployment_name}")

    print("\n ~~~Test in the Azure AI Studio~~~")
    print("\n Follow this link to your deployment in the Azure AI Studio:")
    print(get_ai_studio_url_for_deploy(client=client, endpoint_name=endpoint_name, deployment_name=deployment_name))


def deploy_flow(endpoint_name, deployment_name):
    """Create/update the endpoint, grant its identity access, deploy the flow.

    Args:
        endpoint_name: Name of the managed online endpoint.
        deployment_name: Name of the deployment created under that endpoint.
    """
    # check if endpoint exists, create endpoint object if not
    try:
        endpoint = client.online_endpoints.get(endpoint_name)
    except Exception:
        # NOTE(review): any failure here (not just "not found") falls through
        # to building a fresh endpoint definition.
        endpoint = ManagedOnlineEndpoint(
            name=endpoint_name,
            properties={
                "enforce_access_to_default_secret_stores": "enabled"  # if you want secret injection support
            },
            auth_mode="aad_token",  # using aad auth instead of key-based auth
        )

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Define the path to the directory, appending the script directory to the relative path
    flow_path = os.path.abspath(os.path.join(script_dir, "../dist"))
    print(f"Flow path: {flow_path}")

    # Create dummy file in connections folder (promptflow issue #1274)
    connections_path = os.path.join(flow_path, "connections")
    os.makedirs(connections_path, exist_ok=True)
    dummy_file_path = os.path.join(connections_path, "dummy.txt")
    with open(dummy_file_path, 'w') as dummy_file:
        pass

    deployment = ManagedOnlineDeployment(
        name=deployment_name,
        endpoint_name=endpoint_name,
        model=Model(
            name="ragflow",
            path=flow_path,  # path to promptflow folder
            properties=[  # this enables the chat interface in the endpoint test tab
                ["azureml.promptflow.source_flow_id", "ragflow"],
                ["azureml.promptflow.mode", "chat"],
                ["azureml.promptflow.chat_input", "question"],
                ["azureml.promptflow.chat_output", "answer"],
            ],
        ),
        environment=Environment(
            build=BuildContext(
                path=flow_path,
            ),
            inference_config={
                "liveness_route": {
                    "path": "/health",
                    "port": 8080,
                },
                "readiness_route": {
                    "path": "/health",
                    "port": 8080,
                },
                "scoring_route": {
                    "path": "/score",
                    "port": 8080,
                },
            },
        ),
        # instance type comes with associated cost.
        # make sure you have quota for the specified instance type
        # See more details here: https://learn.microsoft.com/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list
        instance_type="Standard_DS3_v2",
        instance_count=1,
        environment_variables={
            "PRT_CONFIG_OVERRIDE": f"deployment.subscription_id={client.subscription_id},deployment.resource_group={client.resource_group_name},deployment.workspace_name={client.workspace_name},deployment.endpoint_name={endpoint_name},deployment.deployment_name={deployment_name}",
            "AZURE_SUBSCRIPTION_ID": os.environ["AZURE_SUBSCRIPTION_ID"],
            "AZURE_RESOURCE_GROUP": os.environ["AZURE_RESOURCE_GROUP"],
            "AZUREAI_PROJECT_NAME": os.environ["AZUREAI_PROJECT_NAME"],
            "AZURE_OPENAI_ENDPOINT": azure_config.aoai_endpoint,
            "AZURE_OPENAI_API_VERSION": azure_config.aoai_api_version,
            "AZURE_SEARCH_ENDPOINT": azure_config.search_endpoint,
            "AZURE_OPENAI_CHAT_DEPLOYMENT": os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"),
            "AZURE_OPENAI_EMBEDDING_MODEL": os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
            # using the same name for the deployment as the model for simplicity
            "AZURE_OPENAI_EMBEDDING_DEPLOYMENT": os.getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
        },
    )

    # 1. create endpoint
    endpoint = client.begin_create_or_update(endpoint).result()  # result() means we wait on this to complete

    # 2. provide endpoint access to Azure Open AI resource
    create_role_assignment(
        scope=f"/subscriptions/{client.subscription_id}/resourceGroups/{azure_config.resource_group}/providers/Microsoft.CognitiveServices/accounts/{azure_config.aoai_account_name}",
        role_name="Cognitive Services OpenAI User",
        principal_id=endpoint.identity.principal_id,
    )
    create_role_assignment(
        scope=f"/subscriptions/{client.subscription_id}/resourceGroups/{azure_config.resource_group}/providers/Microsoft.CognitiveServices/accounts/{azure_config.aoai_account_name}",
        role_name="Cognitive Services Contributor",
        principal_id=endpoint.identity.principal_id,
    )
    create_role_assignment(
        scope=f"/subscriptions/{client.subscription_id}/resourceGroups/{azure_config.resource_group}",
        role_name="Contributor",
        principal_id=endpoint.identity.principal_id,
    )

    # 3. provide endpoint access to Azure AI Search resource
    create_role_assignment(
        scope=f"/subscriptions/{client.subscription_id}/resourceGroups/{azure_config.resource_group}/providers/Microsoft.Search/searchServices/{azure_config.search_account_name}",
        role_name="Search Index Data Contributor",
        principal_id=endpoint.identity.principal_id,
    )

    # 4. provide endpoint access to workspace
    create_role_assignment(
        scope=f"/subscriptions/{azure_config.subscription_id}/resourceGroups/{azure_config.resource_group}/providers/Microsoft.MachineLearningServices/workspaces/{azure_config.workspace_name}",
        role_name="Contributor",
        principal_id=endpoint.identity.principal_id,
    )

    # 5. create deployment
    deployment = client.begin_create_or_update(deployment).result()

    # 6. update endpoint traffic for the deployment
    endpoint.traffic = {deployment_name: 100}  # 100% of traffic
    endpoint = client.begin_create_or_update(endpoint).result()

    output_deployment_details(
        client=client, endpoint_name=endpoint_name, deployment_name=deployment_name
    )


def create_role_assignment(scope, role_name, principal_id):
    """Assign *role_name* at *scope* to *principal_id* (best effort).

    Failures — including an already-existing assignment — are printed rather
    than raised, so the overall deployment continues.
    """
    try:
        # Get credential
        credential = DefaultAzureCredential()

        # Instantiate the authorization management client
        auth_client = AuthorizationManagementClient(
            credential=credential,
            subscription_id=client.subscription_id,
        )

        roles = list(auth_client.role_definitions.list(
            scope, filter="roleName eq '{}'".format(role_name)))
        # `assert` is stripped under `python -O`; raise explicitly so a
        # missing or ambiguous role definition is always reported.
        if len(roles) != 1:
            raise ValueError(
                f"Expected exactly one role definition named '{role_name}' at scope '{scope}', found {len(roles)}"
            )
        role = roles[0]

        # Create role assignment properties
        parameters = RoleAssignmentCreateParameters(
            role_definition_id=role.id,
            principal_id=principal_id,
            principal_type="ServicePrincipal",
        )

        # Create role assignment
        auth_client.role_assignments.create(
            scope=scope,
            role_assignment_name=str(uuid4()),
            parameters=parameters,
        )
    except ResourceExistsError:
        print("Role assignment already exists.")
    except Exception as e:
        print(f"An error occurred during role assignment: {e}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--endpoint-name",
        help="endpoint name to use when deploying or invoking the flow",
        type=str,
    )
    parser.add_argument(
        "--deployment-name",
        help="deployment name used to deploy to a managed online endpoint in AI Studio",
        type=str,
    )
    args = parser.parse_args()

    endpoint_name = args.endpoint_name or "rag-0000-endpoint"
    deployment_name = args.deployment_name or "rag-0000-deployment"

    deploy_flow(endpoint_name, deployment_name)