0_lab_preparation/1_get

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 🚀 Get started to validate the setup\n",
    "\n",
    "This Jupyter notebook is recommended for workshop/education only.\n",
    "\n",
    "Prerequisites:\n",
    "\n",
    "1. Set up your computing environment\n",
    "2. Install the required library in your Python environment\n",
    "3. Select the correct kernel (`azureml_py310_sdkv2`) for your Jupyter notebook\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Azure Open AI Test\n",
    "\n",
    "---\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "from common import check_kernel\n",
    "check_kernel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from openai import AzureOpenAI\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# Read the Azure OpenAI connection settings from environment variables.\n",
    "aoai_api_endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
    "aoai_api_key = os.getenv(\"AZURE_OPENAI_API_KEY\")\n",
    "aoai_api_version = os.getenv(\"AZURE_OPENAI_API_VERSION\")\n",
    "aoai_deployment_name = os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\")\n",
    "\n",
    "# Fall back to the alternative variable names when the AZURE_OPENAI_* ones are unset or empty.\n",
    "if not aoai_api_version:\n",
    "    aoai_api_version = os.getenv(\"OPENAI_API_VERSION\")\n",
    "if not aoai_deployment_name:\n",
    "    aoai_deployment_name = os.getenv(\"DEPLOYMENT_NAME\")\n",
    "\n",
    "# A ValueError/TypeError raised while building the client is printed, not re-raised.\n",
    "try:\n",
    "    client = AzureOpenAI(\n",
    "        azure_endpoint=aoai_api_endpoint,\n",
    "        api_key=aoai_api_key,\n",
    "        api_version=aoai_api_version,\n",
    "    )\n",
    "    deployment_name = aoai_deployment_name\n",
    "    print(\"=== Initialized AzureOpenAI client ===\")\n",
    "    print(f\"AZURE_OPENAI_ENDPOINT={aoai_api_endpoint}\")\n",
    "    print(f\"AZURE_OPENAI_API_VERSION={aoai_api_version}\")\n",
    "    print(f\"AZURE_OPENAI_DEPLOYMENT_NAME={aoai_deployment_name}\")\n",
    "except (ValueError, TypeError) as e:\n",
    "    print(e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create your 
prompt\n",
    "system_message = \"\"\"\n",
    "You are an AI assistant that helps customers find information. As an assistant, you respond to questions in a concise and unique manner.\n",
    "You can use Markdown to answer simply and concisely, and add a personal touch with appropriate emojis.\n",
    "\n",
    "Add a witty joke starting with \"By the way,\" at the end of your response. Do not mention the customer's name in the joke part.\n",
    "The joke should be related to the specific question asked.\n",
    "For example, if the question is about tents, the joke should be specifically related to tents.\n",
    "\n",
    "Use the given context to provide a more personalized response. Write each sentence on a new line:\n",
    "\"\"\"\n",
    "context = \"\"\"\n",
    " The Alpine Explorer Tent features a detachable partition to ensure privacy, \n",
    " numerous mesh windows and adjustable vents for ventilation, and a waterproof design. \n",
    " It also includes a built-in gear loft for storing outdoor essentials. \n",
    " In short, it offers a harmonious blend of privacy, comfort, and convenience, making it a second home in nature!\n",
    "\"\"\"\n",
    "question = \"What are features of the Alpine Explorer Tent?\"\n",
    "\n",
    "user_message = f\"\"\"\n",
    "Context: {context}\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "\n",
    "# Assemble the conversation: system instructions first, then the user turn.\n",
    "chat_messages = [\n",
    "    {\"role\": \"system\", \"content\": system_message},\n",
    "    {\"role\": \"user\", \"content\": user_message},\n",
    "]\n",
    "\n",
    "# Send one chat-completion request to the configured deployment.\n",
    "response = client.chat.completions.create(\n",
    "    model=deployment_name,\n",
    "    messages=chat_messages,\n",
    "    temperature=0.7,\n",
    "    max_tokens=300,\n",
    ")\n",
    "\n",
    "# Show the text of the first returned choice.\n",
    "answer = response.choices[0].message.content\n",
    "print(answer)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## (Optional) 2. 
Azure Document Intelligence Test\n", "\n", "---\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from azure.core.credentials import AzureKeyCredential\n", "from azure.ai.documentintelligence import DocumentIntelligenceClient\n", "from azure.ai.documentintelligence.models import ContentFormat\n", "\n", "doc_intelligence_endpoint = os.getenv(\"AZURE_DOC_INTELLIGENCE_ENDPOINT\")\n", "doc_intelligence_key = os.getenv(\"AZURE_DOC_INTELLIGENCE_KEY\")\n", "\n", "try:\n", " document_intelligence_client = DocumentIntelligenceClient(\n", " endpoint=doc_intelligence_endpoint,\n", " credential=AzureKeyCredential(doc_intelligence_key),\n", " headers={\"x-ms-useragent\": \"sample-code-figure-understanding/1.0.0\"},\n", " )\n", " print(\"=== Initialized DocumentIntelligenceClient ===\")\n", " print(f\"AZURE_DOC_INTELLIGENCE_ENDPOINT={doc_intelligence_endpoint}\")\n", "except (ValueError, TypeError) as e:\n", " print(e)\n", "\n", "raw_data_dir = \"../1_synthetic-qa-generation/raw_data\"\n", "file_path = f\"{raw_data_dir}/pdf/en-imagenet-training-wrote-by-daekeun.pdf\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with open(file_path, \"rb\") as f:\n", " poller = document_intelligence_client.begin_analyze_document(\n", " \"prebuilt-layout\",\n", " analyze_request=f,\n", " content_type=\"application/octet-stream\",\n", " output_content_format=ContentFormat.MARKDOWN,\n", " )\n", "\n", "result = poller.result()\n", "md_content = result.content\n", "print(md_content)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. 
Azure ML Test\n",
    "\n",
    "---\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import yaml\n",
    "from datetime import datetime\n",
    "\n",
    "snapshot_date = datetime.now().strftime(\"%Y-%m-%d\")\n",
    "\n",
    "# Parse the fine-tuning lab's YAML configuration file.\n",
    "with open(\"../2_slm-fine-tuning-mlstudio/phi3/config.yml\") as f:\n",
    "    d = yaml.load(f, Loader=yaml.FullLoader)\n",
    "\n",
    "# All settings read below come from the top-level \"config\" mapping.\n",
    "config = d[\"config\"]\n",
    "\n",
    "AZURE_SUBSCRIPTION_ID = config[\"AZURE_SUBSCRIPTION_ID\"]\n",
    "AZURE_RESOURCE_GROUP = config[\"AZURE_RESOURCE_GROUP\"]\n",
    "AZURE_WORKSPACE = config[\"AZURE_WORKSPACE\"]\n",
    "AZURE_DATA_NAME = config[\"AZURE_DATA_NAME\"]\n",
    "DATA_DIR = config[\"DATA_DIR\"]\n",
    "CLOUD_DIR = config[\"CLOUD_DIR\"]\n",
    "HF_MODEL_NAME_OR_PATH = config[\"HF_MODEL_NAME_OR_PATH\"]\n",
    "IS_DEBUG = config[\"IS_DEBUG\"]\n",
    "USE_LOWPRIORITY_VM = config[\"USE_LOWPRIORITY_VM\"]\n",
    "\n",
    "# Echo every setting as NAME=value, in the same order as the assignments above.\n",
    "for setting_name in (\n",
    "    \"AZURE_SUBSCRIPTION_ID\",\n",
    "    \"AZURE_RESOURCE_GROUP\",\n",
    "    \"AZURE_WORKSPACE\",\n",
    "    \"AZURE_DATA_NAME\",\n",
    "    \"DATA_DIR\",\n",
    "    \"CLOUD_DIR\",\n",
    "    \"HF_MODEL_NAME_OR_PATH\",\n",
    "    \"IS_DEBUG\",\n",
    "    \"USE_LOWPRIORITY_VM\",\n",
    "):\n",
    "    print(f\"{setting_name}={config[setting_name]}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
    "from azure.ai.ml import MLClient\n",
    "from azure.core.exceptions import HttpResponseError\n",
    "\n",
    "# Build the Azure ML client from the subscription, resource group and workspace loaded from config.yml.\n",
    "credential = DefaultAzureCredential()\n",
    "ml_client = MLClient(\n",
    "    credential, AZURE_SUBSCRIPTION_ID, AZURE_RESOURCE_GROUP, AZURE_WORKSPACE\n",
    ")\n",
    "\n",
    "# from azure.identity import ClientSecretCredential\n",
    "# credentials = ClientSecretCredential(\n",
    "# client_id=client_id,\n",
    "# client_secret=client_secret,\n",
    "# 
tenant_id=tenant_id\n",
    "# )\n",
    "\n",
    "# Fetch the workspace to confirm the connection; an HTTP error is reported instead of raised.\n",
    "try:\n",
    "    workspace = ml_client.workspaces.get(name=AZURE_WORKSPACE)\n",
    "    print(\n",
    "        f\"Connected to Azure ML Workspace: {workspace.name}\",\n",
    "        f\"Workspace Location: {workspace.location}\",\n",
    "        f\"Workspace ID: {workspace.id}\",\n",
    "        sep=\"\\n\",\n",
    "    )\n",
    "except HttpResponseError as e:\n",
    "    print(f\"Failed to connect to Azure ML Workspace: {e}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py312-dev",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

0_lab_preparation/1_get_started.ipynb (276 lines of code) (raw):