sdk/python/foundation-models/nvidia-nemo/evaluate/nvidia-text-qna-evaluation.ipynb

{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "# Text QnA evaluation of an NVIDIA NeMo model\n",
    "\n",
    "This notebook downloads the `pubmed_qa` (`pqa_labeled`) dataset, turns the first 10 rows into question/context prompts, and submits the `nemo_qna_evaluation` pipeline component from the `nvidia-ai` registry to evaluate the `Nemotron-3-8B-Base-4k` model on an Azure Machine Learning GPU cluster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700045134531}},
   "outputs": [],
   "source": [
    "# Prerequisites\n",
    "\n",
    "# %pip install --upgrade azure-ai-ml\n",
    "# %pip install --upgrade azure-identity\n",
    "# %pip install --upgrade datasets==2.9.0\n",
    "# %pip install py7zr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049432488}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "from azure.ai.ml import MLClient\n",
    "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
    "from azure.ai.ml.entities import AmlCompute\n",
    "import time\n",
    "\n",
    "# Try the default credential chain first; if it cannot produce a management\n",
    "# token, fall back to an interactive browser login.\n",
    "try:\n",
    "    credential = DefaultAzureCredential()\n",
    "    credential.get_token(\"https://management.azure.com/.default\")\n",
    "except Exception:\n",
    "    print(\n",
    "        \"DefaultAzureCredential could not get a token; falling back to InteractiveBrowserCredential.\"\n",
    "    )\n",
    "    credential = InteractiveBrowserCredential()\n",
    "\n",
    "# Build the workspace client from a local config file when one is available,\n",
    "# otherwise from the explicit workspace details below.\n",
    "workspace_ml_client = None\n",
    "try:\n",
    "    workspace_ml_client = MLClient.from_config(credential)\n",
    "    subscription_id = workspace_ml_client.subscription_id\n",
    "    workspace = workspace_ml_client.workspace_name\n",
    "    resource_group = workspace_ml_client.resource_group_name\n",
    "except Exception as ex:\n",
    "    print(ex)\n",
    "    # Enter details of your AML workspace\n",
    "    subscription_id = \"<SUBSCRIPTION_ID>\"\n",
    "    resource_group = \"<RESOURCE_GROUP>\"\n",
    "    workspace = \"<AML_WORKSPACE_NAME>\"\n",
    "    workspace_ml_client = MLClient(\n",
    "        credential, subscription_id, resource_group, workspace\n",
    "    )\n",
    "\n",
    "# replace with the registry name\n",
    "nemo_registry = \"nvidia-ai\"  # \"azureml\"\n",
    "\n",
    "# registry where models are present\n",
    "model_registry = \"azureml-preview-test1\"\n",
    "\n",
    "nemo_registry_ml_client = MLClient(\n",
    "    credential, subscription_id, resource_group, registry_name=nemo_registry\n",
    ")\n",
    "model_registry_ml_client = MLClient(\n",
    "    credential, subscription_id, resource_group, registry_name=model_registry\n",
    ")\n",
    "nemo_registry_ml_client"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "## Creating Computes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049435366}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "# If you already have a gpu cluster, mention it here. Else a new one is created with the name in `compute_cluster`.\n",
    "# model will only run on an a100 instance\n",
    "\n",
    "compute_cluster = \"nemo-westus\"\n",
    "try:\n",
    "    compute = workspace_ml_client.compute.get(compute_cluster)\n",
    "    print(f\"GPU compute '{compute_cluster}' found.\")\n",
    "except Exception:\n",
    "    print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
    "    compute = AmlCompute(\n",
    "        name=compute_cluster,\n",
    "        size=\"STANDARD_ND96AMSR_A100_V4\",\n",
    "        max_instances=2,  # For multi node training set this to an integer value more than 1\n",
    "    )\n",
    "    workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
    "\n",
    "# generating a unique timestamp that can be used for names and versions that need to be unique\n",
    "timestamp = str(int(time.time()))\n",
    "\n",
    "\n",
    "# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
    "# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
    "# Setting this to more than the number of GPUs will result in an error.\n",
    "gpus_per_node = 1  # default value, kept when the compute size cannot be looked up\n",
    "gpu_count_found = False\n",
    "\n",
    "# look up the VM size of the selected compute in the sizes offered by the workspace\n",
    "matching_size = None\n",
    "ws_computes = workspace_ml_client.compute.list_sizes()\n",
    "for ws_compute in ws_computes:\n",
    "    if ws_compute.name.lower() == compute.size.lower():\n",
    "        matching_size = ws_compute\n",
    "        break\n",
    "\n",
    "if matching_size is None:\n",
    "    # the size was never matched, so the default above is NOT a confirmed GPU count\n",
    "    print(\n",
    "        f\"Compute size {compute.size} was not found in the workspace size list; \"\n",
    "        f\"keeping default gpus_per_node={gpus_per_node}.\"\n",
    "    )\n",
    "else:\n",
    "    gpus_per_node = matching_size.gpus\n",
    "    print(f\"Number of GPUs in compute {matching_size.name} are {matching_size.gpus}\")\n",
    "    gpu_count_found = gpus_per_node > 0\n",
    "    if not gpu_count_found:\n",
    "        print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "## Input Data for Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049437492}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "# download the dataset with the Hugging Face datasets library: https://pypi.org/project/datasets/\n",
    "import os\n",
    "from datasets import load_dataset, get_dataset_split_names\n",
    "\n",
    "dataset_dir = \"pubmed-dataset\"\n",
    "dataset_name = \"pubmed_qa\"\n",
    "# create the download directory if it does not exist\n",
    "os.makedirs(dataset_dir, exist_ok=True)\n",
    "\n",
    "split = \"train\"  # split of the \"pqa_labeled\" configuration that is loaded below\n",
    "dataset = load_dataset(dataset_name, \"pqa_labeled\", split=split)\n",
    "# save the split of the dataset to the download directory as json lines file\n",
    "dataset.to_json(os.path.join(dataset_dir, f\"{split}.jsonl\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700045144197}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700045144360}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "# load the json lines file written above into a pandas dataframe and keep the first 10 rows\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option(\n",
    "    \"display.max_colwidth\", 0\n",
    ")  # set the max column width to 0 to display the full text\n",
    "df = pd.read_json(os.path.join(dataset_dir, f\"{split}.jsonl\"), lines=True).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700045144673}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "def form_question(obj):\n",
    "    \"\"\"Build one prompt string from a dataset row.\n",
    "\n",
    "    The prompt holds the row's question, each context passage on its own line,\n",
    "    and a trailing cue asking for a yes/no/maybe answer.\n",
    "    \"\"\"\n",
    "    st = f\"QUESTION: {obj['question']}\\n\"\n",
    "    st += \"CONTEXT: \"\n",
    "    # one context passage per label entry\n",
    "    for i in range(len(obj[\"context\"][\"labels\"])):\n",
    "        st += f\"{obj['context']['contexts'][i]}\\n\"\n",
    "    st += \"TARGET: the answer to the question given the context is (yes|no|maybe): \"\n",
    "    return st\n",
    "\n",
    "\n",
    "# the evaluation input is a single \"input\" column holding the prompts\n",
    "result = [form_question(row) for _, row in df.iterrows()]\n",
    "data = pd.DataFrame({\"input\": result})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700045144799}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "frac = 1  # fraction of the prompts written to the evaluation file\n",
    "# folder that is passed to the evaluation pipeline as `dataset_path`\n",
    "evaluation_dir = \"./pubmedqa-dataset/test/\"\n",
    "evaluation_dataset = os.path.join(evaluation_dir, \"test_frac.jsonl\")\n",
    "os.makedirs(evaluation_dir, exist_ok=True)\n",
    "# fixed random_state so the sampled (shuffled) file is identical on every run\n",
    "data.sample(frac=frac, random_state=42).to_json(\n",
    "    evaluation_dataset, orient=\"records\", lines=True\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "## Loading model from Registry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049445465}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "model_name = \"Nemotron-3-8B-Base-4k\"\n",
    "model_version = \"latest\"  # passed as the label when fetching the model\n",
    "\n",
    "nemo_model_object = nemo_registry_ml_client.models.get(model_name, label=model_version)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "## Prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049447829}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "prompt_path = \"prompts/prompt-qna.txt\""
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {"nteract": {"transient": {"deleting": false}}},
   "source": [
    "## Submitting Evaluation Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049645884}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "from azure.ai.ml.dsl import pipeline\n",
    "from azure.ai.ml import Input\n",
    "from azure.ai.ml.constants import AssetTypes\n",
    "\n",
    "# fetch the pipeline component\n",
    "pipeline_component_func = nemo_registry_ml_client.components.get(\n",
    "    name=\"nemo_qna_evaluation\",\n",
    "    label=\"latest\",\n",
    ")\n",
    "openai_params = '{\"type\":\"azure_open_ai\",\"model_name\":\"gpt-35-turbo\",\"deployment_name\":\"gpt-35-turbo\",\"questions\":\"input\",\"contexts\":\"input\"}'\n",
    "\n",
    "\n",
    "# define the pipeline job\n",
    "# The pipeline takes no inputs: the dataset folder, the model and the OpenAI\n",
    "# parameters are all read from the notebook variables defined above.\n",
    "@pipeline()\n",
    "def evaluation_pipeline():\n",
    "    evaluation_job = pipeline_component_func(\n",
    "        dataset_path=Input(type=AssetTypes.URI_FOLDER, path=evaluation_dir),\n",
    "        model_path=Input(type=AssetTypes.TRITON_MODEL, path=f\"{nemo_model_object.id}\"),\n",
    "        openai_config_params=openai_params,\n",
    "        # prompt_schema=Input(type=AssetTypes.URI_FILE, path=prompt_path), # Not required for text-gen task\n",
    "        # prediction_column_name=\"output\",\n",
    "    )\n",
    "    return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"gather": {"logged": 1700049692743}, "jupyter": {"outputs_hidden": false, "source_hidden": false}, "nteract": {"transient": {"deleting": false}}},
   "outputs": [],
   "source": [
    "experiment_name = \"nemo-qna-eval-pipeline\"\n",
    "pipeline_jobs = []\n",
    "\n",
    "pipeline_object = evaluation_pipeline()\n",
    "\n",
    "# don't reuse cached results from previous jobs\n",
    "pipeline_object.settings.force_rerun = True\n",
    "pipeline_object.settings.default_compute = compute_cluster\n",
    "pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
    "    pipeline_object, experiment_name=experiment_name\n",
    ")\n",
    "# record the model name together with the submitted job's name\n",
    "pipeline_jobs.append({\"model_name\": model_name, \"job_name\": pipeline_job.name})\n",
    "# wait for the pipeline job to complete\n",
    "workspace_ml_client.jobs.stream(pipeline_job.name)"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {"name": "ghyadav1"},
  "kernelspec": {"display_name": "ghyadav1", "language": "python", "name": "ghyadav1"},
  "language_info": {
   "codemirror_mode": {"name": "ipython", "version": 3},
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  },
  "microsoft": {
   "host": {"AzureML": {"notebookHasBeenCompleted": true}},
   "ms_spell_check": {"ms_spell_check_language": "en"}
  },
  "nteract": {"version": "nteract-front-end@1.0.0"}
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

sdk/python/foundation-models/nvidia-nemo/evaluate/nvidia-text-qna-evaluation.ipynb (546 lines of code) (raw):