sdk/python/foundation-models/nvidia-nemo/evaluate/nvidia-text-qna-evaluation.ipynb (546 lines of code) (raw):
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700045134531
}
},
"outputs": [],
"source": [
"# Prequisites\n",
"\n",
"# %pip install --upgrade azure-ai-ml\n",
"# %pip install --upgrade azure-identity\n",
"# %pip install --upgrade datasets==2.9.0\n",
"# %pip install py7zr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049432488
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azure.ai.ml import MLClient\n",
"from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n",
"from azure.ai.ml.entities import AmlCompute\n",
"import time\n",
"\n",
"try:\n",
" credential = DefaultAzureCredential()\n",
" credential.get_token(\"https://management.azure.com/.default\")\n",
"except Exception as ex:\n",
" credential = InteractiveBrowserCredential()\n",
"\n",
"workspace_ml_client = None\n",
"try:\n",
" workspace_ml_client = MLClient.from_config(credential)\n",
" subscription_id = workspace_ml_client.subscription_id\n",
" workspace = workspace_ml_client.workspace_name\n",
" resource_group = workspace_ml_client.resource_group_name\n",
"except Exception as ex:\n",
" print(ex)\n",
" # Enter details of your AML workspace\n",
" subscription_id = \"<SUBSCRIPTION_ID>\"\n",
" resource_group = \"<RESOURCE_GROUP>\"\n",
" workspace = \"<AML_WORKSPACE_NAME>\"\n",
" workspace_ml_client = MLClient(\n",
" credential, subscription_id, resource_group, workspace\n",
" )\n",
"\n",
"# replace with the registry name\n",
"nemo_registry = \"nvidia-ai\" # \"azureml\"\n",
"\n",
"# registry where models are present\n",
"model_registry = \"azureml-preview-test1\"\n",
"\n",
"nemo_registry_ml_client = MLClient(\n",
" credential, subscription_id, resource_group, registry_name=nemo_registry\n",
")\n",
"model_registry_ml_client = MLClient(\n",
" credential, subscription_id, resource_group, registry_name=model_registry\n",
")\n",
"nemo_registry_ml_client"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Creating Computes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049435366
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# If you already have a gpu cluster, mention it here. Else will create a new one with the name 'ghyadav-westus-a100'\n",
"# model will only run on an a100 instance\n",
"\n",
"compute_cluster = \"nemo-westus\"\n",
"try:\n",
" compute = workspace_ml_client.compute.get(compute_cluster)\n",
" print(f\"GPU compute '{compute_cluster}' found.\")\n",
"except Exception as ex:\n",
" print(f\"GPU compute '{compute_cluster}' not found. Creating new one.\")\n",
" compute = AmlCompute(\n",
" name=compute_cluster,\n",
" size=\"STANDARD_ND96AMSR_A100_V4\",\n",
" max_instances=2, # For multi node training set this to an integer value more than 1\n",
" )\n",
" workspace_ml_client.compute.begin_create_or_update(compute).wait()\n",
"\n",
"# generating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))\n",
"\n",
"\n",
"# This is the number of GPUs in a single node of the selected 'vm_size' compute.\n",
"# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.\n",
"# Setting this to more than the number of GPUs will result in an error.\n",
"gpus_per_node = 1 # default value\n",
"gpu_count_found = False\n",
"ws_computes = workspace_ml_client.compute.list_sizes()\n",
"for ws_compute in ws_computes:\n",
" if ws_compute.name.lower() == compute.size.lower():\n",
" gpus_per_node = ws_compute.gpus\n",
" print(f\"Number of GPUs in compute {ws_compute.name} are {ws_compute.gpus}\")\n",
"# if gpu_count_found not found, then print an error\n",
"if gpus_per_node > 0:\n",
" gpu_count_found = True\n",
"else:\n",
" gpu_count_found = False\n",
" print(f\"No GPUs found in compute. Number of GPUs in compute {compute.size} 0.\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Input Data for Evaluation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049437492
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# download the dataset using the helper script. This needs datasets library: https://pypi.org/project/datasets/\n",
"import os\n",
"from datasets import load_dataset, get_dataset_split_names\n",
"\n",
"dataset_dir = \"pubmed-dataset\"\n",
"dataset_name = \"pubmed_qa\"\n",
"# create the download directory if it does not exist\n",
"if not os.path.exists(dataset_dir):\n",
" os.makedirs(dataset_dir)\n",
"\n",
"\n",
"# import hugging face datasets library\n",
"\n",
"split = \"train\" # Only test available\n",
"dataset = load_dataset(dataset_name, \"pqa_labeled\", split=split)\n",
"# save the split of the dataset to the download directory as json lines file\n",
"dataset.to_json(os.path.join(dataset_dir, f\"{split}.jsonl\"))\n",
"# print dataset features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700045144197
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700045144360
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# evaluation_dataset = 'text-generation-data.jsonl' # evaluation_dataset_path\n",
"# prompt_data = '' # prompt_data_path\n",
"# load the ./samsum-dataset/train.jsonl file into a pandas dataframe and show the first 5 rows\n",
"import pandas as pd\n",
"\n",
"evaluation_dataset = \"./pubmed-dataset/test/test.jsonl\"\n",
"pd.set_option(\n",
" \"display.max_colwidth\", 0\n",
") # set the max column width to 0 to display the full text\n",
"df = pd.read_json(\"./pubmed-dataset/train.jsonl\", lines=True)\n",
"df = df[:10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700045144673
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"def form_question(obj):\n",
" st = \"\"\n",
" st += f\"QUESTION: {obj['question']}\\n\"\n",
" st += \"CONTEXT: \"\n",
" for i in range(len(obj[\"context\"][\"labels\"])):\n",
" st += f\"{obj['context']['contexts'][i]}\\n\"\n",
" st += f\"TARGET: the answer to the question given the context is (yes|no|maybe): \"\n",
" return st\n",
"\n",
"\n",
"result = []\n",
"for i, row in df.iterrows():\n",
" result.append(form_question(row))\n",
"data = pd.DataFrame({\"input\": result})\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700045144799
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"frac = 1\n",
"evaluation_dataset = \"./pubmedqa-dataset/test/test_frac.jsonl\"\n",
"os.makedirs(\"./pubmedqa-dataset/test\", exist_ok=True)\n",
"data.sample(frac=frac).to_json(evaluation_dataset, orient=\"records\", lines=True)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Loading model from Registry"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049445465
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"model_name = \"Nemotron-3-8B-Base-4k\"\n",
"model_version = \"latest\"\n",
"\n",
"nemo_model_object = nemo_registry_ml_client.models.get(model_name, label=\"latest\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# Prompt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049447829
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"prompt_path = \"prompts/prompt-qna.txt\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## Submitting Evaluation Pipeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049645884
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azure.ai.ml.dsl import pipeline\n",
"from azure.ai.ml import Input\n",
"from azure.ai.ml.constants import AssetTypes\n",
"\n",
"# fetch the pipeline component\n",
"pipeline_component_func = nemo_registry_ml_client.components.get(\n",
" name=\"nemo_qna_evaluation\",\n",
" label=\"latest\"\n",
" # name = \"nemo_prediction_base\", label=\"latest\"\n",
")\n",
"openai_params = '{\"type\":\"azure_open_ai\",\"model_name\":\"gpt-35-turbo\",\"deployment_name\":\"gpt-35-turbo\",\"questions\":\"input\",\"contexts\":\"input\"}'\n",
"\n",
"\n",
"# define the pipeline job\n",
"@pipeline()\n",
"def evaluation_pipeline(mlflow_model):\n",
" evaluation_job = pipeline_component_func(\n",
" dataset_path=Input(type=AssetTypes.URI_FOLDER, path=\"./pubmedqa-dataset/test/\"),\n",
" model_path=Input(type=AssetTypes.TRITON_MODEL, path=f\"{nemo_model_object.id}\"),\n",
" openai_config_params=openai_params,\n",
" # prompt_schema=Input(type=AssetTypes.URI_FILE, path=prompt_path), # Not required for text-gen task\n",
" # prediction_column_name=\"output\",\n",
" )\n",
" return {\"evaluation_result\": evaluation_job.outputs.evaluation_result}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1700049692743
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"experiment_name = \"nemo-qna-eval-pipeline\"\n",
"pipeline_jobs = []\n",
"\n",
"pipeline_object = evaluation_pipeline()\n",
"\n",
"# don't reuse cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True\n",
"pipeline_object.settings.default_compute = compute_cluster\n",
"pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
")\n",
"# add model['name'] and pipeline_job.name as key value pairs to a dictionary\n",
"pipeline_jobs.append({\"model_name\": model_name, \"job_name\": pipeline_job.name})\n",
"# wait for the pipeline job to complete\n",
"workspace_ml_client.jobs.stream(pipeline_job.name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernel_info": {
"name": "ghyadav1"
},
"kernelspec": {
"display_name": "ghyadav1",
"language": "python",
"name": "ghyadav1"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
},
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}