# sdk/python/foundation-models/system/distillation/nlu_qa/pipelines/openbookqa.yaml
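# Pipeline that benchmarks a distilled model on the OpenBookQA dataset: it downloads and
# samples the validation split, builds 8-shot prompts from the train split, runs batch
# inference against the target endpoint, extracts the predicted option number from each
# response, and computes question-answering metrics.
#
# endpoint_url, deployment_name and connection_name default to empty strings and are
# expected to be supplied at submission time, for example (illustrative CLI invocation;
# the exact override syntax may differ in your setup):
#   az ml job create --file openbookqa.yaml \
#     --set inputs.endpoint_url=<scoring_url> inputs.deployment_name=<deployment> inputs.connection_name=<connection>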
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
display_name: openbookqa_eval
description: Evaluate distilled models on the OpenBookQA dataset
inputs:
task: question-answering
sample_ratio: 0.01
ground_truth_column_name: completion
prediction_column_name: prediction
# batch_score inputs
endpoint_url: ""
deployment_name: ""
authentication_type: azureml_workspace_connection
connection_name: ""
debug_mode: false
jobs:
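# Test data: download the OpenBookQA dataset (configuration 'main', validation split).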
downloader:
type: command
component: azureml://registries/azureml/components/dataset_downloader/labels/latest
limits: {}
inputs:
dataset_name: openbookqa
configuration: main
split: validation
outputs:
output_dataset:
type: uri_folder
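# Sub-sample the validation split: take the first sample_ratio fraction of rows (1% by default).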
sampler:
type: command
component: azureml://registries/azureml/components/dataset_sampler/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.downloader.outputs.output_dataset}}
sampling_style: head
sampling_ratio: ${{parent.inputs.sample_ratio}}
random_seed: 0
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
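# Map each raw record into question/choices/answer fields and encode the letter answer keys A-D
# as the numeric labels 1-4 used in the prompts.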
preprocessor:
type: command
component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.sampler.outputs.output_dataset}}
template_input: '{"question":{{question_stem}}, "choices":{{choices.text}},"answer":{{answerKey}}}'
encoder_config: '{"column_name": "answer", "A": 1, "B": 2, "C": 3, "D": 4}'
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
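# Few-shot data: download the OpenBookQA train split to source in-context examples.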
downloader_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_downloader/labels/latest
limits: {}
inputs:
dataset_name: openbookqa
configuration: main
split: train
outputs:
output_dataset:
type: uri_folder
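# Take the first 8 training rows to use as few-shot demonstrations.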
sampler_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_sampler/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.downloader_fewshot.outputs.output_dataset}}
sampling_style: head
n_samples: 8
random_seed: 0
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
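# Apply the same question/choices/answer template and A-D -> 1-4 encoding to the few-shot examples.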
preprocessor_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.sampler_fewshot.outputs.output_dataset}}
template_input: '{"question":{{question_stem}}, "choices":{{choices.text}},"answer":{{answerKey}}}'
encoder_config: '{"column_name": "answer", "A": 1, "B": 2, "C": 3, "D": 4}'
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
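# Build the final prompts: an instruction prefix, 8 few-shot <example> blocks, and the test question
# with its four numbered answer options; the model is instructed to reply with the option number
# inside <answer></answer> tags.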
promptcrafter:
type: command
component: azureml://registries/azureml/components/prompt_crafter/labels/latest
limits: {}
inputs:
test_data:
type: uri_folder
path: ${{parent.jobs.preprocessor.outputs.output_dataset}}
few_shot_data:
type: uri_file
path: ${{parent.jobs.preprocessor_fewshot.outputs.output_dataset}}
prompt_type: completions
prefix: "\n\nYou are given a problem and asked to choose between answers in options 1, 2, 3, or 4. \
There are examples within <example> tags for you to learn from. For the last question respond \
with only the number of the option you choose. This number must be within the tags \
<answer> and </answer>. Do not add other text to the response.\n\nHuman:"
prompt_pattern: "\nQuestion: {{question}}\n\
Answer options:\n\
(1) {{choices[0]}}\n\
(2) {{choices[1]}}\n\
(3) {{choices[2]}}\n\
(4) {{choices[3]}}\n\
The answer is:\n\nAssistant:\n\n"
n_shots: 8
output_pattern: '{{answer}}'
few_shot_separator: "\n\n"
few_shot_pattern: "<example>\n\
Question: {{question}}\n\
Answer options:\n\
(1) {{choices[0]}}\n\
(2) {{choices[1]}}\n\
(3) {{choices[2]}}\n\
(4) {{choices[3]}}\n\
The answer is: {{answer}}\n\
</example>"
random_seed: 0
outputs:
output_file:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
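# Wrap each crafted prompt into the chat-completions request payload sent to the endpoint;
# the '###<prompt>' token in batch_input_pattern is the placeholder the component fills in
# with the prompt text.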
batch_score_preprocessor:
type: command
component: azureml://registries/azureml/components/batch_inference_preparer/labels/latest
limits: {}
inputs:
input_dataset:
type: uri_file
path: ${{parent.jobs.promptcrafter.outputs.output_file}}
model_type: oai
batch_input_pattern: '{"messages": [{"role": "user", "content": "###<prompt>"}], "temperature": 0.6, "top_p": 1.0, "max_new_tokens": 2048, "frequency_penalty": 0.0, "presence_penalty": 0.0}'
label_column_name: ${{parent.inputs.ground_truth_column_name}}
is_performance_test: false
outputs:
formatted_data:
type: mltable
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
ground_truth_metadata:
type: uri_folder
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
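# Generate the batch-scoring configuration (endpoint URL, deployment, auth connection, retry and
# worker-count settings) consumed by the batch_score job below.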
config_generator:
type: command
component: azureml://registries/azureml/components/batch_benchmark_config_generator/labels/latest
inputs:
scoring_url: ${{parent.inputs.endpoint_url}}
deployment_name: ${{parent.inputs.deployment_name}}
authentication_type: ${{parent.inputs.authentication_type}}
connection_name: ${{parent.inputs.connection_name}}
additional_headers: ""
debug_mode: ${{parent.inputs.debug_mode}}
ensure_ascii: false
max_retry_time_interval: 300
initial_worker_count: 5
max_worker_count: 200
model_type: oss
outputs:
batch_score_config:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
# Batch score job
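# Runs parallel inference against the target endpoint using the formatted requests and the
# generated configuration file.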
batch_score:
type: parallel
component: azureml://registries/azureml/components/batch_score_oss/labels/latest
inputs:
async_mode: false
data_input_table: ${{parent.jobs.batch_score_preprocessor.outputs.formatted_data}}
configuration_file: ${{parent.jobs.config_generator.outputs.batch_score_config}}
outputs:
job_output_path:
type: uri_file
mini_batch_results_output_directory:
type: uri_folder
resources:
instance_count: 1
max_concurrency_per_instance: 1
retry_settings:
timeout: 6000
max_retries: 10
environment_variables:
BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180'
BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False'
BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300'
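# Collect the raw endpoint responses into prediction, ground-truth and request-status outputs;
# failed responses are handled with the use_fallback strategy.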
batch_score_postprocessor:
type: command
component: azureml://registries/azureml/components/batch_output_formatter/labels/latest
limits: {}
inputs:
batch_inference_output:
type: uri_folder
path: ${{parent.jobs.batch_score.outputs.mini_batch_results_output_directory}}
ground_truth_input:
type: uri_file
path: ${{parent.jobs.batch_score_preprocessor.outputs.ground_truth_metadata}}
model_type: oai
label_column_name: ${{parent.inputs.ground_truth_column_name}}
endpoint_url: ${{parent.inputs.endpoint_url}}
handle_response_failure: use_fallback
min_endpoint_success_ratio: 0.0
is_performance_test: false
use_tiktoken: false
outputs:
predictions:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
performance_metadata:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
ground_truth:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
successful_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
failed_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
unsafe_content_blocked_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
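# Clean up the raw predictions: find_first keeps the first occurrence of 1, 2, 3 or 4 found in each
# response so it can be compared against the encoded ground truth.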
postprocessor:
type: command
component: azureml://registries/azureml/components/inference_postprocessor/labels/latest
limits: {}
inputs:
ground_truth_dataset:
type: uri_folder
path: ${{parent.jobs.batch_score_postprocessor.outputs.ground_truth}}
prediction_dataset:
type: uri_folder
path: ${{parent.jobs.batch_score_postprocessor.outputs.predictions}}
ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
prediction_column_name: ${{parent.inputs.prediction_column_name}}
separator: '
'
find_first: 1,2,3,4
outputs:
output_dataset_result:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
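# Score the extracted predictions against the ground truth with question-answering metrics;
# regexes_to_ignore strips trailing '.0' sequences and commas before comparison.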
compute_metrics:
type: command
component: azureml://registries/azureml/components/compute_metrics/labels/latest
limits: {}
inputs:
ground_truth:
type: uri_folder
path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
prediction:
type: uri_folder
path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
task: ${{parent.inputs.task}}
ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
prediction_column_name: ${{parent.inputs.prediction_column_name}}
evaluation_config_params: '{"regexes_to_ignore": ["\\.0+$", ","]}'
outputs:
evaluation_result:
type: uri_folder
tags:
workflow: distill_llm_benchmark
evaluation_type: question-answering
properties:
_azureml.evaluation_run: Benchmark
settings:
force_rerun: false
default_compute: azureml:serverless