# sdk/python/foundation-models/system/distillation/conversation/pipelines/hellaswag.yaml
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
display_name: hellaswag_eval
description: Evaluate distilled models on the HellaSwag dataset using chat prompts
inputs:
task: question-answering
sample_ratio: 0.01
ground_truth_column_name: completion
prediction_column_name: prediction
# batch_score inputs
endpoint_url: ""
deployment_name: ""
authentication_type: azureml_workspace_connection
connection_name: ""
debug_mode: False
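# The pipeline below chains the azureml registry benchmark components: download and
# sample HellaSwag, reshape records with a Jinja template, craft 5-shot chat prompts,
# batch-score the target endpoint, then post-process and compute metrics.
# Minimal submission sketch (an assumption, not part of this sample: the azure-ai-ml v2
# SDK is installed and the subscription/resource group/workspace values are placeholders):
#
#   from azure.ai.ml import MLClient, load_job
#   from azure.identity import DefaultAzureCredential
#
#   ml_client = MLClient(
#       DefaultAzureCredential(),
#       subscription_id="<subscription-id>",
#       resource_group_name="<resource-group>",
#       workspace_name="<workspace>",
#   )
#   # Fill endpoint_url, deployment_name and connection_name in the inputs above
#   # before submitting.
#   pipeline_job = load_job("hellaswag.yaml")
#   returned_job = ml_client.jobs.create_or_update(pipeline_job)
#   print(returned_job.studio_url)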
jobs:
downloader:
type: command
component: azureml://registries/azureml/components/dataset_downloader/labels/latest
limits: {}
inputs:
dataset_name: hellaswag
configuration: all
split: validation
outputs:
output_dataset:
type: uri_folder
sampler:
type: command
component: azureml://registries/azureml/components/dataset_sampler/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.downloader.outputs.output_dataset}}
sampling_style: head
sampling_ratio: ${{parent.inputs.sample_ratio}}
random_seed: 0
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
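# Head-sampling 1% of the HellaSwag validation split (about 10k rows) keeps roughly 100
# evaluation examples; raise sample_ratio at the pipeline level for a fuller run.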
preprocessor:
type: command
component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.sampler.outputs.output_dataset}}
template_input: "{\n \"activity_label\": {{activity_label}},\n {% for i in\
\ range(endings|length) %}\n \"choice_{{(i+1)}}\": {{endings[i]}},\n {%\
\ endfor %}\n \"ctx\": {{ctx}},\n {% set label_int = label | int %}\n \"\
label\": {{(label_int + 1)}}\n}"
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
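# The Jinja template above flattens each raw record: the four `endings` become
# choice_1..choice_4 and the zero-based `label` is shifted to 1-4. Illustrative output
# row (values abbreviated):
#   {"activity_label": "Removing ice from car", "choice_1": "...", "choice_2": "...",
#    "choice_3": "...", "choice_4": "...", "ctx": "Then, the man writes over the snow ...",
#    "label": 3}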
downloader_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_downloader/labels/latest
limits: {}
inputs:
dataset_name: hellaswag
configuration: all
split: train
outputs:
output_dataset:
type: uri_folder
sampler_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_sampler/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.downloader_fewshot.outputs.output_dataset}}
sampling_style: head
n_samples: 8
random_seed: 0
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
preprocessor_fewshot:
type: command
component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
limits: {}
inputs:
dataset:
type: uri_folder
path: ${{parent.jobs.sampler_fewshot.outputs.output_dataset}}
template_input: "{\n \"activity_label\": {{activity_label}},\n {% for i in\
\ range(endings|length) %}\n \"choice_{{(i+1)}}\": {{endings[i]}},\n {%\
\ endfor %}\n \"ctx\": {{ctx}},\n {% set label_int = label | int %}\n \"\
label\": {{(label_int + 1)}}\n}"
outputs:
output_dataset:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
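# The few-shot branch mirrors the test-data preprocessing but draws 8 candidate examples
# from the train split; the prompt crafter below uses 5 of them per prompt (n_shots: 5).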
promptcrafter:
type: command
component: azureml://registries/azureml/components/prompt_crafter/labels/latest
limits: {}
inputs:
test_data:
type: uri_folder
path: ${{parent.jobs.preprocessor.outputs.output_dataset}}
few_shot_data:
type: uri_file
path: ${{parent.jobs.preprocessor_fewshot.outputs.output_dataset}}
prompt_type: chat
prompt_pattern: "Category: {{activity_label}}\nText: {{ctx}}\nCompletion options:\n(1) {{choice_1}}\n(2) {{choice_2}}\n(3) {{choice_3}}\n(4) {{choice_4}}\nThe most likely text completion is: "
n_shots: 5
output_pattern: '{{label}}'
system_message: Given a partial description of an event as text, your task is
to select the most appropriate continuation from the provided options. Carefully
analyze the context and use your understanding of the world to make the best
choice. Consider logical flow, relevance, and commonsense reasoning when making
your selection. Always choose the answer from the provided completion options
that best completes the given sentence or scenario.
random_seed: 0
outputs:
output_file:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
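# The prompt crafter emits chat prompts: the system message, then 5 solved few-shot
# examples rendered from prompt_pattern/output_pattern (as prior user/assistant turns),
# then the test question; the ground truth recorded for scoring is the 1-4 choice number
# from output_pattern.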
batch_score_preprocessor:
type: command
component: azureml://registries/azureml/components/batch_inference_preparer/labels/latest
limits: {}
inputs:
input_dataset:
type: uri_file
path: ${{parent.jobs.promptcrafter.outputs.output_file}}
model_type: oai
batch_input_pattern: '{"messages": ###<prompt>, "temperature": 0.0, "max_tokens": 10, "top_p": 1.0}'
label_column_name: ${{parent.inputs.ground_truth_column_name}}
is_performance_test: false
outputs:
formatted_data:
type: mltable
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
ground_truth_metadata:
type: uri_folder
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
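# batch_input_pattern wraps every crafted prompt in a request body; the ###<prompt>
# placeholder is substituted with the chat messages. Illustrative payload (contents
# abbreviated):
#   {"messages": [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}],
#    "temperature": 0.0, "max_tokens": 10, "top_p": 1.0}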
config_generator:
type: command
component: azureml://registries/azureml/components/batch_benchmark_config_generator/labels/latest
inputs:
scoring_url: ${{parent.inputs.endpoint_url}}
deployment_name: ${{parent.inputs.deployment_name}}
authentication_type: ${{parent.inputs.authentication_type}}
connection_name: ${{parent.inputs.connection_name}}
additional_headers: ""
debug_mode: ${{parent.inputs.debug_mode}}
ensure_ascii: false
max_retry_time_interval: 300
initial_worker_count: 5
max_worker_count: 200
model_type: oss
outputs:
batch_score_config:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
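# Produces the runtime configuration consumed by the batch_score job below: scoring URL
# and deployment, authentication through the named workspace connection, plus the
# retry, debug and worker-scaling settings declared above.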
# Batch score job
batch_score:
type: parallel
component: azureml://registries/azureml/components/batch_score_oss/labels/latest
inputs:
async_mode: False
data_input_table: ${{parent.jobs.batch_score_preprocessor.outputs.formatted_data}}
configuration_file: ${{parent.jobs.config_generator.outputs.batch_score_config}}
outputs:
job_output_path:
type: uri_file
mini_batch_results_output_directory:
type: uri_folder
resources:
instance_count: 1
max_concurrency_per_instance: 8
retry_settings:
timeout: 6000
max_retries: 10
environment_variables:
BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180'
BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False'
BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300'
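# The scorer runs as a parallel job: one instance with up to 8 concurrent mini-batch
# processes; request-level concurrency and retries are driven by the worker counts in
# the generated config, and per-request timeouts by the BATCH_SCORE_* variables above.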
batch_score_postprocessor:
type: command
component: azureml://registries/azureml/components/batch_output_formatter/labels/latest
limits: {}
inputs:
batch_inference_output:
type: uri_folder
path: ${{parent.jobs.batch_score.outputs.mini_batch_results_output_directory}}
ground_truth_input:
type: uri_file
path: ${{parent.jobs.batch_score_preprocessor.outputs.ground_truth_metadata}}
model_type: oai
label_column_name: ${{parent.inputs.ground_truth_column_name}}
endpoint_url: ${{parent.inputs.endpoint_url}}
handle_response_failure: use_fallback
min_endpoint_success_ratio: 0.0
is_performance_test: false
use_tiktoken: false
outputs:
predictions:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
performance_metadata:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
ground_truth:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
successful_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
failed_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
unsafe_content_blocked_requests:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
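# The formatter splits raw scoring results into aligned JSONL outputs: predictions,
# matching ground truth, performance metadata, and per-request buckets for successful,
# failed, and content-filtered calls.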
postprocessor:
type: command
component: azureml://registries/azureml/components/inference_postprocessor/labels/latest
limits: {}
inputs:
ground_truth_dataset:
type: uri_folder
path: ${{parent.jobs.batch_score_postprocessor.outputs.ground_truth}}
prediction_dataset:
type: uri_folder
path: ${{parent.jobs.batch_score_postprocessor.outputs.predictions}}
ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
prediction_column_name: ${{parent.inputs.prediction_column_name}}
separator: "\n"
find_first: 1,2,3,4
outputs:
output_dataset_result:
type: uri_file
path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
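# find_first keeps the first occurrence of 1, 2, 3 or 4 in each (newline-split) model
# response, so a verbose answer like "The most likely text completion is (3) ..." is
# reduced to "3" before metrics are computed.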
compute_metrics:
type: command
component: azureml://registries/azureml/components/compute_metrics/labels/latest
limits: {}
inputs:
ground_truth:
type: uri_folder
path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
prediction:
type: uri_folder
path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
task: ${{parent.inputs.task}}
ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
prediction_column_name: ${{parent.inputs.prediction_column_name}}
evaluation_config_params: '{"regexes_to_ignore": ["\\W"]}'
outputs:
evaluation_result:
type: uri_folder
tags:
workflow: distill_llm_benchmark
evaluation_type: text-generation
properties:
_azureml.evaluation_run: Benchmark
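# Metrics are computed as a question-answering task: predictions and ground truth are
# compared after stripping non-word characters (regexes_to_ignore: "\W"), giving
# exact-match style accuracy over the 1-4 choice labels.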
settings:
force_rerun: false
default_compute: azureml:serverless