# sdk/python/foundation-models/system/distillation/nlu_qa/pipelines/openbookqa.yaml

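# Pipeline overview:
#   1. downloader / sampler / preprocessor: fetch the OpenBookQA validation split, sample it,
#      and map each record to question/choices/answer (answer letters encoded as 1-4).
#   2. downloader_fewshot / sampler_fewshot / preprocessor_fewshot: prepare 8 few-shot examples
#      from the train split.
#   3. promptcrafter: assemble the few-shot prompts.
#   4. batch_score_preprocessor / config_generator / batch_score: format the requests and score
#      them against the target endpoint.
#   5. batch_score_postprocessor / postprocessor: extract the predicted option (first match of
#      1/2/3/4) and align predictions with ground truth.
#   6. compute_metrics: compute question-answering metrics.
#
# The endpoint_url, deployment_name, and connection_name inputs are empty placeholders and must
# be supplied when the pipeline job is submitted.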
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
display_name: openbookqa_eval
description: Evaluate distilled models on openbook qa dataset

inputs:
  task: question-answering
  sample_ratio: 0.01
  ground_truth_column_name: completion
  prediction_column_name: prediction
  # batch_score inputs
  endpoint_url: ""
  deployment_name: ""
  authentication_type: azureml_workspace_connection
  connection_name: ""
  debug_mode: False

jobs:
  downloader:
    type: command
    component: azureml://registries/azureml/components/dataset_downloader/labels/latest
    limits: {}
    inputs:
      dataset_name: openbookqa
      configuration: main
      split: validation
    outputs:
      output_dataset:
        type: uri_folder

  sampler:
    type: command
    component: azureml://registries/azureml/components/dataset_sampler/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.downloader.outputs.output_dataset}}
      sampling_style: head
      sampling_ratio: ${{parent.inputs.sample_ratio}}
      random_seed: 0
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  preprocessor:
    type: command
    component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.sampler.outputs.output_dataset}}
      template_input: '{"question":{{question_stem}}, "choices":{{choices.text}},"answer":{{answerKey}}}'
      encoder_config: '{"column_name": "answer", "A": 1, "B": 2, "C": 3, "D": 4}'
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  downloader_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_downloader/labels/latest
    limits: {}
    inputs:
      dataset_name: openbookqa
      configuration: main
      split: train
    outputs:
      output_dataset:
        type: uri_folder

  sampler_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_sampler/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.downloader_fewshot.outputs.output_dataset}}
      sampling_style: head
      n_samples: 8
      random_seed: 0
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  preprocessor_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.sampler_fewshot.outputs.output_dataset}}
      template_input: '{"question":{{question_stem}}, "choices":{{choices.text}}, "answer":{{answerKey}}}'
      encoder_config: '{"column_name": "answer", "A": 1, "B": 2, "C": 3, "D": 4}'
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  promptcrafter:
    type: command
    component: azureml://registries/azureml/components/prompt_crafter/labels/latest
    limits: {}
    inputs:
      test_data:
        type: uri_folder
        path: ${{parent.jobs.preprocessor.outputs.output_dataset}}
      few_shot_data:
        type: uri_file
        path: ${{parent.jobs.preprocessor_fewshot.outputs.output_dataset}}
      prompt_type: completions
      prefix: "\n\nYou are given a problem and asked to choose between answers in options 1, 2, 3, or 4. \
        There are examples within <example> tags for you to learn from. For the last question respond \
        with only the number of the option you choose. This number must be within the tags \
        <answer> and </answer>. Do not add other text to the response.\n\nHuman:"
      prompt_pattern: "\nQuestion: {{question}}\n\
        Answer options:\n\
        (1) {{choices[0]}}\n\
        (2) {{choices[1]}}\n\
        (3) {{choices[2]}}\n\
        (4) {{choices[3]}}\n\
        The answer is:\n\nAssistant:\n\n"
      n_shots: 8
      output_pattern: '{{answer}}'
      few_shot_separator: "\n\n"
      few_shot_pattern: "<example>\n\
        Question: {{question}}\n\
        Answer options:\n\
        (1) {{choices[0]}}\n\
        (2) {{choices[1]}}\n\
        (3) {{choices[2]}}\n\
        (4) {{choices[3]}}\n\
        The answer is: {{answer}}\n\
        </example>"
      random_seed: 0
    outputs:
      output_file:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  batch_score_preprocessor:
    type: command
    component: azureml://registries/azureml/components/batch_inference_preparer/labels/latest
    limits: {}
    inputs:
      input_dataset:
        type: uri_file
        path: ${{parent.jobs.promptcrafter.outputs.output_file}}
      model_type: oai
      batch_input_pattern: '{"messages": [{"role": "user", "content": "###<prompt>"}], "temperature": 0.6, "top_p": 1.0, "max_new_tokens": 2048, "frequency_penalty": 0.0, "presence_penalty": 0.0}'
      label_column_name: ${{parent.inputs.ground_truth_column_name}}
      is_performance_test: false
    outputs:
      formatted_data:
        type: mltable
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
      ground_truth_metadata:
        type: uri_folder
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}

  config_generator:
    type: command
    component: azureml://registries/azureml/components/batch_benchmark_config_generator/labels/latest
    inputs:
      scoring_url: ${{parent.inputs.endpoint_url}}
      deployment_name: ${{parent.inputs.deployment_name}}
      authentication_type: ${{parent.inputs.authentication_type}}
      connection_name: ${{parent.inputs.connection_name}}
      additional_headers: ""
      debug_mode: ${{parent.inputs.debug_mode}}
      ensure_ascii: false
      max_retry_time_interval: 300
      initial_worker_count: 5
      max_worker_count: 200
      model_type: oss
    outputs:
      batch_score_config:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  # Batch score job
  batch_score:
    type: parallel
    component: azureml://registries/azureml/components/batch_score_oss/labels/latest
    inputs:
      async_mode: False
      data_input_table: ${{parent.jobs.batch_score_preprocessor.outputs.formatted_data}}
      configuration_file: ${{parent.jobs.config_generator.outputs.batch_score_config}}
    outputs:
      job_output_path:
        type: uri_file
      mini_batch_results_output_directory:
        type: uri_folder
    resources:
      instance_count: 1
    max_concurrency_per_instance: 1
    retry_settings:
      timeout: 6000
      max_retries: 10
    environment_variables:
      BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180'
      BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False'
      BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300'

  batch_score_postprocessor:
    type: command
    component: azureml://registries/azureml/components/batch_output_formatter/labels/latest
    limits: {}
    inputs:
      batch_inference_output:
        type: uri_folder
        path: ${{parent.jobs.batch_score.outputs.mini_batch_results_output_directory}}
      ground_truth_input:
        type: uri_file
        path: ${{parent.jobs.batch_score_preprocessor.outputs.ground_truth_metadata}}
      model_type: oai
      label_column_name: ${{parent.inputs.ground_truth_column_name}}
      endpoint_url: ${{parent.inputs.endpoint_url}}
      handle_response_failure: use_fallback
      min_endpoint_success_ratio: 0.0
      is_performance_test: false
      use_tiktoken: false
    outputs:
      predictions:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      performance_metadata:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      ground_truth:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      successful_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      failed_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      unsafe_content_blocked_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  postprocessor:
    type: command
    component: azureml://registries/azureml/components/inference_postprocessor/labels/latest
    limits: {}
    inputs:
      ground_truth_dataset:
        type: uri_folder
        path: ${{parent.jobs.batch_score_postprocessor.outputs.ground_truth}}
      prediction_dataset:
        type: uri_folder
        path: ${{parent.jobs.batch_score_postprocessor.outputs.predictions}}
      ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
      prediction_column_name: ${{parent.inputs.prediction_column_name}}
      separator: ' '
      find_first: 1,2,3,4
    outputs:
      output_dataset_result:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl

  compute_metrics:
    type: command
    component: azureml://registries/azureml/components/compute_metrics/labels/latest
    limits: {}
    inputs:
      ground_truth:
        type: uri_folder
        path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
      prediction:
        type: uri_folder
        path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
      task: ${{parent.inputs.task}}
      ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
      prediction_column_name: ${{parent.inputs.prediction_column_name}}
      evaluation_config_params: '{"regexes_to_ignore": ["\\.0+$", ","]}'
    outputs:
      evaluation_result:
        type: uri_folder

tags:
  workflow: distill_llm_benchmark
  evaluation_type: question-answering

properties:
  _azureml.evaluation_run: Benchmark

settings:
  force_rerun: false
  default_compute: azureml:serverless
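
# One way to submit this pipeline with the Azure ML CLI v2 (a sketch; the values below are
# illustrative placeholders, not part of this repository):
#   az ml job create --file openbookqa.yaml \
#     --set inputs.endpoint_url="<scoring-uri>" \
#     --set inputs.deployment_name="<deployment-name>" \
#     --set inputs.connection_name="<workspace-connection-name>"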