sdk/python/foundation-models/system/distillation/math/pipelines/gsm8k.yaml:

$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
display_name: gsm8k_eval
description: Evaluate distilled models on grade school math dataset
inputs:
  task: question-answering
  sample_ratio: 0.01
  ground_truth_column_name: completion
  prediction_column_name: prediction
  # batch_score inputs
  endpoint_url: ""
  deployment_name: ""
  authentication_type: azureml_workspace_connection
  connection_name: ""
  debug_mode: False
jobs:
  downloader:
    type: command
    component: azureml://registries/azureml/components/dataset_downloader/labels/latest
    limits: {}
    inputs:
      dataset_name: gsm8k
      configuration: main
      split: test
    outputs:
      output_dataset:
        type: uri_folder
  sampler:
    type: command
    component: azureml://registries/azureml/components/dataset_sampler/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.downloader.outputs.output_dataset}}
      sampling_style: head
      sampling_ratio: ${{parent.inputs.sample_ratio}}
      random_seed: 0
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  preprocessor:
    type: command
    component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.sampler.outputs.output_dataset}}
      template_input: "{\n \"question\":{{question}},\n \"solution\":{{answer.split(\" ####\")[0]}},\n \"answer\":{{answer.split(\"#### \")[-1]|string}}\n}"
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  downloader_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_downloader/labels/latest
    limits: {}
    inputs:
      configuration: fewshot
      split: dev
      script_path:
        type: uri_file
        path: https://raw.githubusercontent.com/Azure/azureml-assets/main/assets/aml-benchmark/scripts/data_loaders/gsm8k_static_shots.py
    outputs:
      output_dataset:
        type: uri_folder
  sampler_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_sampler/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.downloader_fewshot.outputs.output_dataset}}
      sampling_style: head
      n_samples: 8
      random_seed: 0
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  preprocessor_fewshot:
    type: command
    component: azureml://registries/azureml/components/dataset_preprocessor/labels/latest
    limits: {}
    inputs:
      dataset:
        type: uri_folder
        path: ${{parent.jobs.sampler_fewshot.outputs.output_dataset}}
      template_input: "{\n \"question\":{{question}},\n \"solution\":{{answer.split(\" ####\")[0]}},\n \"answer\":{{answer.split(\"#### \")[-1]|string}}\n}"
    outputs:
      output_dataset:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
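  # A rough sketch of the record each preprocessor above is expected to emit, assuming
  # the standard GSM8K layout in which the answer column reads
  # "<step-by-step reasoning> #### <final number>" (the convention the two split()
  # calls in template_input rely on); the field values below are placeholders:
  #
  #   {"question": "<problem text>", "solution": "<step-by-step reasoning>", "answer": "<final number>"}
  #
  # The promptcrafter job below merges these records with the eight few-shot examples
  # into completion-style prompts.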
  promptcrafter:
    type: command
    component: azureml://registries/azureml/components/prompt_crafter/labels/latest
    limits: {}
    inputs:
      test_data:
        type: uri_folder
        path: ${{parent.jobs.preprocessor.outputs.output_dataset}}
      few_shot_data:
        type: uri_file
        path: ${{parent.jobs.preprocessor_fewshot.outputs.output_dataset}}
      prompt_type: completions
      prompt_pattern: 'Q: {{question}} A:'
      n_shots: 8
      output_pattern: '{{solution}} The answer is {{answer}}.'
      system_message: You are taking a math test. When answering the question, please ensure that the last number you write in the response is the correct, numerical answer to the question.
      few_shot_separator: ""
      ground_truth_column_name: answer
      random_seed: 0
    outputs:
      output_file:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  batch_score_preprocessor:
    type: command
    component: azureml://registries/azureml/components/batch_inference_preparer/labels/latest
    limits: {}
    inputs:
      input_dataset:
        type: uri_file
        path: ${{parent.jobs.promptcrafter.outputs.output_file}}
      model_type: oai
      batch_input_pattern: '{"messages": [{"role": "user", "content": "###<prompt>"}], "temperature": 0.6, "top_p": 1.0, "max_new_tokens": 2048, "frequency_penalty": 0.0, "presence_penalty": 0.0}'
      label_column_name: ground_truth
      is_performance_test: false
    outputs:
      formatted_data:
        type: mltable
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
      ground_truth_metadata:
        type: uri_folder
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}
  config_generator:
    type: command
    component: azureml://registries/azureml/components/batch_benchmark_config_generator/labels/latest
    inputs:
      scoring_url: ${{parent.inputs.endpoint_url}}
      deployment_name: ${{parent.inputs.deployment_name}}
      authentication_type: ${{parent.inputs.authentication_type}}
      connection_name: ${{parent.inputs.connection_name}}
      additional_headers: ""
      debug_mode: ${{parent.inputs.debug_mode}}
      ensure_ascii: false
      max_retry_time_interval: 300
      initial_worker_count: 5
      max_worker_count: 200
      model_type: oss
    outputs:
      batch_score_config:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  # Batch score job
  batch_score:
    type: parallel
    component: azureml://registries/azureml/components/batch_score_oss/labels/latest
    inputs:
      async_mode: False
      data_input_table: ${{parent.jobs.batch_score_preprocessor.outputs.formatted_data}}
      configuration_file: ${{parent.jobs.config_generator.outputs.batch_score_config}}
    outputs:
      job_output_path:
        type: uri_file
      mini_batch_results_output_directory:
        type: uri_folder
    resources:
      instance_count: 1
    max_concurrency_per_instance: 1
    retry_settings:
      timeout: 6000
      max_retries: 10
    environment_variables:
      BATCH_SCORE_INITIAL_REQUEST_TIMEOUT: '180'
      BATCH_SCORE_DELAY_AFTER_SUCCESSFUL_REQUEST: 'False'
      BATCH_SCORE_MAX_REQUEST_TIMEOUT: '300'
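  # A rough sketch of the per-row request body that batch_score is expected to send,
  # assuming batch_inference_preparer substitutes the crafted prompt for the
  # ###<prompt> placeholder in batch_input_pattern (the prompt text here is a
  # placeholder, not a real record):
  #
  #   {"messages": [{"role": "user", "content": "Q: <few-shot examples + question> A:"}],
  #    "temperature": 0.6, "top_p": 1.0, "max_new_tokens": 2048,
  #    "frequency_penalty": 0.0, "presence_penalty": 0.0}
  #
  # The endpoint URL, deployment name, and workspace connection come from the
  # config_generator output, so no credentials are stored in this file.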
  batch_score_postprocessor:
    type: command
    component: azureml://registries/azureml/components/batch_output_formatter/labels/latest
    limits: {}
    inputs:
      batch_inference_output:
        type: uri_folder
        path: ${{parent.jobs.batch_score.outputs.mini_batch_results_output_directory}}
      ground_truth_input:
        type: uri_file
        path: ${{parent.jobs.batch_score_preprocessor.outputs.ground_truth_metadata}}
      model_type: oai
      label_column_name: ${{parent.inputs.ground_truth_column_name}}
      endpoint_url: ${{parent.inputs.endpoint_url}}
      handle_response_failure: use_fallback
      min_endpoint_success_ratio: 0.0
      is_performance_test: false
      use_tiktoken: false
    outputs:
      predictions:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      performance_metadata:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      ground_truth:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      successful_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      failed_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
      unsafe_content_blocked_requests:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  postprocessor:
    type: command
    component: azureml://registries/azureml/components/inference_postprocessor/labels/latest
    limits: {}
    inputs:
      ground_truth_dataset:
        type: uri_folder
        path: ${{parent.jobs.batch_score_postprocessor.outputs.ground_truth}}
      prediction_dataset:
        type: uri_folder
        path: ${{parent.jobs.batch_score_postprocessor.outputs.predictions}}
      ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
      prediction_column_name: ${{parent.inputs.prediction_column_name}}
      extract_number: last
      strip_characters: .
    outputs:
      output_dataset_result:
        type: uri_file
        path: azureml://datastores/${{default_datastore}}/paths/azureml/${{name}}/${{output_name}}.jsonl
  compute_metrics:
    type: command
    component: azureml://registries/azureml/components/compute_metrics/labels/latest
    limits: {}
    inputs:
      ground_truth:
        type: uri_folder
        path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
      prediction:
        type: uri_folder
        path: ${{parent.jobs.postprocessor.outputs.output_dataset_result}}
      task: ${{parent.inputs.task}}
      ground_truth_column_name: ${{parent.inputs.ground_truth_column_name}}
      prediction_column_name: ${{parent.inputs.prediction_column_name}}
      evaluation_config_params: '{"regexes_to_ignore": ["\\.0+$", ","]}'
    outputs:
      evaluation_result:
        type: uri_folder
tags:
  workflow: distill_llm_benchmark
  evaluation_type: text-generation
properties:
  _azureml.evaluation_run: Benchmark
settings:
  force_rerun: false
  default_compute: azureml:serverless
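# A minimal sketch of submitting this pipeline with the Azure ML CLI v2, assuming a
# default resource group and workspace are already configured; <scoring-url>,
# <deployment>, and <connection> are placeholders for the distilled model's endpoint,
# deployment, and workspace connection (not values taken from this file):
#
#   az ml job create --file gsm8k.yaml \
#     --set inputs.endpoint_url=<scoring-url> \
#     --set inputs.deployment_name=<deployment> \
#     --set inputs.connection_name=<connection>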