# Source path: assets/training/finetune_acft_hf_nlp/components/validation/common/spec.yaml
# Azure ML command component that validates a finetune job.
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
type: command

# Identity and registry-facing labels.
name: ft_nlp_common_validation
display_name: Common Validation Component
version: 0.0.73
description: Component to validate the finetune job against Validation Service

# NOTE(review): marked deterministic even though the description mentions an
# external Validation Service - confirm that reusing earlier results is intended.
is_deterministic: true

# Runtime: environment pulled from the azureml registry, sources from the
# relative src/validation directory.
environment: azureml://registries/azureml/environments/acft-hf-nlp-gpu/versions/87
code: ../../../src/validation
# Component inputs. Every input is optional.
# Only system_properties is referenced by this component's command; the other
# inputs are declared here but never interpolated into the command line.
# NOTE(review): presumably they are consumed through the job definition by the
# validation logic - confirm.
inputs:
  # Model
  mlflow_model_path:
    type: mlflow_model
    optional: true
    mode: rw_mount
    description: >-
      MLflow model asset path. Special characters like \ and ' are invalid in
      the parameter value.

  # Compute targets of the other pipeline components
  compute_finetune:
    type: string
    default: serverless
    optional: true
    description: >-
      compute to be used for finetune component eg. provide 'FT-Cluster' if
      your compute is named 'FT-Cluster'. Special characters like \ and ' are
      invalid in the parameter value. If compute cluster name is provided,
      instance_type field will be ignored and the respective cluster will be
      used
  compute_model_import:
    type: string
    default: serverless
    optional: true
    description: >-
      compute to be used for model import component eg. provide 'FT-Cluster'
      if your compute is named 'FT-Cluster'. Special characters like \ and '
      are invalid in the parameter value. If compute cluster name is provided,
      instance_type field will be ignored and the respective cluster will be
      used
  compute_data_preprocess:
    type: string
    default: serverless
    optional: true
    description: >-
      compute to be used for data preprocess component eg. provide
      'FT-Cluster' if your compute is named 'FT-Cluster'. Special characters
      like \ and ' are invalid in the parameter value. If compute cluster name
      is provided, instance_type field will be ignored and the respective
      cluster will be used

  # Task and scale
  task_name:
    type: string
    default: SingleLabelClassification
    optional: true
    description: Finetuning task type
  num_nodes_finetune:
    type: integer
    min: 1
    default: 1
    optional: true
    description: >-
      number of nodes to be used for finetuning (used for distributed
      training)
  number_of_gpu_to_use_finetuning:
    type: integer
    default: 1
    optional: true
    description: Number of GPUs to use for finetuning

  # Datasets: single-file variants
  train_file_path:
    type: uri_file
    optional: true
    mode: rw_mount
    description: >-
      Path to the registered training data asset. The supported data formats
      are `jsonl`, `json`, `csv`, `tsv` and `parquet`. Special characters like
      \ and ' are invalid in the parameter value.
  validation_file_path:
    type: uri_file
    optional: true
    mode: rw_mount
    description: >-
      Path to the registered validation data asset. The supported data formats
      are `jsonl`, `json`, `csv`, `tsv` and `parquet`. Special characters like
      \ and ' are invalid in the parameter value.
  test_file_path:
    type: uri_file
    optional: true
    mode: rw_mount
    description: >-
      Path to the registered test data asset. The supported data formats are
      `jsonl`, `json`, `csv`, `tsv` and `parquet`. Special characters like \
      and ' are invalid in the parameter value.

  # Datasets: mltable variants
  train_mltable_path:
    type: mltable
    optional: true
    description: >-
      Path to the registered training data asset in `mltable` format. Special
      characters like \ and ' are invalid in the parameter value.
  validation_mltable_path:
    type: mltable
    optional: true
    description: >-
      Path to the registered validation data asset in `mltable` format.
      Special characters like \ and ' are invalid in the parameter value.
  test_mltable_path:
    type: mltable
    optional: true
    description: >-
      Path to the registered test data asset in `mltable` format. Special
      characters like \ and ' are invalid in the parameter value.
  user_column_names:
    type: string
    optional: true
    description: Comma separated list of column names to be used for training

  # Validation parameters
  system_properties:
    type: string
    optional: true
    description: Validation parameters propagated from pipeline.

  # Training schedule and batch sizing
  num_train_epochs:
    type: integer
    optional: true
    description: Number of training epochs
  max_steps:
    type: integer
    optional: true
    description: Maximum number of training steps
  per_device_train_batch_size:
    type: integer
    optional: true
    description: Batch size per GPU/CPU for training
  per_device_eval_batch_size:
    type: integer
    optional: true
    description: Batch size per GPU/CPU for evaluation
  auto_find_batch_size:
    type: string
    enum: ["true", "false"]
    default: "false"
    optional: true
    description: If set to true, will enable auto_find_batch_size for training

  # Optimizer settings
  learning_rate:
    type: number
    optional: true
    description: Learning rate for optimizer
  adam_beta1:
    type: number
    optional: true
    description: Beta1 hyperparameter for the Adam optimizer
  adam_beta2:
    type: number
    optional: true
    description: Beta2 hyperparameter for the Adam optimizer
  adam_epsilon:
    type: number
    optional: true
    description: Epsilon hyperparameter for the Adam optimizer

  # Training-mode switches (string-typed flags restricted by enum)
  apply_deepspeed:
    type: string
    enum: ["true", "false"]
    default: "false"
    optional: true
    description: If set to true, will enable deepspeed for training
  precision:
    type: string
    enum: ["32", "16"]
    default: "32"
    optional: true
    description: >-
      Apply mixed precision training. This can reduce memory footprint by
      performing operations in half-precision.
  apply_lora:
    type: string
    enum: ["true", "false"]
    default: "false"
    optional: true
    description: If "true" enables lora.
  apply_ort:
    type: string
    enum: ["true", "false"]
    default: "false"
    optional: true
    description: If set to true, will use the ONNXRunTime training
  deepspeed_stage:
    type: string
    enum: ["2", "3"]
    default: "2"
    optional: true
    description: >-
      This parameter configures which DEFAULT deepspeed config to be used -
      stage2 or stage3. The default choice is stage2. Note that, this
      parameter is ONLY applicable when user doesn't pass any config
      information via deepspeed port.

  # Model loading and tokenization
  ignore_mismatched_sizes:
    type: string
    enum: ["true", "false"]
    default: "true"
    optional: true
    description: >-
      Not setting this flag will raise an error if some of the weights from
      the checkpoint do not have the same size as the weights of the model.
  max_seq_length:
    type: integer
    default: -1
    optional: true
    description: >-
      Controls the maximum length to use when pad_to_max_length parameter is
      set to `true`. Default is -1 which means the padding is done up to the
      model's max length. Else will be padded to `max_seq_length`.
# Component outputs.
outputs:
  # File location handed to validation.py through --validation-info.
  validation_info:
    type: uri_file
    mode: rw_mount
    description: Validation status.
# Entry point. The folded scalar (>-) joins the lines below with single spaces
# and drops the trailing newline, yielding one command line.
# The $[[ ... ]] wrapper surrounds the argument built from the optional
# system_properties input, so it is left out when that input is not supplied.
command: >-
  python validation.py
  $[[--system_properties '${{inputs.system_properties}}']]
  --validation-info '${{outputs.validation_info}}'