# assets/training/model_management/components/import_model/spec.yaml (294 lines of code) (raw)
# Azure ML pipeline component: schema reference and component kind.
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
type: pipeline

# Component identity (name + version) followed by the human-readable labels.
name: import_model
version: 0.0.42
display_name: Import model
description: Import a model into a workspace or a registry
# Pipeline inputs
inputs:
  # pipeline specific compute
  # `compute` is used as the compute target of every job in this pipeline and
  # `instance_type` as each job's resources.instance_type.
  compute:
    type: string
    optional: true
    default: serverless
    description: >-
      Common compute for model download, MLflow conversion and registration. eg. provide 'FT-Cluster' if
      your compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value.
      If compute name is provided, instance_type field will be ignored and the respective cluster will be used
  instance_type:
    type: string
    optional: true
    default: STANDARD_NC6s_v3
    description: >-
      Instance type to be used for the component in case of serverless compute, eg. STANDARD_NC6s_v3.
      The parameter compute must be set to 'serverless' for instance_type to be used

  ## Inputs for download model
  model_source:
    type: string
    description: Storage container from which the model will be sourced
    default: Huggingface
    enum:
      - AzureBlob
      - GIT
      - Huggingface
  model_id:
    type: string
    description: >-
      A valid model id for the model source selected. For example you can specify `bert-base-uncased`
      for importing HuggingFace bert base uncased model. Please specify the complete URL if **GIT** or
      **AzureBlob** is selected in `model_source`
  model_flavor:
    type: string
    enum:
      - HFTransformersV2
      - OSS
    default: HFTransformersV2
    optional: false
    description: Flavor of MLflow to which the model is converted.
  # model_framework and vllm_enabled are forwarded only to the
  # convert_model_to_mlflow job.
  model_framework:
    type: string
    enum:
      - Huggingface
      - MMLab
      - llava
      - AutoML
    default: Huggingface
    optional: false
    description: Framework from which the model is imported.
  vllm_enabled:
    type: boolean
    description: Enable vllm in the converted model
    default: false
    optional: false
  # Forwarded only to the download_model job.
  token:
    type: string
    description: >-
      If set use it to access the private models or authenticate the user. For example, user can get
      the token for HF private model by creating account in Huggingface, accept the condition for models
      that needs to be downloaded and create access token from browser. For more details please visit
      - https://huggingface.co/docs/hub/security-tokens
    optional: true

  ## Inputs for the MlFLow conversion
  license_file_path:
    type: uri_file
    description: Path to the license file
    optional: true
  task_name:
    type: string
    description: A Hugging Face task on which the model was trained
    enum:
      - chat-completion
      - fill-mask
      - token-classification
      - question-answering
      - summarization
      - text-generation
      - text2text-generation
      - text-classification
      - translation
      - image-classification
      - image-classification-multilabel
      - image-object-detection
      - image-instance-segmentation
      - image-to-text
      - text-to-image
      - text-to-image-inpainting
      - image-text-to-text
      - image-to-image
      - zero-shot-image-classification
      - mask-generation
      - video-multi-object-tracking
      - visual-question-answering
      - image-feature-extraction
    optional: true
  # The hf_*_args descriptions are literal block scalars (`|`): their line
  # breaks and trailing newline are part of the value and are kept as-is.
  hf_config_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model config.
      eg: trust_remote_code=True;
    optional: true
  hf_tokenizer_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model tokenizer.
      eg: trust_remote_code=True, device_map=auto,
    optional: true
  hf_model_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model.
      eg: trust_remote_code=True, device_map=auto, low_cpu_mem_usage=True
    optional: true
  hf_pipeline_args:
    type: string
    description: |
      Provide pipeline args that should be used while loading the hugging face model.
      Don't use quotes. If value cannot be eval'ed it will be taken as a string.
      eg: trust_remote_code=True, device_map=auto
    optional: true
  hf_config_class:
    type: string
    description: >-
      AutoConfig class may not be sufficient to load config for some of the models.
      You can use this parameter to send Config class name as it is
    optional: true
  hf_model_class:
    type: string
    description: >-
      AutoModel classes may not be sufficient to load some of the models.
      You can use this parameter to send Model class name as it is
    optional: true
  hf_tokenizer_class:
    type: string
    description: >-
      AutoTokenizer class may not be sufficient to load tokenizer for some of the models.
      You can use this parameter to send Tokenizer class name as it is
    optional: true
  hf_use_experimental_features:
    type: boolean
    description: Enable experimental features for hugging face MLflow model conversion
    default: false
    optional: true
  extra_pip_requirements:
    type: string
    description: |
      Extra pip dependencies that MLflow model should capture as part of conversion. This would be used to create environment while loading the model for inference.
      Pip dependencies expressed as below. Do not use quotes for passing.
      eg: pkg1==1.0, pkg2, pkg3==1.0
    optional: true

  ## Inputs for MLflow local validation
  local_validation_test_data:
    type: uri_file
    optional: true
    description: >-
      Test data for MLflow local validation. Validation will be skipped if test data is not provided
  local_validation_column_rename_map:
    type: string
    optional: true
    description: |
      Provide mapping for local validation test data column names, that should be renamed before inferencing
      eg: col1:ren1; col2:ren2; col3:ren3

  ## Inputs for Model registration
  # Bound to the register_model job's `model_name` input.
  custom_model_name:
    type: string
    optional: true
    description: >-
      Model name to use in the registration. If name already exists, the version will be auto incremented
  model_version:
    type: string
    optional: true
    description: >-
      Model version in workspace/registry. If the same model name and version exists,
      the version will be auto incremented
  model_description:
    type: string
    optional: true
    description: Description of the model that will be shown in AzureML registry or workspace
  registry_name:
    type: string
    optional: true
    description: >-
      Name of the AzureML asset registry where the model will be registered.
      Model will be registered in a workspace if this is unspecified
  model_metadata:
    type: uri_file
    optional: true
    description: >-
      A JSON or a YAML file that contains model metadata conforming to Model V2
      [contract](https://azuremlschemas.azureedge.net/latest/model.schema.json)
  update_existing_model:
    type: boolean
    default: false
    description: If set to true, will update the existing model. If set to false, will create a new model.
    optional: true
# Pipeline outputs
outputs:
  # Bound to the `mlflow_model_folder` output of the
  # mlflow_model_local_validation job.
  mlflow_model_folder:
    type: mlflow_model
    description: Output path for the converted MLflow model
  # Bound to the `registration_details_folder` output of the register_model job.
  model_registration_details:
    type: uri_folder
    description: Output folder with a file which captures transformations applied above and registration details in JSON file
# Data flow between the jobs, as wired by the bindings below:
#   validation_trigger_import -> download_model -> convert_model_to_mlflow
#     -> mlflow_model_local_validation -> register_model
# Every job uses the pipeline-level `compute` and `instance_type` inputs.
jobs:
  # Receives most pipeline inputs and emits `validation_info`, which
  # download_model takes as an input.
  validation_trigger_import:
    component: azureml:validation_trigger_import:0.0.15
    compute: '${{parent.inputs.compute}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    inputs:
      # compute settings
      compute: '${{parent.inputs.compute}}'
      instance_type: '${{parent.inputs.instance_type}}'
      # model source
      model_source: '${{parent.inputs.model_source}}'
      model_id: '${{parent.inputs.model_id}}'
      # MLflow conversion
      task_name: '${{parent.inputs.task_name}}'
      model_flavor: '${{parent.inputs.model_flavor}}'
      license_file_path: '${{parent.inputs.license_file_path}}'
      extra_pip_requirements: '${{parent.inputs.extra_pip_requirements}}'
      hf_config_args: '${{parent.inputs.hf_config_args}}'
      hf_tokenizer_args: '${{parent.inputs.hf_tokenizer_args}}'
      hf_model_args: '${{parent.inputs.hf_model_args}}'
      hf_pipeline_args: '${{parent.inputs.hf_pipeline_args}}'
      hf_config_class: '${{parent.inputs.hf_config_class}}'
      hf_model_class: '${{parent.inputs.hf_model_class}}'
      hf_tokenizer_class: '${{parent.inputs.hf_tokenizer_class}}'
      hf_use_experimental_features: '${{parent.inputs.hf_use_experimental_features}}'
      # local validation
      local_validation_test_data: '${{parent.inputs.local_validation_test_data}}'
      local_validation_column_rename_map: '${{parent.inputs.local_validation_column_rename_map}}'
      # registration
      custom_model_name: '${{parent.inputs.custom_model_name}}'
      model_version: '${{parent.inputs.model_version}}'
      model_description: '${{parent.inputs.model_description}}'
      model_metadata: '${{parent.inputs.model_metadata}}'
      registry_name: '${{parent.inputs.registry_name}}'
      update_existing_model: '${{parent.inputs.update_existing_model}}'
    outputs:
      validation_info:
        type: uri_file

  # Runs under the submitting user's identity. Produces the downloaded model
  # folder plus a metadata file that both the conversion and registration
  # jobs consume.
  download_model:
    component: azureml:download_model:0.0.31
    compute: '${{parent.inputs.compute}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      model_source: '${{parent.inputs.model_source}}'
      model_id: '${{parent.inputs.model_id}}'
      token: '${{parent.inputs.token}}'
      update_existing_model: '${{parent.inputs.update_existing_model}}'
      # output of the validation_trigger_import job
      validation_info: '${{parent.jobs.validation_trigger_import.outputs.validation_info}}'
    outputs:
      model_download_metadata:
        type: uri_file
      model_output:
        type: uri_folder

  # Takes the downloaded model and its metadata and emits an MLflow model.
  convert_model_to_mlflow:
    component: azureml:convert_model_to_mlflow:0.0.37
    compute: '${{parent.inputs.compute}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      # outputs of the download_model job
      model_download_metadata: '${{parent.jobs.download_model.outputs.model_download_metadata}}'
      model_path: '${{parent.jobs.download_model.outputs.model_output}}'
      # conversion settings
      task_name: '${{parent.inputs.task_name}}'
      model_flavor: '${{parent.inputs.model_flavor}}'
      model_framework: '${{parent.inputs.model_framework}}'
      vllm_enabled: '${{parent.inputs.vllm_enabled}}'
      license_file_path: '${{parent.inputs.license_file_path}}'
      extra_pip_requirements: '${{parent.inputs.extra_pip_requirements}}'
      # Hugging Face loading options
      hf_config_args: '${{parent.inputs.hf_config_args}}'
      hf_tokenizer_args: '${{parent.inputs.hf_tokenizer_args}}'
      hf_model_args: '${{parent.inputs.hf_model_args}}'
      hf_pipeline_args: '${{parent.inputs.hf_pipeline_args}}'
      hf_config_class: '${{parent.inputs.hf_config_class}}'
      hf_model_class: '${{parent.inputs.hf_model_class}}'
      hf_tokenizer_class: '${{parent.inputs.hf_tokenizer_class}}'
      hf_use_experimental_features: '${{parent.inputs.hf_use_experimental_features}}'
    outputs:
      mlflow_model_folder:
        type: mlflow_model

  # Consumes the converted MLflow model; its output is bound to the pipeline
  # output `mlflow_model_folder` and is also what register_model registers.
  mlflow_model_local_validation:
    component: azureml:mlflow_model_local_validation:0.0.17
    compute: '${{parent.inputs.compute}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    inputs:
      model_path: '${{parent.jobs.convert_model_to_mlflow.outputs.mlflow_model_folder}}'
      test_data_path: '${{parent.inputs.local_validation_test_data}}'
      column_rename_map: '${{parent.inputs.local_validation_column_rename_map}}'
      task_name: '${{parent.inputs.task_name}}'
    outputs:
      mlflow_model_folder: '${{parent.outputs.mlflow_model_folder}}'

  # Registers the locally validated model; its output is bound to the
  # pipeline output `model_registration_details`.
  register_model:
    component: azureml:register_model:0.0.20
    compute: '${{parent.inputs.compute}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      # the pipeline input `custom_model_name` is passed as `model_name`
      model_name: '${{parent.inputs.custom_model_name}}'
      model_version: '${{parent.inputs.model_version}}'
      model_description: '${{parent.inputs.model_description}}'
      registry_name: '${{parent.inputs.registry_name}}'
      model_metadata: '${{parent.inputs.model_metadata}}'
      model_type: mlflow_model
      # outputs of upstream jobs
      model_download_metadata: '${{parent.jobs.download_model.outputs.model_download_metadata}}'
      model_path: '${{parent.jobs.mlflow_model_local_validation.outputs.mlflow_model_folder}}'
    outputs:
      registration_details_folder: '${{parent.outputs.model_registration_details}}'
# Component tags. `Preview` carries an empty string value.
# NOTE(review): presumably marks the component as a preview feature - confirm.
tags:
  Preview: ''