# Source: assets/training/model_management/components/import_model/spec.yaml (294 lines of code)

# Pipeline component that imports a model into an AzureML workspace or registry.
#
# The jobs below are chained through their inputs/outputs in this order:
#   validation_trigger_import -> download_model -> convert_model_to_mlflow
#     -> mlflow_model_local_validation -> register_model
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
type: pipeline

name: import_model
display_name: Import model
description: Import a model into a workspace or a registry

version: 0.0.42

# Pipeline inputs
inputs:
  # Pipeline specific compute. Both values are forwarded to every job below
  # (`compute` as the job compute, `instance_type` as `resources.instance_type`).
  compute:
    type: string
    optional: true
    default: serverless
    description: >-
      Common compute for model download, MLflow conversion and registration.
      eg. provide 'FT-Cluster' if your compute is named 'FT-Cluster'.
      Special characters like \ and ' are invalid in the parameter value.
      If compute name is provided, instance_type field will be ignored and
      the respective cluster will be used
  instance_type:
    type: string
    optional: true
    default: STANDARD_NC6s_v3
    description: >-
      Instance type to be used for the component in case of serverless compute,
      eg. STANDARD_NC6s_v3. The parameter compute must be set to 'serverless'
      for instance_type to be used

  # Inputs for download model
  model_source:
    type: string
    description: Storage containers from where model will be sourced from
    default: Huggingface
    enum:
      - AzureBlob
      - GIT
      - Huggingface

  model_id:
    type: string
    description: >-
      A valid model id for the model source selected. For example you can
      specify `bert-base-uncased` for importing HuggingFace bert base uncased
      model. Please specify the complete URL if **GIT** or **AzureBlob** is
      selected in `model_source`

  model_flavor:
    type: string
    enum:
      - HFTransformersV2
      - OSS
    default: HFTransformersV2
    optional: false
    description: Flavor of MLFlow to which model the model is converted to.

  model_framework:
    type: string
    enum:
      - Huggingface
      - MMLab
      - llava
      - AutoML
    default: Huggingface
    optional: false
    description: Framework from which model is imported from.

  vllm_enabled:
    type: boolean
    description: Enable vllm in the converted model
    default: false
    optional: false

  # Only forwarded to the download_model job.
  token:
    type: string
    description: >-
      If set use it to access the private models or authenticate the user.
      For example, user can get the token for HF private model by creating
      account in Huggingface, accept the condition for models that needs to be
      downloaded and create access token from browser.
      For more details please visit - https://huggingface.co/docs/hub/security-tokens
    optional: true

  # Inputs for the MLflow conversion
  license_file_path:
    type: uri_file
    description: Path to the license file
    optional: true

  task_name:
    description: A Hugging face task on which model was trained on
    enum:
      - chat-completion
      - fill-mask
      - token-classification
      - question-answering
      - summarization
      - text-generation
      - text2text-generation
      - text-classification
      - translation
      - image-classification
      - image-classification-multilabel
      - image-object-detection
      - image-instance-segmentation
      - image-to-text
      - text-to-image
      - text-to-image-inpainting
      - image-text-to-text
      - image-to-image
      - zero-shot-image-classification
      - mask-generation
      - video-multi-object-tracking
      - visual-question-answering
      - image-feature-extraction
    optional: true
    type: string

  hf_config_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model config.
      eg: trust_remote_code=True;
    optional: true

  hf_tokenizer_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model tokenizer.
      eg: trust_remote_code=True, device_map=auto,
    optional: true

  hf_model_args:
    type: string
    description: |
      Provide args that should be used to load Huggingface model.
      eg: trust_remote_code=True, device_map=auto, low_cpu_mem_usage=True
    optional: true

  hf_pipeline_args:
    type: string
    description: |
      Provide pipeline args that should be used while loading the hugging face model.
      Dont use quotes. If value cannot be eval'ed it will be taken as a string.
      eg: trust_remote_code=True, device_map=auto
    optional: true

  hf_config_class:
    type: string
    description: >-
      AutoConfig class may not be sufficient to load config for some of the
      models. You can use this parameter to send Config class name as it is
    optional: true

  hf_model_class:
    type: string
    description: >-
      AutoModel classes may not be sufficient to load some of the models.
      You can use this parameter to send Model class name as it is
    optional: true

  hf_tokenizer_class:
    type: string
    description: >-
      AutoTokenizer class may not be sufficient to load tokenizer for some of
      the models. You can use this parameter to send Tokenizer class name as
      it is
    optional: true

  hf_use_experimental_features:
    type: boolean
    description: Enable experimental features for hugging face MLflow model conversion
    default: false
    optional: true

  extra_pip_requirements:
    type: string
    description: |
      Extra pip dependencies that MLflow model should capture as part of conversion. This would be used to create environment while loading the model for inference.
      Pip dependencies expressed as below. Do not use quotes for passing.
      eg: pkg1==1.0, pkg2, pkg3==1.0
    optional: true

  # Inputs for MLflow local validation
  local_validation_test_data:
    type: uri_file
    optional: true
    description: >-
      Test data for MLflow local validation. Validation will be skipped if
      test data is not provided

  local_validation_column_rename_map:
    type: string
    optional: true
    description: |
      Provide mapping for local validation test data column names, that should be renamed before inferencing
      eg: col1:ren1; col2:ren2; col3:ren3

  # Inputs for Model registration
  custom_model_name:
    type: string
    optional: true
    description: >-
      Model name to use in the registration. If name already exists, the
      version will be auto incremented

  model_version:
    type: string
    optional: true
    description: >-
      Model version in workspace/registry. If the same model name and version
      exists, the version will be auto incremented

  model_description:
    type: string
    optional: true
    description: Description of the model that will be shown in AzureML registry or workspace

  registry_name:
    type: string
    optional: true
    description: >-
      Name of the AzureML asset registry where the model will be registered.
      Model will be registered in a workspace if this is unspecified

  model_metadata:
    type: uri_file
    optional: true
    description: >-
      A JSON or a YAML file that contains model metadata conforming to Model V2
      [contract](https://azuremlschemas.azureedge.net/latest/model.schema.json)

  update_existing_model:
    type: boolean
    default: false
    description: If set to true, will update the existing model. If set to false, will create a new model.
    optional: true

# Pipeline outputs
outputs:
  # Bound to the output of the mlflow_model_local_validation job.
  mlflow_model_folder:
    description: Output path for the converted MLflow model
    type: mlflow_model
  # Bound to the output of the register_model job.
  model_registration_details:
    description: >-
      Output folder with a file which captures transformations applied above
      and registration details in JSON file
    type: uri_folder

jobs:
  # Step 1: receives the pipeline inputs and emits `validation_info`,
  # which download_model consumes.
  validation_trigger_import:
    component: azureml:validation_trigger_import:0.0.15
    compute: ${{parent.inputs.compute}}
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    inputs:
      compute: ${{parent.inputs.compute}}
      task_name: ${{parent.inputs.task_name}}
      license_file_path: ${{parent.inputs.license_file_path}}
      instance_type: ${{parent.inputs.instance_type}}
      model_source: ${{parent.inputs.model_source}}
      model_id: ${{parent.inputs.model_id}}
      model_version: ${{parent.inputs.model_version}}
      model_flavor: ${{parent.inputs.model_flavor}}
      model_description: ${{parent.inputs.model_description}}
      model_metadata: ${{parent.inputs.model_metadata}}
      registry_name: ${{parent.inputs.registry_name}}
      custom_model_name: ${{parent.inputs.custom_model_name}}
      local_validation_test_data: ${{parent.inputs.local_validation_test_data}}
      local_validation_column_rename_map: ${{parent.inputs.local_validation_column_rename_map}}
      extra_pip_requirements: ${{parent.inputs.extra_pip_requirements}}
      hf_config_args: ${{parent.inputs.hf_config_args}}
      hf_tokenizer_args: ${{parent.inputs.hf_tokenizer_args}}
      hf_model_args: ${{parent.inputs.hf_model_args}}
      hf_pipeline_args: ${{parent.inputs.hf_pipeline_args}}
      hf_config_class: ${{parent.inputs.hf_config_class}}
      hf_model_class: ${{parent.inputs.hf_model_class}}
      hf_tokenizer_class: ${{parent.inputs.hf_tokenizer_class}}
      hf_use_experimental_features: ${{parent.inputs.hf_use_experimental_features}}
      update_existing_model: ${{parent.inputs.update_existing_model}}
    outputs:
      validation_info:
        type: uri_file

  # Step 2: downloads the model from `model_source`; runs with user identity.
  download_model:
    component: azureml:download_model:0.0.31
    compute: ${{parent.inputs.compute}}
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      model_source: ${{parent.inputs.model_source}}
      model_id: ${{parent.inputs.model_id}}
      validation_info: ${{parent.jobs.validation_trigger_import.outputs.validation_info}}
      update_existing_model: ${{parent.inputs.update_existing_model}}
      token: ${{parent.inputs.token}}
    outputs:
      model_download_metadata:
        type: uri_file
      model_output:
        type: uri_folder

  # Step 3: converts the downloaded model to MLflow; runs with user identity.
  convert_model_to_mlflow:
    component: azureml:convert_model_to_mlflow:0.0.37
    compute: ${{parent.inputs.compute}}
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      task_name: ${{parent.inputs.task_name}}
      model_flavor: ${{parent.inputs.model_flavor}}
      vllm_enabled: ${{parent.inputs.vllm_enabled}}
      license_file_path: ${{parent.inputs.license_file_path}}
      model_framework: ${{parent.inputs.model_framework}}
      model_download_metadata: ${{parent.jobs.download_model.outputs.model_download_metadata}}
      model_path: ${{parent.jobs.download_model.outputs.model_output}}
      hf_config_args: ${{parent.inputs.hf_config_args}}
      hf_tokenizer_args: ${{parent.inputs.hf_tokenizer_args}}
      hf_model_args: ${{parent.inputs.hf_model_args}}
      hf_pipeline_args: ${{parent.inputs.hf_pipeline_args}}
      hf_config_class: ${{parent.inputs.hf_config_class}}
      hf_model_class: ${{parent.inputs.hf_model_class}}
      hf_tokenizer_class: ${{parent.inputs.hf_tokenizer_class}}
      hf_use_experimental_features: ${{parent.inputs.hf_use_experimental_features}}
      extra_pip_requirements: ${{parent.inputs.extra_pip_requirements}}
    outputs:
      mlflow_model_folder:
        type: mlflow_model

  # Step 4: local validation of the converted model; its output is the
  # pipeline-level `mlflow_model_folder` output.
  mlflow_model_local_validation:
    component: azureml:mlflow_model_local_validation:0.0.17
    compute: ${{parent.inputs.compute}}
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    inputs:
      model_path: ${{parent.jobs.convert_model_to_mlflow.outputs.mlflow_model_folder}}
      test_data_path: ${{parent.inputs.local_validation_test_data}}
      column_rename_map: ${{parent.inputs.local_validation_column_rename_map}}
      task_name: ${{parent.inputs.task_name}}
    outputs:
      mlflow_model_folder: ${{parent.outputs.mlflow_model_folder}}

  # Step 5: registers the validated model; runs with user identity. Its output
  # is the pipeline-level `model_registration_details` output.
  register_model:
    component: azureml:register_model:0.0.20
    compute: ${{parent.inputs.compute}}
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
    identity:
      type: user_identity
    inputs:
      model_name: ${{parent.inputs.custom_model_name}}
      model_version: ${{parent.inputs.model_version}}
      model_description: ${{parent.inputs.model_description}}
      registry_name: ${{parent.inputs.registry_name}}
      model_metadata: ${{parent.inputs.model_metadata}}
      model_type: mlflow_model
      model_download_metadata: ${{parent.jobs.download_model.outputs.model_download_metadata}}
      model_path: ${{parent.jobs.mlflow_model_local_validation.outputs.mlflow_model_folder}}
    outputs:
      registration_details_folder: ${{parent.outputs.model_registration_details}}

# NOTE(review): nesting of `tags` was lost when the file was flattened; placed
# at component level here - confirm against the original spec.
tags:
  Preview: ""