# Excerpt: run() from assets/training/model_management/src/run_model_preprocess.py


def run():
    """Run preprocess.

    Parses the CLI arguments, validates the model framework and task name
    (falling back to the task recorded in the model-download metadata),
    assembles the preprocess argument dictionary, converts the model via
    ``run_preprocess`` into ``mlflow_model_output_dir``, and copies an
    optional user-provided license file into the output.

    Raises:
        Exception: If ``model_framework`` is not a supported value.
        AzureMLException: If no supported task name can be resolved from
            either the CLI arguments or the download metadata.
    """
    parser = _get_parser()
    args, _ = parser.parse_known_args()

    model_id = args.model_id
    task_name = args.task_name
    model_flavor = args.model_flavor
    # Boolean flags arrive as strings; anything other than "false"
    # (case-insensitive) enables the feature.
    vllm_enabled = args.vllm_enabled.lower() != "false"
    model_framework = args.model_framework
    hf_config_args = args.hf_config_args
    hf_tokenizer_args = args.hf_tokenizer_args
    hf_model_args = args.hf_model_args
    hf_pipeline_args = args.hf_pipeline_args
    hf_config_class = args.hf_config_class
    hf_model_class = args.hf_model_class
    hf_tokenizer_class = args.hf_tokenizer_class
    hf_use_experimental_features = args.hf_use_experimental_features.lower() != "false"
    extra_pip_requirements = args.extra_pip_requirements
    inference_base_image = args.inference_base_image

    model_download_metadata_path = args.model_download_metadata
    model_path = args.model_path
    mlflow_model_output_dir = args.mlflow_model_output_dir
    license_file_path = args.license_file_path
    TRUST_CODE_KEY = "trust_remote_code=True"

    if not ModelFramework.has_value(model_framework):
        raise Exception(f"Unsupported model framework {model_framework}")

    # Seed preprocess args with tags/properties/misc captured at download time.
    preprocess_args = {}
    if model_download_metadata_path:
        with open(model_download_metadata_path) as f:
            download_details = json.load(f)
            preprocess_args.update(download_details.get("tags", {}))
            preprocess_args.update(download_details.get("properties", {}))
            preprocess_args["misc"] = download_details.get("misc", [])

    # Fall back to the task recorded in the download metadata when the CLI
    # task is missing or not among the supported transformers/pyfunc tasks.
    if task_name is None or \
            (
                not TransformersSupportedTasks.has_value(task_name.lower()) and
                not PyFuncSupportedTasks.has_value(task_name.lower())
            ):
        task_name = preprocess_args.get("task")
        logger.warning("task_name is not provided or not supported. "
                       f"Using task_name={task_name} from model download metadata.")

    if task_name is None:
        # Report the original CLI value (args.task_name) in the error, not
        # the metadata fallback, so the user sees what they actually passed.
        supported_tasks = set(TransformersSupportedTasks.list_values() + PyFuncSupportedTasks.list_values())
        raise AzureMLException._with_error(
                AzureMLError.create(UnsupportedTaskType, task_type=args.task_name,
                                    supported_tasks=list(supported_tasks))
            )

    # Models shipping custom .py code require trust_remote_code=True on every
    # Hugging Face load call (model, tokenizer, config).
    # NOTE(review): when the user supplied hf_*_args WITHOUT the trust key,
    # this replaces the whole value with just the trust key, dropping the
    # user's other settings — confirm whether appending is intended instead.
    files = check_for_py_files(model_path)
    logger.info(f"check if model folder contains .py files or not: {files}")
    if files:
        if hf_model_args is None or TRUST_CODE_KEY not in hf_model_args:
            hf_model_args = TRUST_CODE_KEY
            logger.warning(f"{TRUST_CODE_KEY} is not provided for hf_model_args. Using {TRUST_CODE_KEY}.")
        if hf_tokenizer_args is None or TRUST_CODE_KEY not in hf_tokenizer_args:
            hf_tokenizer_args = TRUST_CODE_KEY
            logger.warning(f"{TRUST_CODE_KEY} is not provided for hf_tokenizer_args. Using {TRUST_CODE_KEY}.")
        if hf_config_args is None or TRUST_CODE_KEY not in hf_config_args:
            hf_config_args = TRUST_CODE_KEY
            logger.warning(f"{TRUST_CODE_KEY} is not provided for hf_config_args. Using {TRUST_CODE_KEY}.")

    preprocess_args["task"] = task_name.lower()
    preprocess_args["model_id"] = model_id if model_id else preprocess_args.get("model_id")
    preprocess_args["model_flavor"] = model_flavor if model_flavor else "HFtransformersV2"
    preprocess_args["vllm_enabled"] = vllm_enabled
    preprocess_args[HF_CONF.EXTRA_PIP_REQUIREMENTS.value] = extra_pip_requirements
    preprocess_args[HF_CONF.HF_CONFIG_ARGS.value] = hf_config_args
    preprocess_args[HF_CONF.HF_TOKENIZER_ARGS.value] = hf_tokenizer_args
    preprocess_args[HF_CONF.HF_MODEL_ARGS.value] = hf_model_args
    preprocess_args[HF_CONF.HF_PIPELINE_ARGS.value] = hf_pipeline_args
    preprocess_args[HF_CONF.HF_CONFIG_CLASS.value] = hf_config_class
    preprocess_args[HF_CONF.HF_PRETRAINED_CLASS.value] = hf_model_class
    preprocess_args[HF_CONF.HF_TOKENIZER_CLASS.value] = hf_tokenizer_class
    preprocess_args[HF_CONF.HF_USE_EXPERIMENTAL_FEATURES.value] = hf_use_experimental_features
    preprocess_args["inference_base_image"] = inference_base_image

    # update custom dimensions with input parameters
    custom_dimensions.update_custom_dimensions(preprocess_args)

    logger.info(f"Preprocess args : {preprocess_args}")

    # Preprocess into temp dirs placed inside the output dir (same filesystem,
    # so the copy below is cheap and temp space is cleaned up on exit).
    with TemporaryDirectory(dir=mlflow_model_output_dir) as working_dir, TemporaryDirectory(
        dir=mlflow_model_output_dir
    ) as temp_dir:
        run_preprocess(model_framework, model_path, working_dir, temp_dir, **preprocess_args)
        shutil.copytree(working_dir, mlflow_model_output_dir, dirs_exist_ok=True)

    # Copy license file to output model path
    if license_file_path:
        # removing the default dumped license when user provides custom license file.
        transformers_license_path = os.path.join(mlflow_model_output_dir, "LICENSE.txt")
        if os.path.isfile(transformers_license_path):
            os.remove(transformers_license_path)

        shutil.copy(license_file_path, mlflow_model_output_dir)

    logger.info(f"listing output directory files: {mlflow_model_output_dir}:\n{os.listdir(mlflow_model_output_dir)}")