def generate_check_duplicated_files_job_params_fn()

in components/dpu-workflow/src/docs_processing_orchestrator.py [0:0]


def generate_check_duplicated_files_job_params_fn(**context):
    """Build Cloud Run job overrides for the duplicate-file check step.

    Reads the input bucket/folder from the DAG params, resolves the
    per-run process folder from XCom, pushes the computed output folder
    back to XCom under key ``output_folder``, and returns the job
    override payload for the doc-registry duplicate check.

    Args:
        **context: Airflow task context. Must contain ``params`` with
            ``input_bucket`` and ``input_folder``, and ``ti`` for
            XCom pull/push.

    Returns:
        Whatever ``cloud_run_utils.get_doc_registry_duplicate_job_override``
        returns (the Cloud Run job override object).
    """
    input_bucket = context["params"]["input_bucket"]
    input_folder = context["params"]["input_folder"]
    # Fall back to the bucket root when no sub-folder is configured.
    input_folder_full_uri = (
        input_bucket if not input_folder else f"{input_bucket}/{input_folder}"
    )
    process_folder = context["ti"].xcom_pull(
        task_ids="initial_load_from_input_bucket.create_process_folder",
        key="process_folder",
    )
    # NOTE(review): if DPU_PROCESS_BUCKET is unset, .get() returns None and
    # this silently produces a "None/..." path — consider
    # os.environ["DPU_PROCESS_BUCKET"] to fail fast; confirm intent.
    output_folder = f'{os.environ.get("DPU_PROCESS_BUCKET")}/{process_folder}/workflow-io/check_duplicated_files'
    context["ti"].xcom_push(key="output_folder", value=output_folder)
    return cloud_run_utils.get_doc_registry_duplicate_job_override(
        input_folder_full_uri, output_folder
    )