in components/dpu-workflow/src/docs_processing_orchestrator.py [0:0]
def generate_check_duplicated_files_job_params_fn(**context):
    """Build Cloud Run job overrides for the duplicate-file check step.

    Reads the input bucket/folder from the DAG params, derives the output
    folder inside the DPU process bucket, publishes that output folder via
    XCom (key ``output_folder``), and returns the Cloud Run job override
    payload produced by ``cloud_run_utils``.

    Args:
        **context: Airflow task context. Uses ``params.input_bucket``,
            ``params.input_folder``, and pulls ``process_folder`` from the
            ``initial_load_from_input_bucket.create_process_folder`` task.

    Returns:
        The job-override structure from
        ``cloud_run_utils.get_doc_registry_duplicate_job_override``.
    """
    input_bucket = context["params"]["input_bucket"]
    input_folder = context["params"]["input_folder"]
    # An empty/None input_folder means the whole bucket is the input.
    # (fixed typo: was `input_folder_ful_uri`)
    input_folder_full_uri = (
        input_bucket if not input_folder else f"{input_bucket}/{input_folder}"
    )
    process_folder = context["ti"].xcom_pull(
        task_ids="initial_load_from_input_bucket.create_process_folder",
        key="process_folder",
    )
    # NOTE(review): if DPU_PROCESS_BUCKET is unset, os.environ.get returns
    # None and the path becomes "None/..." — presumably the env var is
    # guaranteed by deployment; confirm.
    output_folder = f'{os.environ.get("DPU_PROCESS_BUCKET")}/{process_folder}/workflow-io/check_duplicated_files'
    # Expose the output folder to downstream tasks.
    context["ti"].xcom_push(key="output_folder", value=output_folder)
    return cloud_run_utils.get_doc_registry_duplicate_job_override(
        input_folder_full_uri, output_folder
    )