def parse_doc_classifier_output()

in components/dpu-workflow/src/docs_processing_orchestrator.py [0:0]


def parse_doc_classifier_output(**context):
    """Move classifier-matched files and return the labels reported for them.

    Reads the process bucket name from the ``DPU_PROCESS_BUCKET`` environment
    variable, pulls the ``process_folder`` XCom value published by the
    ``initial_load_from_input_bucket.create_process_folder`` task, and hands
    both to ``gcs_utils.move_classifier_matched_files`` together with the
    ``"pdf"`` argument and the keys of ``SPECIALIZED_PROCESSORS_IDS_JSON``.

    Args:
        **context: Task context; must contain a ``"ti"`` entry exposing
            ``xcom_pull``.

    Returns:
        Whatever ``gcs_utils.move_classifier_matched_files`` returns
        (presumably the labels detected by the classifier -- confirm against
        the helper).

    Raises:
        AssertionError: If ``DPU_PROCESS_BUCKET`` is unset or empty.
    """
    process_bucket = os.environ.get("DPU_PROCESS_BUCKET")
    # Explicit check instead of an ``assert`` statement: asserts are removed
    # when Python runs with -O, which would let a missing bucket slip through.
    # AssertionError and the message are kept so existing handlers still match.
    # An empty value is treated the same as an unset one.
    if not process_bucket:
        raise AssertionError("DPU_PROCESS_BUCKET is not set")

    process_folder = context["ti"].xcom_pull(
        task_ids="initial_load_from_input_bucket.create_process_folder",
        key="process_folder",
    )
    # NOTE(review): "pdf" looks like a file-type selector for the helper --
    # confirm against gcs_utils.move_classifier_matched_files.
    detected_labels = gcs_utils.move_classifier_matched_files(
        process_bucket,
        process_folder,
        "pdf",
        list(SPECIALIZED_PROCESSORS_IDS_JSON.keys()),
    )
    return detected_labels