def process()

in bq-connector/docai_bq_connector/doc_ai_processing/Processor.py


    def process(self) -> Union[DocumentOperation, ProcessedDocument, None]:
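        # Fetch the source document blob and its metadata from GCS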
        gcs_doc_blob, gcs_doc_meta = self._get_gcs_blob()
        if self.content_type == CONTENT_TYPE_PDF:
            # Original document. Needs to be processed by a DocAI extractor
            page_count = get_pdf_page_cnt(gcs_doc_blob)
            # Limit is different per processor: https://cloud.google.com/document-ai/quotas
            if page_count <= self.max_sync_page_count:
                process_result = self._process_sync(document_blob=gcs_doc_blob)
            else:
                process_result = self._process_async()
            if isinstance(process_result, ProcessedDocument):
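                # Persist the structured extraction result back to GCS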
                self._write_result_to_gcs(process_result.dictionary)
        elif self.content_type == CONTENT_TYPE_JSON:
            # This document was already processed and sent for HITL (human-in-the-loop) review; its review output must now be processed
            logging.debug(
                f"Read DocAI HITL Output file = {self.bucket_name}/{self.file_name}"
            )
            process_result = self._process_hitl_output(gcs_doc_blob)
        else:
            logging.info(
                f"Skipping unsupported file type {self.file_name} with content type = {self.content_type}"
            )
            process_result = None

        return process_result
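
For orientation, below is a minimal caller sketch. It assumes the class defined in Processor.py is named Processor and that its constructor accepts the bucket_name, file_name, content_type, and max_sync_page_count values referenced in the method above; the actual constructor signature may differ.

    # Hypothetical caller; the constructor parameters are inferred from the
    # attributes used in process() and are not a documented API.
    from docai_bq_connector.doc_ai_processing.Processor import Processor

    processor = Processor(
        bucket_name="my-intake-bucket",        # assumed parameter
        file_name="invoices/sample.pdf",       # assumed parameter
        content_type="application/pdf",        # assumed parameter
        max_sync_page_count=15,                # assumed parameter
    )

    result = processor.process()
    if result is None:
        print("Unsupported content type; nothing was processed")
    else:
        print(f"Finished: {type(result).__name__}")

The single entry point hides the processing choice from the caller: PDFs at or under the sync page limit go through synchronous Document AI processing, larger PDFs fall back to asynchronous processing, and JSON inputs are treated as HITL review output to be ingested.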