def multimodal_doc_intel

def multimodal_doc_intel_processing()

in function_app/bp_multimodal_doc_intel_processing.py [0:0]
58 lines of code
4 McCabe index (conditional complexity)

def multimodal_doc_intel_processing(req: func.HttpRequest) -> func.HttpResponse:
    """Turn an uploaded document into a single Markdown string.

    Two entries are read from ``req.files``:

    * ``json`` - UTF-8 encoded JSON options. Recognised keys (with defaults):
      ``include_page_images_after_content`` (False),
      ``extract_and_crop_inline_figures`` (False) and ``pages_per_chunk`` (3).
    * ``file`` - the document bytes; its ``content_type`` is used when the
      content is rendered to page images.

    The document is analysed with ``di_client``, the result is processed,
    split into page-based chunks, adjacent text content is merged, and the
    chunks are rendered to Markdown.

    Returns:
        A 200 ``text/plain`` response containing the Markdown on success.
        If any exception is raised it is logged and a 422 response is
        returned whose body names the stage that failed (loading/validating
        the input, or processing the document).
    """
    logging.info(f"Python HTTP trigger function `{FUNCTION_ROUTE}` received a request.")
    # Both stages report the same status code; only the message differs.
    failure_status = 422
    failure_message = "Error while loading and validating the input data."
    try:
        # --- Stage 1: read the options and the uploaded file ---------------
        options = json.loads(req.files["json"].read().decode("utf-8"))
        append_page_images = options.get("include_page_images_after_content", False)
        crop_inline_figures = options.get("extract_and_crop_inline_figures", False)

        # Splitter that groups the processed output into page-based chunks.
        chunk_size = options.get("pages_per_chunk", 3)
        splitter = PageDocumentListSplitter(pages_per_chunk=chunk_size)

        uploaded = req.files["file"]
        payload = uploaded.read()
        payload_mime_type = uploaded.content_type

        # Processor that reshapes the raw Document Intelligence result into
        # content documents that are easier to consume downstream.
        page_img_order = None
        if append_page_images:
            page_img_order = "after"
        page_processor = DefaultDocumentPageProcessor(page_img_order=page_img_order)
        figure_processor = DefaultDocumentFigureProcessor(
            output_figure_img=crop_inline_figures
        )
        result_processor = DocumentIntelligenceProcessor(
            page_processor=page_processor,
            figure_processor=figure_processor,
        )

        # --- Stage 2: analyse and convert the document ---------------------
        failure_message = "An error occurred while processing the document."

        # Render the content to page images (indexed from 1).
        page_images = load_visual_obj_bytes_to_pil_imgs_dict(
            payload, payload_mime_type, starting_idx=1, pdf_img_dpi=100
        )

        # Run the Document Intelligence analysis and wait for its result.
        analysis = di_client.begin_analyze_document(
            model_id=DOC_INTEL_MODEL_ID,
            analyze_request=AnalyzeDocumentRequest(bytes_source=payload),
        ).result()

        # Per-element content documents -> page chunks -> merged text runs.
        content_docs = result_processor.process_analyze_result(
            analyze_result=analysis,
            doc_page_imgs=page_images,
            on_error="raise",
        )
        chunked_docs = splitter.split_document_list(content_docs)
        merged_docs = result_processor.merge_adjacent_text_content_docs(chunked_docs)

        # Flatten all chunks into one Markdown string for the response body.
        markdown = convert_processed_di_doc_chunks_to_markdown(merged_docs)
        return func.HttpResponse(
            body=markdown,
            mimetype="text/plain",
            status_code=200,
        )
    except Exception as exc:
        # Top-level boundary: log the traceback, reply with the stage message.
        logging.exception(exc)
        return func.HttpResponse(failure_message, status_code=failure_status)