in function_app/bp_multimodal_doc_intel_processing.py [0:0]
def multimodal_doc_intel_processing(req: func.HttpRequest) -> func.HttpResponse:
    """Analyze an uploaded document with Azure Document Intelligence and return
    its content as a single Markdown string.

    Expects a multipart request carrying two parts:
      - ``json``: a JSON options object. Optional keys:
          * ``include_page_images_after_content`` (bool, default False) —
            place each page's image after its text content.
          * ``extract_and_crop_inline_figures`` (bool, default False) —
            output cropped images for inline figures.
          * ``pages_per_chunk`` (int, default 3) — pages per output chunk.
      - ``file``: the raw document bytes to analyze.

    Returns:
        200 ``text/plain`` response whose body is the processed Markdown, or a
        422 response with a short message describing the stage that failed.
    """
    logging.info(f"Python HTTP trigger function `{FUNCTION_ROUTE}` received a request.")
    try:
        # Load and validate input data.
        # error_text/error_code are re-assigned as the pipeline advances so the
        # `except` handler at the bottom reports the stage that failed.
        error_text = "Error while loading and validating the input data."
        error_code = 422
        # Check the request body
        request_json_content = json.loads(req.files["json"].read().decode("utf-8"))
        include_page_images_after_content = request_json_content.get(
            "include_page_images_after_content", False
        )
        extract_and_crop_inline_figures = request_json_content.get(
            "extract_and_crop_inline_figures", False
        )
        # Now construct a splitter class which can separate the outputs into different chunks
        pages_per_chunk = request_json_content.get("pages_per_chunk", 3)
        page_chunk_splitter = PageDocumentListSplitter(pages_per_chunk=pages_per_chunk)
        file_bytes = req.files["file"].read()
        file_mime_type = req.files["file"].content_type
        # Create the Doc Intelligence result processor. This can be configured to
        # process the raw Doc Intelligence result into a format that is easier
        # to work with downstream.
        doc_intel_result_processor = DocumentIntelligenceProcessor(
            page_processor=DefaultDocumentPageProcessor(
                page_img_order="after" if include_page_images_after_content else None,
            ),
            figure_processor=DefaultDocumentFigureProcessor(
                output_figure_img=extract_and_crop_inline_figures
            ),
        )
        # Process the document with Document Intelligence
        error_text = "An error occurred while processing the document."
        error_code = 422
        # Load content as images (pages indexed from 1; PDFs rasterized at 100 DPI)
        doc_page_imgs = load_visual_obj_bytes_to_pil_imgs_dict(
            file_bytes, file_mime_type, starting_idx=1, pdf_img_dpi=100
        )
        # Get Doc Intelligence result (begin_analyze_document returns a poller;
        # .result() blocks until the service-side analysis completes)
        poller = di_client.begin_analyze_document(
            model_id=DOC_INTEL_MODEL_ID,
            analyze_request=AnalyzeDocumentRequest(bytes_source=file_bytes),
        )
        di_result = poller.result()
        # Process the result into Documents containing the content of every element
        processed_content_docs = doc_intel_result_processor.process_analyze_result(
            analyze_result=di_result,
            doc_page_imgs=doc_page_imgs,
            on_error="raise",
        )
        # Chunk the content by page
        page_chunked_content_docs = page_chunk_splitter.split_document_list(
            processed_content_docs
        )
        # Merge adjacent text content together (reducing the number of objects)
        merged_page_chunked_content_docs = (
            doc_intel_result_processor.merge_adjacent_text_content_docs(
                page_chunked_content_docs
            )
        )
        # Convert the chunks into a single Markdown string
        di_processed_md = convert_processed_di_doc_chunks_to_markdown(
            merged_page_chunked_content_docs
        )
        return func.HttpResponse(
            body=di_processed_md,
            mimetype="text/plain",
            status_code=200,
        )
    except Exception as e:
        # Top-level HTTP boundary: log the full traceback and return the
        # stage-specific message/code set above. Broad catch is deliberate so
        # the function never surfaces an unhandled exception to the host.
        logging.exception(e)
        return func.HttpResponse(error_text, status_code=error_code)