def doc_intel_extract_city

def doc_intel_extract_city_names()

in function_app/bp_doc_intel_extract_city_names.py [0:0]
93 lines of code
11 McCabe index (conditional complexity)

def doc_intel_extract_city_names(req: func.HttpRequest) -> func.HttpResponse:
    logging.info(f"Python HTTP trigger function `{FUNCTION_ROUTE}` received a request.")
    # Create the object to hold all intermediate and final values. We will progressively update
    # values as each stage of the pipeline is completed, allowing us to return a partial
    # response in case of an error at any stage.
    output_model = FunctionReponseModel(success=False)
    try:
        # Create an error_text variable. This will be updated as we move through
        # the pipeline so that if a step fails, the error_text var reflects what
        # has failed. If all steps complete successfully, the var is never used.
        error_text = "An error occurred during processing."
        error_code = 422

        func_timer = MeasureRunTime()
        func_timer.start()

        # Check mime_type of the request data
        mime_type = req.headers.get("Content-Type")
        if mime_type not in VALID_DI_PREBUILT_READ_LAYOUT_MIME_TYPES:
            return func.HttpResponse(
                "This function only supports a Content-Type of {}. Supplied file is of type {}".format(
                    ", ".join(VALID_DI_PREBUILT_READ_LAYOUT_MIME_TYPES), mime_type
                ),
                status_code=422,
            )

        ### Check the request body
        req_body = req.get_body()
        if len(req_body) == 0:
            return func.HttpResponse(
                "Please provide a base64 encoded PDF in the request body.",
                status_code=422,
            )
        ### 1. Load the images from the PDF/image input
        error_text = "An error occurred during image extraction."
        error_code = 500
        doc_page_imgs = load_visual_obj_bytes_to_pil_imgs_dict(
            req_body, mime_type, starting_idx=1, pdf_img_dpi=100
        )
        ### Extract the text using Document Intelligence
        error_text = "An error occurred during Document Intelligence extraction."
        with MeasureRunTime() as di_timer:
            poller = di_client.begin_analyze_document(
                model_id=DOC_INTEL_MODEL_ID,
                analyze_request=AnalyzeDocumentRequest(bytes_source=req_body),
            )
            di_result = poller.result()
            output_model.di_raw_response = di_result.as_dict()
            processed_content_docs = doc_intel_result_processor.process_analyze_result(
                analyze_result=di_result,
                doc_page_imgs=doc_page_imgs,
                on_error="raise",
            )
            merged_processed_content_docs = (
                doc_intel_result_processor.merge_adjacent_text_content_docs(
                    processed_content_docs
                )
            )
        output_model.di_extracted_text = "\n".join(
            doc.content for doc in processed_content_docs if doc.content is not None
        )
        output_model.di_time_taken_secs = di_timer.time_taken
        ### 3. Create the messages to send to the LLM in the following order:
        #      i. System prompt
        #      ii. Extracted text and images from Document Intelligence
        error_text = "An error occurred while creating the LLM input messages."
        # Convert chunk content to OpenAI messages
        content_openai_message = convert_processed_di_docs_to_openai_message(
            merged_processed_content_docs, role="user"
        )
        input_messages = [
            {
                "role": "system",
                "content": LLM_SYSTEM_PROMPT,
            },
            content_openai_message,
        ]
        output_model.llm_input_messages = input_messages

        ### 4. Send request to LLM
        error_text = "An error occurred when sending the LLM request."
        with MeasureRunTime() as llm_timer:
            llm_result = aoai_client.chat.completions.create(
                messages=input_messages,
                model=AOAI_LLM_DEPLOYMENT,
                response_format={"type": "json_object"},  # Ensure we get JSON responses
            )
        output_model.llm_time_taken_secs = llm_timer.time_taken
        ### 5. Validate that the LLM response matches the expected schema
        error_text = "An error occurred when validating the LLM's returned response into the expected schema."
        output_model.llm_reply_message = llm_result.choices[0].to_dict()
        output_model.llm_raw_response = llm_result.choices[0].message.content
        llm_structured_response = LLMCityNamesModel(
            **json.loads(llm_result.choices[0].message.content)
        )
        output_model.result = llm_structured_response
        ### 8. All steps completed successfully, set success=True and return the final result
        output_model.success = True
        output_model.func_time_taken_secs = func_timer.stop()
        return func.HttpResponse(
            body=output_model.model_dump_json(),
            mimetype="application/json",
            status_code=200,
        )
    except Exception as _e:
        # If an error occurred at any stage, return the partial response. Update the error_text
        # field to contain the error message, and ensure success=False.
        output_model.success = False
        output_model.error_text = error_text
        output_model.func_time_taken_secs = func_timer.stop()
        logging.exception(output_model.error_text)
        return func.HttpResponse(
            body=output_model.model_dump_json(),
            mimetype="application/json",
            status_code=error_code,
        )