def analyze_document_from_blob_url()

in tools/doc_intelligence.py [0:0]


    def analyze_document_from_blob_url(self, file_url, model='prebuilt-layout'):
        """
        Analyze a document stored in blob storage with Document Intelligence.

        The blob is downloaded using ``self.credential``, posted to the
        ``{model}:analyze`` endpoint, and the ``Operation-Location`` returned
        by the service is polled every 2 seconds until the analysis succeeds,
        fails, or the polling deadline expires.

        Args:
            file_url (str): The URL of the blob containing the document, in
                the form ``<scheme>://<account host>/<container>/<blob path>``.
            model (str): The model to use for document analysis.

        Returns:
            tuple: ``(result, errors)``.

            * ``result`` (dict): the ``analyzeResult`` payload enriched with
              ``result_id`` and ``model_id``. It is an empty dict when the
              call fails before polling starts, and holds only ``result_id``
              and ``model_id`` when polling itself fails.
            * ``errors`` (list[str]): error messages; empty on success.
        """
        request_timeout = 120     # seconds, applied to every HTTP call
        poll_interval = 2         # seconds between status polls
        max_poll_seconds = 1800   # upper bound on total polling time

        result = {}
        errors = []
        result_id = None

        parsed_url = urlparse(file_url)
        filename = os.path.basename(parsed_url.path)
        file_ext = self._get_file_extension(file_url)

        def record_error(message):
            # Log the failure with the file prefix and collect it for the caller.
            logging.error(f"[docintelligence][{filename}] {message}")
            errors.append(message)

        # Validate the URL shape up front so a malformed URL is reported via
        # `errors` instead of raising IndexError, and before a token is requested.
        path_parts = parsed_url.path.split("/", 2)  # ["", container, blob path]
        url_is_valid = (
            parsed_url.scheme
            and parsed_url.netloc
            and len(path_parts) == 3
            and path_parts[1]
            and path_parts[2]
        )
        if not url_is_valid:
            record_error(
                f"Invalid blob URL '{file_url}': expected "
                "<scheme>://<account>/<container>/<blob>."
            )
            return result, errors

        account_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        container_name = path_parts[1]
        blob_name = unquote(path_parts[2])

        # PDFs are analyzed with high-resolution OCR. Keep the override local
        # to this call: writing it back to self.docint_features would leak the
        # PDF-only feature into every later call made on the same instance.
        features = "ocr.highResolution" if file_ext == "pdf" else self.docint_features

        # Set request endpoint
        request_endpoint = f"https://{self.service_name}.cognitiveservices.azure.com/{self.ai_service_type}/documentModels/{model}:analyze?api-version={self.api_version}"
        if features:
            request_endpoint += f"&features={features}"
        if self.output_content_format:
            request_endpoint += f"&outputContentFormat={self.output_content_format}"
        if self.analyze_output_options:
            request_endpoint += f"&output={self.analyze_output_options}"

        # Set request headers
        try:
            token = self.credential.get_token("https://cognitiveservices.azure.com/.default")
            headers = {
                "Content-Type": self._get_content_type(file_ext),
                "Authorization": f"Bearer {token.token}",
                "x-ms-useragent": "gpt-rag/1.0.0"
            }
            logging.debug(f"[docintelligence][{filename}] Retrieved authentication token.")
        except ClientAuthenticationError as e:
            record_error(f"Authentication failed: {e}")
            return result, errors
        except Exception as e:
            record_error(f"Unexpected error during authentication: {e}")
            return result, errors

        logging.debug(f"[docintelligence][{filename}] Connecting to blob storage.")

        # Download the document bytes from blob storage.
        try:
            blob_service_client = BlobServiceClient(account_url=account_url, credential=self.credential)
            blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
            data = blob_client.download_blob().readall()
            logging.debug(f"[docintelligence][{filename}] Downloaded blob data.")
        except ResourceNotFoundError:
            record_error(f"Blob '{blob_name}' not found in container '{container_name}'.")
            return result, errors
        except ClientAuthenticationError as e:
            record_error(f"Authentication failed when accessing blob storage: {e}")
            return result, errors
        except Exception as e:
            record_error(f"Error accessing blob storage: {e}")
            return result, errors

        # Submit the analysis request; the timeout prevents an indefinite hang.
        try:
            response = requests.post(request_endpoint, headers=headers, data=data, timeout=request_timeout)
            logging.info(f"[docintelligence][{filename}] Sent analysis request.")
        except Exception as e:
            record_error(f"Error when sending request to Document Intelligence API: {e}")
            return result, errors

        # The service acknowledges an accepted analysis with 202.
        if response.status_code != 202:
            error_messages = {
                404: "Resource not found. Please verify your request URL. The Document Intelligence API version you are using may not be supported in your region.",
            }
            record_error(error_messages.get(
                response.status_code,
                f"Document Intelligence request error, code {response.status_code}: {response.text}"
            ))
            return result, errors

        get_url = response.headers.get("Operation-Location")
        if not get_url:
            record_error("Operation-Location header not found in the response.")
            return result, errors

        # Extract result_id: last path segment of the operation URL, without query.
        result_id = get_url.split("/")[-1].split("?")[0]
        logging.debug(f"[docintelligence][{filename}] Extracted result_id: {result_id}")

        result_headers = headers.copy()
        result_headers["Content-Type"] = "application/json-patch+json"

        # Poll until the operation reaches a terminal state or the deadline passes.
        deadline = time.monotonic() + max_poll_seconds
        while True:
            try:
                result_response = requests.get(get_url, headers=result_headers, timeout=request_timeout)

                # Check the HTTP status before parsing: an error response may
                # not carry a JSON body, and the HTTP error is the useful message.
                if result_response.status_code != 200:
                    record_error(f"Document Intelligence polling error, code {result_response.status_code}: {result_response.text}")
                    break

                result_json = result_response.json()
                status = result_json.get("status")

                if status == "failed":
                    record_error(f"Document Intelligence polling error, code {result_response.status_code}: {result_response.text}")
                    break

                if status == "succeeded":
                    # `or {}` also covers an explicit JSON null payload.
                    result = result_json.get('analyzeResult') or {}
                    logging.debug(f"[docintelligence][{filename}] Analysis succeeded.")
                    break

                if time.monotonic() >= deadline:
                    record_error(f"Document Intelligence analysis did not complete within {max_poll_seconds} seconds.")
                    break

                logging.debug(f"[docintelligence][{filename}] Analysis in progress. Waiting for {poll_interval} seconds before retrying.")
                time.sleep(poll_interval)
            except Exception as e:
                record_error(f"Error during polling for analysis result: {e}")
                break

        # enrich result
        result['result_id'] = result_id
        result['model_id'] = model

        return result, errors