def extract_text_from_blob()

in pipeline/activities/runDocIntel.py [0:0]


def extract_text_from_blob(blobObj: dict):
    """Download a blob and extract its text with Azure Document Intelligence.

    The blob is fetched over HTTP from ``blobObj["url"]`` and its raw bytes
    are submitted to the ``prebuilt-read`` model. The ``content`` of every
    paragraph in the analysis result is joined with newlines.

    Args:
        blobObj: Mapping describing the blob. Must contain a ``"url"`` key
            holding an address that ``requests.get`` can fetch.

    Returns:
        The newline-joined paragraph text, or ``None`` when the analysis
        result contains no paragraphs or when any step (client creation,
        download, analysis) fails. Failures are logged, never raised.
    """
    # Seconds to wait for the blob host to connect/send data before giving
    # up; without a timeout ``requests.get`` can block forever.
    download_timeout_seconds = 60

    try:
        client = DocumentIntelligenceClient(
            endpoint=endpoint, credential=config.credential
        )

        # The blob is downloaded here and sent to the service as raw bytes
        # instead of handing the service the URL.
        # NOTE(review): the original comment was truncated ("Doc Intelligence
        # does not ...") - presumably the service cannot read this URL
        # directly; confirm and complete the rationale.
        response = requests.get(
            url=blobObj["url"], timeout=download_timeout_seconds
        )
        # Fail on 4xx/5xx so an HTTP error page is never analyzed as if it
        # were the document; the resulting exception is handled below.
        response.raise_for_status()

        poller = client.begin_analyze_document(
            # AnalyzeDocumentRequest Class: https://learn.microsoft.com/en-us/python/api/azure-ai-documentintelligence/azure.ai.documentintelligence.models.analyzedocumentrequest?view=azure-python
            "prebuilt-read",
            AnalyzeDocumentRequest(bytes_source=response.content),
        )
        result: AnalyzeResult = poller.result()

        if not result.paragraphs:
            # Previously this case fell through to ``return paragraphs`` with
            # the name unbound, producing a misleading UnboundLocalError log.
            # The caller-visible result (None) is unchanged.
            logging.warning(f"No text paragraphs found in {blobObj}")
            return None

        return "\n".join(paragraph.content for paragraph in result.paragraphs)

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # to the caller with None rather than propagating.
        logging.error(f"Error processing {blobObj}: {e}")
        return None