in pipeline/activities/runDocIntel.py [0:0]
def extract_text_from_blob(blobObj: dict):
    """Download a blob and extract its text with the "prebuilt-read" model.

    Args:
        blobObj: Mapping describing the blob; only its ``"url"`` entry is
            read here, and it is fetched with a plain HTTP GET.

    Returns:
        The content of every paragraph in the analysis result joined with
        newlines, or ``None`` when the result has no paragraphs or when any
        step (download, analysis) fails. Failures are logged, never raised.
    """
    # Upper bound, in seconds, for the blob download; without it a stalled
    # connection would block this call forever.
    download_timeout_seconds = 60

    try:
        client = DocumentIntelligenceClient(
            endpoint=endpoint, credential=config.credential
        )

        # The blob is downloaded here and handed to the service as raw bytes
        # instead of passing the URL along.
        # NOTE(review): presumably the service cannot fetch this URL itself - confirm.
        response = requests.get(
            url=blobObj["url"], timeout=download_timeout_seconds
        )
        # Fail fast on 4xx/5xx so an HTTP error page is never analyzed as if
        # it were the document.
        response.raise_for_status()

        poller = client.begin_analyze_document(
            # AnalyzeDocumentRequest Class: https://learn.microsoft.com/en-us/python/api/azure-ai-documentintelligence/azure.ai.documentintelligence.models.analyzedocumentrequest?view=azure-python
            "prebuilt-read",
            AnalyzeDocumentRequest(bytes_source=response.content),
        )
        result: AnalyzeResult = poller.result()

        if result.paragraphs:
            return "\n".join(
                paragraph.content for paragraph in result.paragraphs
            )
    except Exception as e:
        # Deliberate best-effort boundary: callers get None instead of an
        # exception. logging.exception keeps the traceback for diagnosis.
        logging.exception("Error processing %s: %s", blobObj, e)

    # Reached when analysis produced no paragraphs or when an error was logged.
    return None