in src/tdp.py [0:0]
def _callTextract(self):
    """Send the configured document to Amazon Textract and return the response.

    Reads the following attributes of ``self.inputParameters``:

    * ``awsRegion`` -- passed to ``AwsHelper().getClient`` to obtain the
      Textract client.
    * ``isLocalDocument`` -- when truthy, ``documentPath`` is opened as a
      local file and its bytes are sent inline; otherwise the document is
      referenced as an S3 object using ``bucketName`` and ``documentPath``.
    * ``detectTables`` / ``detectForms`` -- when either is truthy,
      ``analyze_document`` is called with the matching feature types
      ("TABLES", "FORMS"); when both are falsy, ``detect_document_text``
      is called instead.

    Returns:
        The response object returned by the Textract client call, unmodified.
    """
    params = self.inputParameters
    textract = AwsHelper().getClient('textract', params.awsRegion)

    # The Document argument has the same shape for both Textract calls,
    # so build it once instead of once per branch.
    if params.isLocalDocument:
        with open(params.documentPath, 'rb') as documentFile:
            # The bytes from read() are sent as-is; wrapping them in a
            # bytearray would only duplicate the whole document in memory.
            document = {'Bytes': documentFile.read()}
    else:
        document = {
            'S3Object': {
                'Bucket': params.bucketName,
                'Name': params.documentPath,
            }
        }

    # Feature order (TABLES before FORMS) is kept from the original code.
    features = []
    if params.detectTables:
        features.append("TABLES")
    if params.detectForms:
        features.append("FORMS")

    # No features requested: plain text detection is sufficient.
    if not features:
        return textract.detect_document_text(Document=document)

    return textract.analyze_document(Document=document, FeatureTypes=features)