def _callTextract()

in src/tdp.py [0:0]


    def _callTextract(self):
        """Submit the configured document to Textract and return the raw response.

        The request is assembled from ``self.inputParameters``:

        * Document source -- when ``isLocalDocument`` is truthy the file at
          ``documentPath`` is read in binary mode and sent inline as
          ``Bytes``; otherwise the request references the S3 object
          identified by ``bucketName`` / ``documentPath``.
        * API selection -- ``detect_document_text`` is called when neither
          ``detectTables`` nor ``detectForms`` is set; otherwise
          ``analyze_document`` is called with ``FeatureTypes`` containing
          "TABLES" and/or "FORMS" (in that order).

        Returns:
            Whatever the selected Textract client call returns, unmodified.

        Raises:
            OSError: if the local document cannot be opened or read.
            Any exception raised by the client call propagates unchanged.
        """
        params = self.inputParameters
        textract = AwsHelper().getClient('textract', params.awsRegion)

        # Collect the requested analysis features; an empty list means the
        # caller only wants plain text detection.
        features = []
        if params.detectTables:
            features.append("TABLES")
        if params.detectForms:
            features.append("FORMS")

        # Build the Document payload once; it is identical for both API calls.
        if params.isLocalDocument:
            with open(params.documentPath, 'rb') as documentFile:
                # read() already yields bytes, which the client accepts for a
                # blob parameter, so no extra bytearray copy is made.
                document = {'Bytes': documentFile.read()}
        else:
            document = {
                'S3Object': {
                    'Bucket': params.bucketName,
                    'Name': params.documentPath,
                }
            }

        if not features:
            return textract.detect_document_text(Document=document)
        return textract.analyze_document(Document=document, FeatureTypes=features)