in code/lambda_layer/pipeline/python/og.py [0:0]
def writeTextractOutputs(self, taggingStr=None):
    """Write per-page outputs and the full response through S3Helper.

    For each page in ``self.document.pages`` (numbered from 1):

    * ``json.dumps(page.blocks)`` is written to
      ``<outputPath>/page-<n>/response.json``;
    * ``self._outputText(page, n)`` is called;
    * ``self._outputForm(page, n)`` is called when ``self.forms`` is truthy;
    * ``self._outputTable(page, n)`` is called when ``self.tables`` is truthy.

    After all pages, ``json.dumps(self.response)`` is written to
    ``<outputPath>/fullresponse.json``.

    Returns immediately, writing nothing, when the document has no pages.

    Args:
        taggingStr: Optional value forwarded unchanged to
            ``S3Helper.writeToS3`` for every object written here.
    """
    pages = self.document.pages
    if not pages:
        return

    # Page numbers in the output paths are 1-based.
    for pageNumber, page in enumerate(pages, start=1):
        pagePath = "{}/page-{}/response.json".format(self.outputPath, pageNumber)
        S3Helper.writeToS3(json.dumps(page.blocks), self.bucketName, pagePath, taggingStr)

        self._outputText(page, pageNumber)
        if self.forms:
            self._outputForm(page, pageNumber)
        if self.tables:
            self._outputTable(page, pageNumber)

    # Write the whole output for it to then be used for comprehend
    fullPath = "{}/fullresponse.json".format(self.outputPath)
    print("Total Pages in Document: {}".format(len(pages)))
    S3Helper.writeToS3(json.dumps(self.response), self.bucketName, fullPath, taggingStr)