def _parseDocumentPagesAndBlockMap()

in textract-pipeline/lambda/textractor/python/trp.py [0:0]


    def _parseDocumentPagesAndBlockMap(self):

        blockMap = {}

        documentPages = []
        documentPage = None
        for page in self._responsePages:
            for block in page['Blocks']:
                if('BlockType' in block and 'Id' in block):
                    blockMap[block['Id']] = block

                if(block['BlockType'] == 'PAGE'):
                    if(documentPage):
                        documentPages.append({"Blocks" : documentPage})
                    documentPage = []
                    documentPage.append(block)
                else:
                    documentPage.append(block)
        if(documentPage):
            documentPages.append({"Blocks" : documentPage})
        return documentPages, blockMap