in src/pre_human_task_lambda.py [0:0]
def plumber_line_to_blocks(page, plumber_line, blockIndex, page_width, page_height):
    """Build one LINE block plus one WORD block per word of a PDFPlumber line.

    Args:
        page: Passed through as the first argument of every ``Block`` created.
        plumber_line: Iterable of PDFPlumber word mappings; each must expose
            a ``'text'`` entry. It is iterated twice (once for the line text,
            once for the word blocks).
        blockIndex: Index of the last block created so far. It is incremented
            once for the line and once for each word.
        page_width: Forwarded to ``get_geometry_from_plumber_word``.
        page_height: Forwarded to ``get_geometry_from_plumber_word``.

    Returns:
        A ``(blocks, blockIndex)`` tuple: ``blocks`` holds the LINE block
        followed by its WORD blocks in input order, and ``blockIndex`` is the
        index assigned to the last block created.
    """
    blockIndex += 1
    line_text = ' '.join([word['text'] for word in plumber_line])
    # The line is created without a geometry; it is grown word by word below.
    line_block = Block(page, 'LINE', line_text, blockIndex)

    word_blocks = []
    for word in plumber_line:
        blockIndex += 1
        word_text = word['text']
        word_geometry = get_geometry_from_plumber_word(word, page_width, page_height)
        word_block = Block(page, 'WORD', word_text, blockIndex,
                           word_geometry, line_block.blockIndex)
        line_block.extend_geometry(word_block.Geometry)
        word_blocks.append(word_block)

    # A single CHILD relationship ties the line to all of its words.
    child_ids = [word_block.Id for word_block in word_blocks]
    line_block.Relationships.append(Relationship(child_ids, 'CHILD'))

    return [line_block] + word_blocks, blockIndex