in src-python/trp/__init__.py [0:0]
def getLinesInReadingOrder(self):
    """Return the lines of ``self._lines`` grouped column by column.

    Each line is assigned to the first known column it horizontally
    overlaps with; a line that matches no known column starts a new one
    whose extent is that line's bounding box. A column's extent is fixed
    by the line that created it and is never widened afterwards.

    A line matches a column when either

    * the horizontal centre of the line lies strictly inside the column, or
    * the horizontal centre of the column lies strictly inside the line.

    Returns:
        list: ``[column_index, text]`` two-element lists, ordered by
        column index (columns are numbered in order of first appearance).
        Within one column the lines keep the order they have in
        ``self._lines``. Empty when ``self._lines`` is empty.
    """
    columns = []
    lines = []
    for item in self._lines:
        # The line's horizontal extent does not depend on the column being
        # tested, so compute it once per line rather than once per column.
        bbox = item.geometry.boundingBox
        bbox_left = bbox.left
        bbox_right = bbox.left + bbox.width
        bbox_centre = bbox.left + bbox.width / 2

        for index, column in enumerate(columns):
            # Midpoint of the column. The parentheses matter: without them
            # the division binds tighter and yields left + right/2.
            column_centre = (column['left'] + column['right']) / 2
            if (column['left'] < bbox_centre < column['right']
                    or bbox_left < column_centre < bbox_right):
                # Bbox appears inside the column
                lines.append([index, item.text])
                break
        else:
            # No existing column matched: this line opens a new column.
            columns.append({'left': bbox_left, 'right': bbox_right})
            lines.append([len(columns) - 1, item.text])

    # list.sort is stable, so lines sharing a column keep their input order.
    lines.sort(key=lambda entry: entry[0])
    return lines