in src-csharp/TextractExtensions.cs [192:219]
/// <summary>
/// Tags every entry of <c>this.Lines</c> with the index of the column it belongs to,
/// inferring the columns from the horizontal extent of the lines' bounding boxes.
/// </summary>
/// <remarks>
/// Columns are discovered on the fly: a line that matches no known column opens a
/// new one spanning exactly that line's left and right edges. A line matches a
/// column when the line's horizontal centre lies strictly inside the column, or the
/// column's horizontal centre lies strictly inside the line. The first matching
/// column (lowest index) wins, and a column's extent is never widened afterwards.
/// NOTE(review): the returned list keeps the iteration order of <c>this.Lines</c>;
/// it is not sorted by <c>ColumnIndex</c>. Callers that need column-by-column order
/// presumably have to order by <c>ColumnIndex</c> themselves - confirm.
/// </remarks>
/// <returns>
/// One <c>IndexedText</c> per line, carrying the line's text and its zero-based
/// column index.
/// </returns>
public List<IndexedText> GetLinesInReadingOrder() {
    var lines = new List<IndexedText>();
    var columns = new List<Column>();

    foreach(var line in this.Lines) {
        // The line's horizontal extent does not depend on the column being tested,
        // so compute it once per line rather than once per column.
        var bb = line.Geometry.BoundingBox;
        var bbLeft = bb.Left;
        var bbRight = bb.Left + bb.Width;
        var bbCentre = bb.Left + (bb.Width / 2);

        var columnIndex = -1;
        for(var index = 0; index < columns.Count; index++) {
            var column = columns[index];
            // Column.Right is an absolute edge (Left + Width), not a width, so the
            // midpoint is Left plus half of the span between the two edges.
            var columnCentre = column.Left + ((column.Right - column.Left) / 2);
            var lineCentreInColumn = bbCentre > column.Left && bbCentre < column.Right;
            var columnCentreInLine = columnCentre > bbLeft && columnCentre < bbRight;
            if(lineCentreInColumn || columnCentreInLine) {
                columnIndex = index;
                break;
            }
        }

        if(columnIndex < 0) {
            // No existing column overlaps this line: it starts a new column.
            columns.Add(new Column { Left = bbLeft, Right = bbRight });
            columnIndex = columns.Count - 1;
        }

        lines.Add(new IndexedText { ColumnIndex = columnIndex, Text = line.Text });
    }

    return lines;
}