in tpipelinegeofinder/textractgeofinder/tword.py [0:0]
def combine_multiple_words_to_phrase(tword_list: "list[TWord]") -> "TWord":
    """Merge several TWord objects into one phrase-level TWord.

    The returned TWord is built as follows:
      * text           - every word's ``text`` joined by a single space,
                         in list order
      * original_text  - the truthy ``original_text`` values joined by a
                         single space (falsy ones are skipped)
      * text_type      - always ``'phrase'``
      * xmin / ymin    - smallest xmin / ymin over all words
      * xmax / ymax    - largest xmax / ymax over all words
      * confidence     - arithmetic mean of the words' confidences
      * page_number    - first word's page_number, converted with int()
      * doc_width / doc_height - copied from the first word
      * id             - a newly generated UUID4, as a string

    Raises:
        ValueError: if ``tword_list`` is empty.
    """
    if not tword_list:
        raise ValueError("tword_list is empty.")

    # Page and document metadata are taken from the first word only.
    first = tword_list[0]

    joined_text = " ".join(word.text for word in tword_list)
    joined_original = " ".join(
        word.original_text for word in tword_list if word.original_text
    )

    # Smallest box that encloses every word in the list.
    left = min(word.xmin for word in tword_list)
    right = max(word.xmax for word in tword_list)
    top = min(word.ymin for word in tword_list)
    bottom = max(word.ymax for word in tword_list)

    page = int(first.page_number)
    mean_confidence = statistics.mean(word.confidence for word in tword_list)

    return TWord(
        page_number=page,
        original_text=joined_original,
        text_type='phrase',
        text=joined_text,
        confidence=mean_confidence,
        xmin=left,
        ymin=top,
        xmax=right,
        ymax=bottom,
        id=str(uuid4()),
        doc_width=first.doc_width,
        doc_height=first.doc_height,
    )