in tpipelinegeofinder/textractgeofinder/tgeofinder.py [0:0]
def get_values_for_phrase_coordinate(self, phrase_coordinates: List[PhraseCoordinate]) -> List[float]:
    """Return the coordinate values of the first candidate phrase that is found.

    This makes it easier to develop resilient templates: callers can pass
    several alternative phrases and the first one that yields a value wins.
    The candidates are tried in the given order; later candidates are not
    evaluated once one has produced a result, so the return value holds the
    values of a single candidate only, not of all candidates.

    Finding phrases is an expensive operation (one ``find_phrase_on_page``
    call per candidate tried), so put the most likely candidates first.

    Args:
        phrase_coordinates: Candidates to try. For each one, ``phrase`` and
            ``min_textdistance`` are passed to ``find_phrase_on_page`` and
            ``coordinate`` (a ``PointValueType``) selects which of
            ``xmin`` / ``xmax`` / ``ymin`` / ``ymax`` is read from each match.

    Returns:
        One value per match of the first candidate that produced any value.
        More than one entry means the phrase was not unique; a warning is
        logged in that case.

    Raises:
        NoPhraseForAreaFoundError: If no candidate produced a value
            (including when ``phrase_coordinates`` is empty).
    """
    for phrase_coordinate in phrase_coordinates:
        phrases_found: List[TWord] = self.find_phrase_on_page(phrase=phrase_coordinate.phrase,
                                                              min_textdistance=phrase_coordinate.min_textdistance)
        if not phrases_found:
            continue
        logger.debug(f"get_values_for_phrase_coordinate: found value for phrase: {phrases_found}")

        return_value: List[float] = []
        for tword_phrase in phrases_found:
            if phrase_coordinate.coordinate == PointValueType.XMAX:
                r_value_add = tword_phrase.xmax
            elif phrase_coordinate.coordinate == PointValueType.YMAX:
                r_value_add = tword_phrase.ymax
            elif phrase_coordinate.coordinate == PointValueType.XMIN:
                r_value_add = tword_phrase.xmin
            elif phrase_coordinate.coordinate == PointValueType.YMIN:
                r_value_add = tword_phrase.ymin
            else:
                r_value_add = None
                logger.warning(f"no coordinate for phrase_coordinate: {phrase_coordinate}")
            # Compare against None explicitly: 0 / 0.0 is a legitimate
            # coordinate value and must not be dropped by a truthiness test.
            if r_value_add is not None:
                return_value.append(r_value_add)

        if return_value:
            if len(return_value) > 1:
                logger.warning(
                    f"non unique - (len={len(return_value)}) for phrase_coordinate.phrase: {phrase_coordinate.phrase}, phrases_found:{phrases_found}"
                )
            logger.debug(f"get_values_for_phrase_coordinate - {return_value}")
            return return_value

    # Reaching this point means no candidate yielded a value; any successful
    # candidate has already returned from inside the loop.
    raise NoPhraseForAreaFoundError(f"nothin found for phrase_coordinates: {phrase_coordinates}")