in utils/entity_resolution.py [0:0]
def resolve_entities(self, tree):
    '''
    Build a new tree in which every resolvable entity node of ``tree`` is replaced by its catalog resolution.

    A node is a candidate for resolution only when all of its children are leaves. Its root symbol selects
    the catalog (a key of ``self.entities``) and its leaf tokens, joined with single spaces in child order,
    form the value that is looked up in that catalog. Three outcomes are possible for such a node:

    * the root symbol has a catalog and the value is listed: the catalog entry is converted with
      ``to_prefix_notation`` (for eg. VOLUME(2,LITER) --> (VOLUME 2 LITER )), loaded as a tree of the same
      class as ``tree``, and returned as the resolved entity;
    * the root symbol has a catalog but the value is not listed: the node is replaced by one holding
      UNK_ENTITY_SYMBOL, for eg. (SIZE biggest size ) --> (SIZE <UNKNOWN_ENTITY> );
    * the root symbol has no catalog (for eg. a NAME node whose exact string must be kept): the node is
      rebuilt from its own ``tree_rep`` with its tokens untouched.

    A node with at least one non-terminal child is rebuilt with its leaf children kept as is and every
    non-terminal child resolved recursively.

    The lookup depends on token order: the tree handed to this method must list the tokens of an entity
    left to right as children of the entity node. For eg. '(TOPPING green peppers )' must keep 'green'
    before 'peppers', otherwise 'peppers green' would be searched for in the catalog and reported as an
    unknown entity value.

    :param tree: (SemanticTree) Input SemanticTree object.
    :return: (SemanticTree) SemanticTree object where entities are resolved.
    '''
    make_tree = type(tree)

    # Recursive case: as soon as one child is a non-terminal this node cannot be an entity itself, so
    # only its non-terminal children go through entity resolution; its own leaves are left alone.
    if any(not child.is_leaf() for child in tree.children()):
        rebuilt_children = []
        for child in tree.children():
            rebuilt_children.append(child if child.is_leaf() else self.resolve_entities(child))
        return make_tree(root_symbol=tree.root_symbol(), children=rebuilt_children)

    label = tree.root_symbol()

    # No catalog for this kind of node: keep the tokens exactly as they are.
    if label not in self.entities:
        return make_tree(tree_rep=tree.tree_rep)

    catalog = self.entities[label]
    phrase = ' '.join(child.root_symbol() for child in tree.children())

    # A catalog exists but does not know this value: insert the unknown entity symbol instead.
    if phrase not in catalog:
        placeholder = f"({label} {EntityResolver.UNK_ENTITY_SYMBOL} )"
        return make_tree(flat_string=placeholder).children()[0]

    # Known value: turn the catalog entry into its flat string form and load it as a tree.
    # NOTE(review): only the first child of the loaded tree is returned - presumably the flat-string
    # constructor wraps what it parses under an extra root node; confirm against the tree class.
    flat_resolution = to_prefix_notation(catalog[phrase])
    return make_tree(flat_string=flat_resolution).children()[0]