in modeling/coval/conll/reader.py [0:0]
def _find_nested_mentions(cluster, print_debug=False):
    """Return the mentions of ``cluster`` that are nested in another mention.

    Every unordered pair ``(m1, m2)`` of the cluster is compared with
    ``m1.are_nested(m2)``; a result of ``0`` marks ``m1`` as nested in ``m2``
    and ``1`` marks ``m2`` as nested in ``m1``. Any other result is ignored.
    Each nested mention is reported once, even if several mentions enclose it.
    """
    nested = []
    for i, m1 in enumerate(cluster):
        for m2 in cluster[i + 1:]:
            relation = m1.are_nested(m2)
            if relation == 0:
                # m1 is nested in m2
                inner = m1
                if print_debug:
                    print(m1, m2)
                    print('=========================')
            elif relation == 1:
                # m2 is nested in m1
                inner = m2
                if print_debug:
                    print(m2)
            else:
                continue
            # Membership test (``==``) rather than a set, so mentions do not
            # need to be hashable and a mention is never recorded twice.
            if inner not in nested:
                nested.append(inner)
    return nested


def remove_nested_coref_mentions(clusters, keep_singletons, print_debug=False):
    """Drop mentions that are nested in another mention of the same cluster.

    ``clusters`` is modified in place: each cluster is replaced by a new list
    without its nested mentions. A cluster that had several mentions and is
    left with exactly one is counted as a removed cluster; it is deleted from
    ``clusters`` unless ``keep_singletons`` is true, in which case the single
    surviving mention is kept.

    Args:
        clusters: list of clusters, each a list of mention objects exposing
            ``are_nested(other)``.
        keep_singletons: if false, clusters reduced to one mention are
            removed from ``clusters``.
        print_debug: if true, print the nested mentions and the first mention
            of every cluster that was reduced to a singleton.

    Returns:
        A tuple ``(removed_mentions, reduced_clusters)``: the number of
        nested mentions removed, and the number of clusters reduced to a
        single mention (counted whether or not they are kept).
    """
    all_removed_mentions = 0
    all_removed_clusters = 0
    to_be_removed_clusters = []

    for c_index, cluster in enumerate(clusters):
        nested = _find_nested_mentions(cluster, print_debug)
        remaining = [m for m in cluster if m not in nested]
        # Count what is actually dropped, not how many nested pairs were seen.
        all_removed_mentions += len(cluster) - len(remaining)

        if len(cluster) != 1 and len(remaining) == 1:
            all_removed_clusters += 1
            if print_debug:
                print(cluster[0])
            if not keep_singletons:
                to_be_removed_clusters.append(c_index)
                continue
        clusters[c_index] = remaining

    # Indices were collected in ascending order; pop from the end so the
    # earlier indices stay valid.
    for c_index in reversed(to_be_removed_clusters):
        clusters.pop(c_index)

    return all_removed_mentions, all_removed_clusters