def remove_nested_coref_mentions()

in modeling/coval/conll/reader.py [0:0]


def remove_nested_coref_mentions(clusters, keep_singletons, print_debug=False):
    """Strip mentions that are nested inside another mention of their cluster.

    Every pair of mentions in a cluster is compared with
    ``m1.are_nested(m2)``: a result of ``0`` marks ``m1`` as nested in
    ``m2``, ``1`` marks ``m2`` as nested in ``m1``, and any other result is
    ignored.  ``clusters`` is modified in place.

    Args:
        clusters: list of clusters, each a list of mention objects that
            provide ``are_nested(other)``.
        keep_singletons: when false, a multi-mention cluster that is reduced
            to a single mention is dropped from ``clusters`` altogether; when
            true, it is kept with only its remaining mention.
        print_debug: when true, print the nested mentions that are found and
            the first mention of every cluster reduced to a singleton.

    Returns:
        A tuple ``(all_removed_mentions, all_removed_clusters)``: the number
        of distinct nested mentions found (including those of clusters that
        end up dropped) and the number of multi-mention clusters reduced to
        a single mention (counted whether or not the cluster is dropped).
    """
    # Phase 1: detect nested mentions without touching ``clusters``.
    # Positions are stored instead of the mentions themselves so that a
    # mention nested in several others is counted only once, and so that
    # mentions do not need to be hashable.
    nested_positions = {}
    for c_index, cluster in enumerate(clusters):
        positions = set()
        for i, m1 in enumerate(cluster):
            for j in range(i + 1, len(cluster)):
                m2 = cluster[j]
                nested = m1.are_nested(m2)
                if nested == 0:
                    # m1 is nested in m2
                    positions.add(i)
                    if print_debug:
                        print(m1, m2)
                        print('=========================')
                elif nested == 1:
                    # m2 is nested in m1
                    positions.add(j)
                    if print_debug:
                        print(m2)
        nested_positions[c_index] = positions

    # Phase 2: apply the removals and collect the statistics.
    all_removed_mentions = 0
    all_removed_clusters = 0
    to_be_removed_clusters = []
    for c_index, positions in nested_positions.items():
        cluster = clusters[c_index]
        all_removed_mentions += len(positions)

        became_singleton = (
            len(cluster) != 1 and len(cluster) - len(positions) == 1
        )
        if became_singleton:
            all_removed_clusters += 1
            if print_debug:
                print(cluster[0])
            if not keep_singletons:
                # The whole cluster is dropped below; no need to filter it.
                to_be_removed_clusters.append(c_index)
                continue

        # Clusters that stay (including kept singletons) lose their nested
        # mentions.
        clusters[c_index] = [
            m for position, m in enumerate(cluster)
            if position not in positions
        ]

    # Pop from the highest index down so the remaining indices stay valid.
    for c_index in sorted(to_be_removed_clusters, reverse=True):
        clusters.pop(c_index)

    return all_removed_mentions, all_removed_clusters