def get_coref_infos()

in modeling/coval/conll/reader.py [0:0]


def get_coref_infos(key_file,
        sys_file,
        NP_only=False,
        remove_nested=False,
        keep_singletons=True,
        min_span=False,
        mode='testing'):
    """Build per-document coreference information from a key/system file pair.

    Both files are read with ``get_doc_lines``. For every document found in
    the key file, the key and system clusters are extracted with
    ``get_doc_mentions``, optionally post-processed, and cross-assigned with
    ``get_mention_assignments``.

    Args:
        key_file: Passed to ``get_doc_lines`` to load the key annotation.
        sys_file: Passed to ``get_doc_lines`` to load the system annotation.
        NP_only: Forwarded to ``set_annotated_parse_trees``; that helper is
            only invoked when ``NP_only`` or ``min_span`` is truthy.
        remove_nested: When truthy, ``remove_nested_coref_mentions`` is run on
            the key and system clusters and a summary is printed at the end.
        keep_singletons: Forwarded to ``get_doc_mentions`` and
            ``remove_nested_coref_mentions``. When falsy, the accumulated
            singleton counts are printed at the end.
        min_span: Forwarded to ``set_annotated_parse_trees`` (see ``NP_only``).
        mode: ``get_doc_mentions`` is called with ``print_debug=True`` only
            when this equals ``'testing'``.

    Returns:
        A dict keyed by document, each value being the tuple
        ``(key_clusters, sys_clusters, key_mention_sys_cluster,
        sys_mention_key_cluster)``.

    Raises:
        KeyError: Presumably, if a document of the key file has no entry in
            the system file (plain subscript lookup) -- depends on the type
            returned by ``get_doc_lines``.
    """
    gold_docs = get_doc_lines(key_file)
    pred_docs = get_doc_lines(sys_file)

    # Both flags are loop-invariant, so evaluate them once up front.
    debug = (mode == 'testing')
    use_parse_trees = NP_only or min_span

    # Running totals, each stored as a [key, system] pair.
    nested_removed = [0, 0]
    clusters_removed = [0, 0]
    singletons = [0, 0]

    infos = {}

    for doc in gold_docs:
        gold_lines = gold_docs[doc]

        # --- key side ---
        gold_clusters, n_single = get_doc_mentions(
            doc, gold_lines, keep_singletons, print_debug=debug)
        singletons[0] += n_single
        if use_parse_trees:
            gold_clusters = set_annotated_parse_trees(
                gold_clusters, gold_lines, NP_only, min_span)

        # --- system side ---
        pred_clusters, n_single = get_doc_mentions(
            doc, pred_docs[doc], keep_singletons, print_debug=debug)
        singletons[1] += n_single
        if use_parse_trees:
            # NOTE(review): the *key* document lines are passed here, not the
            # system ones. Presumably intentional (parse information taken
            # from the key file) -- confirm before changing.
            pred_clusters = set_annotated_parse_trees(
                pred_clusters, gold_lines, NP_only, min_span)

        if remove_nested:
            # Only counts are returned and the clusters are not reassigned,
            # so the helper presumably edits them in place -- TODO confirm.
            n_nested, n_clusters = remove_nested_coref_mentions(
                gold_clusters, keep_singletons)
            nested_removed[0] += n_nested
            clusters_removed[0] += n_clusters

            n_nested, n_clusters = remove_nested_coref_mentions(
                pred_clusters, keep_singletons)
            nested_removed[1] += n_nested
            clusters_removed[1] += n_clusters

        # System-to-key assignment is computed first, then key-to-system.
        pred_to_gold = get_mention_assignments(pred_clusters, gold_clusters)
        gold_to_pred = get_mention_assignments(gold_clusters, pred_clusters)

        infos[doc] = (gold_clusters, pred_clusters,
                      gold_to_pred, pred_to_gold)

    if remove_nested:
        nested_msg = ('Number of removed nested coreferring mentions in the key '
                      'annotation: %s; and system annotation: %s')
        print(nested_msg % tuple(nested_removed))

        clusters_msg = ('Number of resulting singleton clusters in the key '
                        'annotation: %s; and system annotation: %s')
        print(clusters_msg % tuple(clusters_removed))

    if not keep_singletons:
        singles_msg = ('%d and %d singletons are removed from the key and system '
                       'files, respectively')
        print(singles_msg % tuple(singletons))

    return infos