in modeling/coval/conll/reader.py [0:0]
def get_coref_infos(key_file,
                    sys_file,
                    NP_only=False,
                    remove_nested=False,
                    keep_singletons=True,
                    min_span=False,
                    mode='testing'):
    """Build per-document coreference information from a key and a system file.

    Both files are read with ``get_doc_lines``. For every document found in
    the key file, mention clusters are extracted from the key and system
    lines with ``get_doc_mentions``, optionally post-processed, and then
    cross-assigned with ``get_mention_assignments``.

    Args:
        key_file: Key (reference) annotation file, passed to
            ``get_doc_lines``.
        sys_file: System annotation file, passed to ``get_doc_lines``.
        NP_only: Forwarded to ``set_annotated_parse_trees``; when truthy
            (or when ``min_span`` is truthy) that helper is applied to both
            cluster sets.
        remove_nested: When truthy, ``remove_nested_coref_mentions`` is run
            on both cluster sets and a summary of its counts is printed.
        keep_singletons: Forwarded to ``get_doc_mentions`` and
            ``remove_nested_coref_mentions``; when falsy, the accumulated
            singleton counts are printed at the end.
        min_span: Forwarded to ``set_annotated_parse_trees`` (see
            ``NP_only``).
        mode: ``get_doc_mentions`` is called with ``print_debug=True`` only
            when this equals ``'testing'``.

    Returns:
        A dict mapping each document key to the tuple
        ``(key_clusters, sys_clusters, key_mention_sys_cluster,
        sys_mention_key_cluster)``.

    Raises:
        KeyError: If a document of the key file is missing from the system
            file (assuming ``get_doc_lines`` returns a plain mapping --
            TODO confirm).
    """
    gold_lines_by_doc = get_doc_lines(key_file)
    pred_lines_by_doc = get_doc_lines(sys_file)

    # Loop-invariant switches derived from the arguments.
    debug = (mode == 'testing')
    needs_parse = NP_only or min_span

    # Running totals over all documents, reported after the loop.
    gold_nested_total = 0
    pred_nested_total = 0
    gold_nested_cluster_total = 0
    pred_nested_cluster_total = 0
    gold_singleton_total = 0
    pred_singleton_total = 0

    infos = {}

    for doc in gold_lines_by_doc:
        gold_lines = gold_lines_by_doc[doc]

        gold_clusters, n_singletons = get_doc_mentions(
            doc, gold_lines, keep_singletons, print_debug=debug)
        gold_singleton_total += n_singletons
        if needs_parse:
            gold_clusters = set_annotated_parse_trees(
                gold_clusters, gold_lines, NP_only, min_span)

        pred_clusters, n_singletons = get_doc_mentions(
            doc, pred_lines_by_doc[doc], keep_singletons, print_debug=debug)
        pred_singleton_total += n_singletons
        if needs_parse:
            # The key document lines are passed here as well, exactly as
            # for the key clusters above.
            pred_clusters = set_annotated_parse_trees(
                pred_clusters, gold_lines, NP_only, min_span)

        if remove_nested:
            # Only counts are returned; the clusters are presumably
            # modified in place by the helper -- TODO confirm.
            n_nested, n_clusters = remove_nested_coref_mentions(
                gold_clusters, keep_singletons)
            gold_nested_total += n_nested
            gold_nested_cluster_total += n_clusters

            n_nested, n_clusters = remove_nested_coref_mentions(
                pred_clusters, keep_singletons)
            pred_nested_total += n_nested
            pred_nested_cluster_total += n_clusters

        pred_to_gold = get_mention_assignments(pred_clusters, gold_clusters)
        gold_to_pred = get_mention_assignments(gold_clusters, pred_clusters)

        infos[doc] = (gold_clusters, pred_clusters,
                      gold_to_pred, pred_to_gold)

    if remove_nested:
        nested_msg = ('Number of removed nested coreferring mentions in the key '
                      'annotation: %s; and system annotation: %s')
        print(nested_msg % (gold_nested_total, pred_nested_total))
        cluster_msg = ('Number of resulting singleton clusters in the key '
                       'annotation: %s; and system annotation: %s')
        print(cluster_msg % (gold_nested_cluster_total,
                             pred_nested_cluster_total))

    if not keep_singletons:
        singleton_msg = ('%d and %d singletons are removed from the key and system '
                         'files, respectively')
        print(singleton_msg % (gold_singleton_total, pred_singleton_total))

    return infos