in modeling/coval/conll/reader.py [0:0]
def set_annotated_parse_trees(clusters, key_doc_lines, NP_only, min_span,
                              partial_vp_chain_pruning=True, print_debug=False):
    """Attach a parse tree to every mention and optionally prune VP mentions.

    For each mention ``m`` the lines
    ``key_doc_lines[m.sent_num][m.start:m.end + 1]`` are handed to
    ``extract_annotated_parse`` and the result is stored on the mention via
    ``m.set_gold_parse``. Mentions are modified in place.

    Args:
        clusters: Sequence of clusters; each cluster is an iterable of
            mention objects exposing ``sent_num``, ``start``, ``end``,
            ``words``, ``set_gold_parse`` and ``set_min_span``.
        key_doc_lines: Sequence indexed by sentence number whose items are
            sliceable by token position (presumably the per-sentence lines
            of the key CoNLL file -- confirm against the caller).
        NP_only: If truthy, mentions whose parse tree is tagged ``'VP'``
            are removed from their cluster.
        min_span: If truthy, ``m.set_min_span()`` is called on each mention
            once its parse has been set.
        partial_vp_chain_pruning: If truthy, a cluster that lost VP mentions
            is kept as long as more than one mention remains. Otherwise any
            cluster that contained a VP mention is dropped entirely.
        print_debug: If truthy, print a line for each pruned cluster.

    Returns:
        A list of clusters (each a list of mentions) in their original
        order, without the pruned mentions and dropped clusters.
    """
    pruned_cluster_indices = set()
    pruned_clusters = {}

    for i, c in enumerate(clusters):
        pruned_cluster = list(c)
        for m in c:
            try:
                sentence_lines = key_doc_lines[m.sent_num]
                tree = extract_annotated_parse(
                    sentence_lines[m.start:m.end + 1], m.start)
            except IndexError as err:
                print(err, len(key_doc_lines), m.sent_num)
                # No parse could be extracted for this mention. Everything
                # below depends on the parse, so skip it rather than fall
                # through with `tree` unbound (first mention) or still bound
                # to the previous mention's parse.
                continue

            m.set_gold_parse(tree)

            # If the CoNLL file does not have words, rebuild them from the
            # terminals of the parse tree.
            if not m.words[0]:
                terminals = []
                m.gold_parse.get_terminals(terminals)
                m.words = [w for t in terminals for w in t.split()]

            if min_span:
                m.set_min_span()

            if tree and tree.tag == 'VP' and NP_only:
                pruned_cluster.remove(m)
                pruned_cluster_indices.add(i)

        pruned_clusters[i] = pruned_cluster

    if NP_only and pruned_cluster_indices:
        for i in sorted(pruned_cluster_indices, reverse=True):
            if len(pruned_clusters[i]) > 1 and partial_vp_chain_pruning:
                # Enough mentions survive: keep the partially pruned cluster.
                if print_debug:
                    print('VP partial pruning: ',
                          [str(m) for m in clusters[i]], '->',
                          [str(m) for m in pruned_clusters[i]])
            else:
                if print_debug:
                    print('VP full pruning, cluster size: ', len(clusters[i]),
                          ' cluster: ', [str(m) for m in clusters[i]])
                pruned_clusters.pop(i)

    # Dicts preserve insertion order, so clusters come back in input order.
    return list(pruned_clusters.values())