def set_annotated_parse_trees()

in modeling/coval/conll/reader.py [0:0]


def set_annotated_parse_trees(clusters, key_doc_lines, NP_only, min_span,
        partial_vp_chain_pruning=True, print_debug=False):
    """Attach gold parse trees to mentions and optionally prune VP mentions.

    For every mention of every cluster, the parse is extracted from the
    slice ``key_doc_lines[m.sent_num][m.start:m.end + 1]`` and stored on the
    mention via ``m.set_gold_parse``.  Mentions are modified in place.

    Args:
        clusters: Iterable of clusters; each cluster is an iterable of
            mention objects exposing ``sent_num``, ``start``, ``end``,
            ``words``, ``gold_parse``, ``set_gold_parse`` and
            ``set_min_span``.
        key_doc_lines: Sequence indexed by sentence number whose items are
            sliced by mention token positions.
        NP_only: When truthy, mentions whose parse tag is ``'VP'`` are
            removed from their cluster.
        min_span: When truthy, ``set_min_span()`` is called on each mention.
        partial_vp_chain_pruning: When true, a cluster that lost VP mentions
            is kept as long as more than one mention remains; otherwise any
            cluster that lost a VP mention is dropped entirely.
        print_debug: When truthy, print which clusters were partially or
            fully pruned.

    Returns:
        A list of clusters (each a new list of mentions), in the original
        cluster order, with fully pruned clusters left out.
    """
    pruned_cluster_indices = set()
    pruned_clusters = {}

    for i, c in enumerate(clusters):
        pruned_cluster = list(c)
        for m in c:
            try:
                tree = extract_annotated_parse(
                        key_doc_lines[m.sent_num][m.start:m.end + 1], m.start)
            except IndexError as err:
                print(err, len(key_doc_lines), m.sent_num)
                # Without this reset the code below would either hit an
                # unbound ``tree`` (first mention) or silently reuse the
                # parse of the previously processed mention.
                tree = None

            m.set_gold_parse(tree)

            # If the conll file does not have words, rebuild them from the
            # terminals of the parse (only possible when a parse exists).
            if not m.words[0] and m.gold_parse is not None:
                terminals = []
                m.gold_parse.get_terminals(terminals)
                m.words = [w for t in terminals for w in t.split()]

            if min_span:
                m.set_min_span()
            if tree and tree.tag == 'VP' and NP_only:
                pruned_cluster.remove(m)
                pruned_cluster_indices.add(i)
        pruned_clusters[i] = pruned_cluster

    if NP_only and pruned_cluster_indices:
        for i in sorted(pruned_cluster_indices, reverse=True):
            if len(pruned_clusters[i]) > 1 and partial_vp_chain_pruning:
                # Enough mentions survive: keep the reduced cluster.
                if print_debug:
                    print('VP partial pruning: ',
                            [str(m) for m in clusters[i]], '->',
                            [str(m) for m in pruned_clusters[i]])
            else:
                # Singleton/empty remainder, or partial pruning disabled:
                # drop the whole cluster.
                if print_debug:
                    print('VP full pruning, cluster size: ', len(clusters[i]),
                            ' cluster: ', [str(m) for m in clusters[i]])
                pruned_clusters.pop(i)

    return list(pruned_clusters.values())