def get_exs_and_stats()

in obelics/visualization/choose_filtering_parameters_web_documents_node_level.py [0:0]


def get_exs_and_stats(web_document_dataset, type_exs, funcs_compute_stats, text_node_level=True):
    """Collect examples from one dataset column and compute statistics on each.

    Args:
        web_document_dataset: Object exposing ``num_rows`` and integer row
            indexing; each row must be subscriptable by column name.
        type_exs: Name of the column to read. The value ``"texts"`` enables
            the text-specific handling controlled by ``text_node_level``.
        funcs_compute_stats: Mapping from a statistic name to a callable that
            takes a single example and returns a number (it is passed to
            ``round``).
        text_node_level: Only used when ``type_exs == "texts"``. If true, each
            text is split on blank lines (``"\\n\\n"``) and every resulting
            paragraph becomes its own example. If false, the texts of a row
            are joined with ``"\\n\\n"`` into a single example per row.

    Returns:
        A dict with the key ``"exs"`` holding the list of examples, plus one
        key per entry of ``funcs_compute_stats`` holding the list of that
        statistic, rounded to 2 decimals, aligned with ``"exs"``. A statistic
        named ``"exs"`` would overwrite the examples list.
    """
    is_text = type_exs == "texts"

    exs = []
    for idx_row in range(web_document_dataset.num_rows):
        # Read and filter the column exactly once per row; the row access may
        # be costly, so it must not be repeated for the "texts" case.
        new_els = non_empty_els_from_list(web_document_dataset[idx_row][type_exs])

        if not is_text:
            exs.extend(new_els)
        elif text_node_level:
            # Text at paragraph level: one example per paragraph.
            exs.extend(paragraph for txt in new_els for paragraph in txt.split("\n\n"))
        else:
            # Text at document level: one example per row.
            exs.append("\n\n".join(new_els))

    all_stats = {"exs": exs}
    for stat_name, func_compute_stats in funcs_compute_stats.items():
        all_stats[stat_name] = [round(func_compute_stats(ex), 2) for ex in exs]

    return all_stats