def sort_and_sample_filter_list()

in collection/download_commoncrawl_passages.py [0:0]


def sort_and_sample_filter_list(filter_list_path: Path) -> None:
    """Write a sorted, 1-in-100 sample of the lines in a filter list.

    Each line of ``filter_list_path`` is read with its trailing whitespace
    removed. The resulting entries are sorted, and every 100th one (starting
    with the first) is written, newline-terminated, to a file with the same
    name under ``sampled_filter_lists_root``.
    """
    with open(filter_list_path) as source:
        entries = sorted(line.rstrip() for line in source)

    # Positions 0, 100, 200, ... of the sorted entries make up the sample.
    sampled = entries[::100]

    destination = sampled_filter_lists_root / filter_list_path.name
    with open(destination, 'w') as sink:
        sink.writelines(entry + '\n' for entry in sampled)