def filter_top_percentile()

in misc/precision_filtering/wordlist_gen.py [0:0]


def filter_top_percentile(counter, percentile=95):
    """Keep only the entries whose count reaches a percentile cutoff.

    The cutoff is the ``percentile``-th percentile of all counts in
    ``counter`` as computed by ``np.percentile``. Entries whose count is
    greater than or equal to that cutoff are retained.

    Args:
        counter: Mapping of item -> count (typically a ``Counter``).
        percentile: Percentile in the range accepted by ``np.percentile``
            (0-100) used to derive the cutoff. Defaults to 95.

    Returns:
        A new ``Counter`` holding the retained entries, inserted in
        descending order of count. Entries with equal counts keep the
        relative order they had in ``counter``. An empty ``counter``
        yields an empty ``Counter``.
    """
    # np.percentile has no meaningful result for zero samples, so there is
    # no cutoff to compare against; nothing can be retained.
    if not counter:
        return Counter()

    threshold = np.percentile(list(counter.values()), percentile)

    # The comparison is inclusive so items sitting exactly on the cutoff
    # are kept.
    kept = [(word, count) for word, count in counter.items() if count >= threshold]

    # list.sort is stable (also with reverse=True), so ties preserve the
    # original insertion order.
    kept.sort(key=lambda item: item[1], reverse=True)

    return Counter(dict(kept))