in misc/precision_filtering/wordlist_gen.py [0:0]
def filter_top_percentile(counter, percentile=95):
    """Keep only the entries whose count reaches the given percentile.

    The cutoff is the ``percentile``-th percentile of all counts in
    ``counter``, as computed by ``np.percentile``. Entries with a count
    greater than or equal to that cutoff are kept.

    Args:
        counter: Mapping of item -> count (typically a ``Counter``).
        percentile: Percentile in the range accepted by ``np.percentile``
            (0-100). Defaults to 95.

    Returns:
        A new ``Counter`` holding the surviving entries, inserted in
        descending order of count. Entries with equal counts keep the
        relative order they had in ``counter``. An empty input yields an
        empty ``Counter``.
    """
    # A percentile of zero samples is undefined, so short-circuit instead of
    # handing np.percentile an empty list.
    if not counter:
        return Counter()

    threshold = np.percentile(list(counter.values()), percentile)

    survivors = [
        (word, count) for word, count in counter.items() if count >= threshold
    ]
    # list.sort is stable, so ties keep their original relative order.
    survivors.sort(key=lambda pair: pair[1], reverse=True)

    # Go through dict: Counter(list_of_pairs) would count the tuples themselves.
    return Counter(dict(survivors))