def save()

in misc/precision_filtering/wordlist_gen.py [0:0]


def save(tokenizer_id, selected_language):
    """Build and write the filtered word list for one language.

    Loads two pickled objects for ``selected_language`` -- one from
    ``<root_path>/<tokenizer_id>/<selected_language>.pkl`` and one from
    ``/fsx/user_dir/common_freq/<selected_language>.pkl`` -- passes them
    through ``filter_top_percentile`` and ``filter_by_ratio``, and writes
    the keys of the result, one per line, to
    ``./wordlists-0.85/<selected_language>.txt`` (the directory is created
    if it does not exist).

    Args:
        tokenizer_id: Name of the sub-directory of the module-level
            ``root_path`` that holds the per-language pickle.
        selected_language: Base name (without extension) of the pickle
            files to read and of the text file to write.

    Raises:
        FileNotFoundError: If either input pickle does not exist.
    """
    tokenizer_path = os.path.join(root_path, tokenizer_id)

    # NOTE(review): pickle.load executes arbitrary code embedded in the file;
    # only use this on pickles produced by trusted pipeline steps.
    with open(os.path.join(tokenizer_path, selected_language + '.pkl'), 'rb') as f:
        language_tf = pickle.load(f)

    with open(os.path.join('/fsx/user_dir/common_freq', selected_language + '.pkl'), 'rb') as f:
        common_tf = pickle.load(f)

    # Presumably keeps the top-percentile entries of the language counts and
    # then those whose ratio against the common counts passes 0.85 --
    # TODO confirm against the helpers' definitions.
    language_filter_tf = filter_top_percentile(language_tf, 95)
    filtered_counter = filter_by_ratio(language_filter_tf, common_tf, 0.85)

    output_dir = './wordlists-0.85'
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f'{selected_language}.txt')

    # Write with an explicit UTF-8 encoding: the entries are words from
    # arbitrary languages, and the platform default encoding may be unable to
    # represent them. Only the keys are written; the mapped values are unused.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f'{key}\n' for key in filtered_counter)