src/util/topic_training_data_exporter.py (16 lines of code) (raw):

""" This file exports training data for the topic model from a set of user labeled groups. """ import pandas as pd from labeled_data_utils import get_labeled_dataset, user_test_list from tab_grouping.tab_titles import T5TopicGenerator from topic_utils import create_topic_training_dataset topic_generator = T5TopicGenerator() result_dfs = [] for user_dataset_name in user_test_list: datasets, labeled_topics = get_labeled_dataset(user_dataset_name) for i in range(len(datasets)): dataset = datasets[i] cur_run_labeled_topics = labeled_topics[i] training_dataset = create_topic_training_dataset(dataset, "smart_group_label", topic_generator, predicted_id_topics=cur_run_labeled_topics) result_dfs.append(training_dataset) all_users = pd.concat(result_dfs) all_users.to_json("./output/label_training/all_users2.json", orient="records") all_users.to_csv("./output/label_training/all_users2.csv")