in src/tab_title_tuning_data.py [0:0]
def gen_data_single_document(self, dataset: pd.DataFrame, limit=0):
    """Build a DataFrame of training data, one pass over the test sets.

    Any of the columns ``id``, ``task``, ``task_id`` and ``test_set_id``
    that are absent from *dataset* are added and filled with the frame's
    index. NOTE: the columns are added to the caller's DataFrame in place.

    Args:
        dataset: Source rows. Mutated in place when bookkeeping columns
            are missing.
        limit: When greater than 0, only the first ``limit`` distinct
            ``test_set_id`` values (in order of first appearance) are
            processed; otherwise all of them are.

    Returns:
        A ``pd.DataFrame`` built from whatever
        ``self.compute_training_data_for_tests`` yields for the selected
        test-set ids.
    """
    bookkeeping_columns = ('id', 'task', 'task_id', 'test_set_id')
    absent = [name for name in bookkeeping_columns
              if name not in dataset.columns]
    for name in absent:
        # Fall back to the row index as a stand-in identifier.
        dataset[name] = dataset.index

    distinct_ids = dataset["test_set_id"].unique().tolist()
    selected_ids = distinct_ids[:limit] if limit > 0 else distinct_ids

    # The trailing 1 is presumably the number of documents per example
    # (this is the "single document" variant) -- TODO confirm against
    # compute_training_data_for_tests.
    rows = list(self.compute_training_data_for_tests(dataset, selected_ids, 1))
    return pd.DataFrame(rows)