in 1_synthetic-qa-generation/reasoningplaning/evolve.py [0:0]
def createSeedPrompts(self):
    """
    Load the seed dataset from disk and index it by prompt text.

    When ``self.seed_data`` is a string naming an existing path, it is read
    as a JSON dataset and ``self.seed_text_dict`` is rebuilt so that the
    stripped value of the ``self.column_names`` column of every row maps to
    a dict holding that row's ``"idx"`` and ``GRND_TRUTH_COL`` values.
    Rows whose stripped text is identical share one key, so the last such
    row wins. ``self.maxIdx`` is raised to the largest ``idx`` seen and is
    then increased by 10.

    When ``self.seed_data`` is not a string naming an existing path, nothing
    is loaded and no attribute is modified.

    :return: None
    :raises AssertionError: if ``self.column_names`` is not a str, or if the
        loaded dataset yields no rows.
    """
    seed_is_path = isinstance(self.seed_data, str) and os.path.exists(self.seed_data)
    if not seed_is_path:
        return

    # Only a single column name is supported. Check it once, before the
    # dataset is loaded, and raise explicitly instead of `assert False`
    # so the check is not stripped when Python runs with -O.
    if not isinstance(self.column_names, str):
        raise AssertionError("column_names must be a str")

    data = load_dataset("json", data_files=self.seed_data)
    self.seed_text_dict = {}
    for row in data['train']:
        text = row[self.column_names]
        self.seed_text_dict[text.strip()] = {
            "idx": row["idx"],
            GRND_TRUTH_COL: row[GRND_TRUTH_COL],
        }
        # Track the highest row index seen so far.
        self.maxIdx = max(self.maxIdx, int(row["idx"]))

    # Explicit raise (rather than `assert`) so an empty import is still
    # reported when assertions are disabled.
    if not self.seed_text_dict:
        raise AssertionError("data import failed, got empty list")

    # NOTE(review): the +10 margin presumably leaves a gap before ids that
    # are assigned later -- confirm against the code that consumes maxIdx.
    self.maxIdx = self.maxIdx + 10