evolve-instruct/merge_json.py (16 lines of code) (raw):
import os
from pathlib import Path
import json, ftfy
from datasets import load_dataset
# Merge every JSON file found (recursively) under ./gen_data into one JSON
# array, write it to evol_instruct.json, then load that file back through
# `datasets` and print the result as a sanity check.
data = []
# rglob yields paths in filesystem order, which varies between machines and
# runs; sorting makes the order of the merged records reproducible.
for path in sorted(Path('./gen_data').rglob('*.json')):
    print(path)
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding, which is not UTF-8 everywhere.
    with open(path, encoding="utf-8") as f:
        d = json.load(f)
    # `list += dict` would silently append the dict's keys and `list += str`
    # would append single characters, so reject anything that is not an array
    # instead of corrupting the merged output.
    if not isinstance(d, list):
        raise TypeError(
            f"{path}: expected a top-level JSON array, got {type(d).__name__}"
        )
    data += d
out_file_name_txt = "evol_instruct.json"
# ensure_ascii=False emits raw non-ASCII characters, so the file must be
# opened as UTF-8 explicitly; the context manager closes the handle even if
# serialization fails part-way.
with open(out_file_name_txt, "w", encoding="utf-8") as out:
    out.write(json.dumps(data, indent=2, ensure_ascii=False))
# Round-trip the merged file through the `datasets` JSON loader to confirm it
# parses, and print what the loader returned.
ds = load_dataset("json", data_files=out_file_name_txt)
print(ds)