in src/scripts/gen_embeddings.py [0:0]
def prepare_dataset(dataset_name, entity, indices_path):
    """Load the dataset entries that still need to be processed.

    The input file is ``./data/{dataset_name}/{entity}/completions.json``
    when ``entity`` is truthy, otherwise ``./data/{dataset_name}/dataset.json``.
    Only entries whose key is among the indices returned by
    ``create_indices(dataset_name, indices_path)`` are kept. If a
    ``filenames.json`` file exists next to the input file, every index
    listed in it is treated as already finished and is skipped.

    Args:
        dataset_name: Name of the dataset directory under ``./data``.
        entity: Optional sub-directory of the dataset; a falsy value
            selects the dataset-level files.
        indices_path: Forwarded unchanged to ``create_indices``.

    Returns:
        A dict mapping each selected key to a list; values that are not
        already lists are wrapped in a single-element list. An empty dict
        is returned if a ``KeyError`` is raised while loading.
    """
    base_dir = f'./data/{dataset_name}/{entity}' if entity else f'./data/{dataset_name}'
    input_filename = f'{base_dir}/completions.json' if entity else f'{base_dir}/dataset.json'
    calculated_embeddings_indices_path = f'{base_dir}/filenames.json'

    # A set gives O(1) membership tests while filtering the JSON keys below.
    pending = set(create_indices(dataset_name, indices_path))

    if os.path.isfile(calculated_embeddings_indices_path):
        with open(calculated_embeddings_indices_path, 'r') as f:
            finished_indices = json.load(f)
        # Plain difference, not symmetric difference: a finished index that
        # was not requested in this run must not be added to the work set.
        pending -= set(finished_indices)

    try:
        with open(input_filename, 'r') as fp:
            dataset = {
                k: v if isinstance(v, list) else [v]
                for k, v in json.load(fp).items()
                if k in pending
            }
    except KeyError:
        # NOTE(review): nothing visible here obviously raises KeyError; the
        # handler is kept so existing callers see the same fallback.
        print(f"The file {input_filename} doesn't contain necessary keys.")
        return {}
    return dataset