in kilt/datasets/hotpotqa_ks.py [0:0]
def load_ks(ks_directory, verbose=False):
    """Load the HotpotQA knowledge source under ``ks_directory`` into one dict.

    Every regular file located exactly one level below ``ks_directory``
    (i.e. ``ks_directory/<subdir>/<file>``) is collected. The file list is
    split with ``chunk_it`` into one chunk per CPU, each chunk is handed to
    ``run_thread`` on a thread pool, and the per-chunk results are merged.

    Args:
        ks_directory: Directory whose immediate sub-directories contain the
            knowledge-source files. Files directly inside ``ks_directory``
            itself are ignored.
        verbose: When true, print the number of threads used; the flag is
            also forwarded to every ``run_thread`` call.

    Returns:
        A dict built by ``dict.update``-ing the value returned for each
        chunk, in chunk order, so on duplicate keys the later chunk wins.
        (Presumably ``run_thread`` returns a dict -- confirm against its
        definition.)

    Raises:
        OSError: If ``ks_directory`` or one of its sub-directories cannot be
            listed.
        Exception: Whatever ``run_thread`` raises is re-raised by
            ``pool.map``.
    """
    num_threads = multiprocessing.cpu_count()
    if verbose:
        print(f"loading hotpotqa knowledge source with {num_threads} threads")

    # Gather the files *before* starting any worker thread, so a failing
    # directory listing cannot leave an orphaned pool behind.
    filenames = []
    for entry in os.listdir(ks_directory):
        directory = os.path.join(ks_directory, entry)
        if not os.path.isdir(directory):
            continue
        for name in os.listdir(directory):
            filename = os.path.join(directory, name)
            if os.path.isfile(filename):
                filenames.append(filename)

    arguments = [
        {"id": i, "filenames": chunk, "verbose": verbose}
        for i, chunk in enumerate(chunk_it(filenames, num_threads))
    ]

    pool = ThreadPool(num_threads)
    try:
        # pool.map preserves input order, which keeps the merge deterministic
        # with respect to the chunk order.
        results = pool.map(run_thread, arguments)
    finally:
        # Always release the worker threads, including when a worker raised.
        pool.terminate()
        pool.join()

    output_dict = {}
    for partial in results:
        output_dict.update(partial)
    return output_dict