def load_ks()

in kilt/datasets/hotpotqa_ks.py [0:0]


def load_ks(ks_directory, verbose=False):
    """Load the HotpotQA knowledge source from ``ks_directory`` in parallel.

    Every regular file found exactly one level below ``ks_directory`` (i.e.
    inside each of its immediate sub-directories) is collected, the file list
    is split into chunks with ``chunk_it``, and each chunk is handed to
    ``run_thread`` on a thread pool sized to the machine's CPU count.

    Args:
        ks_directory: Path of the directory whose immediate sub-directories
            contain the knowledge-source files.
        verbose: When true, print a progress message here; the flag is also
            forwarded to every worker.

    Returns:
        A single ``dict`` built by merging the per-chunk results in chunk
        order (on a key collision the later chunk wins).
        NOTE(review): assumes ``run_thread`` returns a mapping -- confirm
        against its definition.

    Raises:
        OSError: If ``ks_directory`` or one of its sub-directories cannot be
            listed.
        Exception: Whatever ``run_thread`` raises is re-raised by
            ``pool.map`` after the pool has been shut down.
    """
    num_threads = multiprocessing.cpu_count()

    if verbose:
        print(f"loading hotpotqa knowledge source with {num_threads} threads")

    # Gather the files before creating the pool so that a failing directory
    # scan cannot leave worker threads behind.
    filenames = []
    for entry in os.listdir(ks_directory):
        directory = os.path.join(ks_directory, entry)
        if not os.path.isdir(directory):
            continue
        for filetto in os.listdir(directory):
            if os.path.isfile(os.path.join(directory, filetto)):
                filenames.append("{}/{}".format(directory, filetto))

    # One work item per chunk; "id" is the chunk's index.
    arguments = [
        {"id": i, "filenames": chunk, "verbose": verbose}
        for i, chunk in enumerate(chunk_it(filenames, num_threads))
    ]

    pool = ThreadPool(num_threads)
    try:
        # map() blocks until every chunk is processed and keeps input order.
        results = pool.map(run_thread, arguments)
    finally:
        # Always release the worker threads, even when a worker raised.
        pool.terminate()
        pool.join()

    output_dict = {}
    for partial in results:
        output_dict.update(partial)

    return output_dict