def generate_tpch()

in tpch/tpchgen.py [0:0]


def generate_tpch(scale_factor: int, partitions: int):
    """Generate TPC-H data with the tpch-docker image and print the elapsed time.

    With ``partitions == 1`` a single container is run and its output is
    logged to ``/tmp/tpchgen.log``. For any other value, one container is
    started per ``part`` in ``1..partitions`` (passing ``-C {partitions}
    -S {part}``), the containers are run concurrently in a thread pool, and
    each one logs to ``/tmp/tpchgen-part{part}.log``. A value below 1 starts
    no containers at all.

    Args:
        scale_factor: Value passed to the generator's ``-s`` option.
        partitions: Number of partitions to generate.

    Raises:
        RuntimeError: If one or more of the parallel commands raised. Every
            command is allowed to finish first; the first underlying
            exception is attached as ``__cause__``. Exceptions from the
            single-partition command propagate unchanged.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments made while the containers run.
    start_time = time.perf_counter()
    if partitions == 1:
        # NOTE(review): the backtick `pwd` substitution presumably relies on
        # run_and_log_output executing the string through a shell - confirm.
        command = f"docker run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor}"
        run_and_log_output(command, "/tmp/tpchgen.log")
    else:
        # cpu_count() may return None; ThreadPoolExecutor then picks its own
        # default worker count.
        max_threads = os.cpu_count()

        # One (command, log file) pair per partition, numbered from 1.
        commands = [
            (f"docker run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor} -C {partitions} -S {part}",
             f"/tmp/tpchgen-part{part}.log")
            for part in range(1, partitions + 1)
        ]

        # (log file, exception) for every command that raised.
        failures = []

        # Run the commands in parallel.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
            # Remember which log file belongs to each future so failures can
            # be reported with a pointer to the relevant log.
            future_to_log = {
                executor.submit(run_and_log_output, command, log_file): log_file
                for (command, log_file) in commands
            }

            # Let every command finish, even if some fail, so the remaining
            # partitions are still produced and all failures are reported.
            for future in concurrent.futures.as_completed(future_to_log):
                try:
                    future.result()
                except Exception as e:
                    print(f"Command failed with exception: {e}")
                    failures.append((future_to_log[future], e))

        if failures:
            # Do not report success when partitions are missing; this matches
            # the single-partition path, where a failure propagates.
            failed_logs = ", ".join(sorted(log_file for log_file, _ in failures))
            raise RuntimeError(
                f"{len(failures)} of {len(commands)} TPC-H generation commands failed; "
                f"see logs: {failed_logs}"
            ) from failures[0][1]

    end_time = time.perf_counter()
    print(f"Generated CSV data in {round(end_time - start_time, 2)} seconds")