def main()

in benchmarks/gray_sort_benchmark.py [0:0]


def main():
    """Parse command-line options, fill in unset sizing defaults, and run the gray sort plan.

    Options the user leaves unset are derived from the cluster shape (the
    executor count reported by the driver, plus CPUs and memory per node):

    * ``total_data_nbytes`` defaults to the combined memory of all nodes.
    * ``num_data_partitions`` defaults to half of the total CPU count.
    * ``num_sort_partitions`` defaults to the larger of
      ``total_num_cpus // sort_cpu_limit`` and the number of chunks of one
      quarter of the sort memory limit needed to cover the data.

    Every derived partition count is clamped to at least 1 so that small
    machines (e.g. a single CPU) or small data sizes never yield zero partitions.

    Raises:
        ValueError: if ``--input_paths`` is given without
            ``--num_sort_partitions``, or if ``--cpus_per_node`` or
            ``--sort_cpu_limit`` is not a positive integer.
    """
    SortBenchTool.ensure_installed()

    # psutil.cpu_count(logical=False) returns None when the physical core count
    # cannot be determined; fall back to the logical count, then to 1.
    default_cpus_per_node = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1

    driver = Driver()
    driver.add_argument("-R", "--record_nbytes", type=int, default=100)
    driver.add_argument("-K", "--key_nbytes", type=int, default=10)
    driver.add_argument("-T", "--total_data_nbytes", type=int, default=None)
    driver.add_argument("-B", "--gensort_batch_nbytes", type=int, default=512 * MB)
    driver.add_argument("-n", "--num_data_partitions", type=int, default=None)
    driver.add_argument("-t", "--num_sort_partitions", type=int, default=None)
    driver.add_argument("-i", "--input_paths", nargs="+", default=[])
    driver.add_argument("-e", "--shuffle_engine", default="duckdb", choices=("duckdb", "arrow"))
    driver.add_argument("-s", "--sort_engine", default="duckdb", choices=("duckdb", "arrow", "polars"))
    driver.add_argument("-H", "--hive_partitioning", action="store_true")
    driver.add_argument("-V", "--validate_results", action="store_true")
    driver.add_argument("-C", "--shuffle_cpu_limit", type=int, default=ShuffleNode.default_cpu_limit)
    driver.add_argument(
        "-M",
        "--shuffle_memory_limit",
        type=int,
        default=ShuffleNode.default_memory_limit,
    )
    driver.add_argument("-TC", "--sort_cpu_limit", type=int, default=8)
    driver.add_argument("-TM", "--sort_memory_limit", type=int, default=None)
    driver.add_argument("-NC", "--cpus_per_node", type=int, default=default_cpus_per_node)
    driver.add_argument("-NM", "--memory_per_node", type=int, default=psutil.virtual_memory().total)
    driver.add_argument("-CP", "--parquet_compression", default=None)
    driver.add_argument("-LV", "--parquet_compression_level", type=int, default=None)

    user_args, driver_args = driver.parse_arguments()

    # Explicit raise instead of `assert`, which is removed under `python -O`.
    # NOTE(review): presumably the partition count cannot be derived from
    # --total_data_nbytes when reading existing inputs — confirm.
    if user_args.input_paths and user_args.num_sort_partitions is None:
        raise ValueError("--num_sort_partitions is required when --input_paths is given")
    if user_args.cpus_per_node < 1:
        raise ValueError(f"--cpus_per_node must be a positive integer, got {user_args.cpus_per_node}")

    # The "arrow" sort engine is always limited to a single CPU per sort task.
    if user_args.sort_engine == "arrow":
        user_args.sort_cpu_limit = 1
    if user_args.sort_cpu_limit < 1:
        raise ValueError(f"--sort_cpu_limit must be a positive integer, got {user_args.sort_cpu_limit}")

    # Treat a driver reporting zero (or fewer) executors as a single node.
    num_nodes = max(1, driver_args.num_executors)
    total_num_cpus = num_nodes * user_args.cpus_per_node
    memory_per_cpu = user_args.memory_per_node // user_args.cpus_per_node

    # Only used locally to size sort partitions; user_args.sort_memory_limit is
    # forwarded to the plan exactly as the user gave it (possibly None).
    sort_memory_limit = user_args.sort_memory_limit or user_args.sort_cpu_limit * memory_per_cpu

    if not user_args.total_data_nbytes:
        user_args.total_data_nbytes = num_nodes * user_args.memory_per_node
    if not user_args.num_data_partitions:
        # Clamp: total_num_cpus // 2 is 0 on a single-CPU setup.
        user_args.num_data_partitions = max(1, total_num_cpus // 2)
    if not user_args.num_sort_partitions:
        # Each sort partition targets a quarter of the sort memory limit; clamp
        # the divisor so a tiny limit cannot cause a division by zero.
        sort_chunk_nbytes = max(1, sort_memory_limit // 4)
        user_args.num_sort_partitions = max(
            1,
            total_num_cpus // user_args.sort_cpu_limit,
            user_args.total_data_nbytes // sort_chunk_nbytes,
        )

    plan = gray_sort_benchmark(**vars(user_args))
    driver.run(plan)