def convert_tpch()

in tpch/tpchgen.py [0:0]


def convert_tpch(scale_factor: int, partitions: int):
    """Convert the generated ``data/<table>.tbl`` files to Parquet.

    With ``partitions == 1`` every table in ``table_names`` is written to a
    single file, ``data/<table>.parquet``. For any other value a directory
    ``data/<table>.parquet`` is created per table and one
    ``part<N>.parquet`` file is written into it for each input chunk
    ``data/<table>.tbl.<N>`` (``N`` from 1 to ``partitions`` inclusive).

    Args:
        scale_factor: Accepted for interface compatibility; not read by
            this function.
        partitions: Number of input chunks per table.
    """
    began = time.time()
    ctx = SessionContext()

    # These tables are special cases: they do not generate multiple files,
    # so they are always read from the unsuffixed .tbl file.
    unsplit_tables = ("nation", "region")

    for name in table_names:
        tbl_path = f"data/{name}.tbl"
        parquet_path = f"data/{name}.parquet"

        if partitions == 1:
            # Single-file layout: one .tbl in, one .parquet out.
            convert_tbl_to_parquet(ctx, name, tbl_path, "tbl", parquet_path)
            continue

        # Partitioned layout: the .parquet path is a directory of part files.
        run(f"mkdir -p {parquet_path}")

        if name in unsplit_tables:
            convert_tbl_to_parquet(
                ctx, name, tbl_path, "tbl", f"{parquet_path}/part1.parquet"
            )
            continue

        for idx in range(1, partitions + 1):
            convert_tbl_to_parquet(
                ctx,
                name,
                f"{tbl_path}.{idx}",
                f"tbl.{idx}",
                f"{parquet_path}/part{idx}.parquet",
            )

    elapsed = round(time.time() - began, 2)
    print(f"Converted CSV to Parquet in {elapsed} seconds")