def convert_tpcds()

in tpcds/tpcdsgen.py [0:0]


def convert_tpcds(scale_factor: int, partitions: int):
    """Convert the ``.dat`` files under ``data/`` to Parquet, table by table.

    For every name in ``table_names``:

    * when ``partitions == 1``, ``data/<table>.dat`` is converted to
      ``data/<table>.parquet``;
    * otherwise ``data/<table>.parquet`` is created as a directory and each
      ``data/<table>.dat/part-<n>.dat`` that exists (``n`` from 1 through
      ``partitions``) is converted to ``data/<table>.parquet/part<n>.parquet``.

    The total elapsed time is printed once all tables have been processed.

    Args:
        scale_factor: Not read by this function; kept so existing callers
            keep working.
        partitions: Number of partition files to look for per table. The
            value ``1`` selects the single-file layout.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments during a long conversion.
    start_time = time.perf_counter()
    ctx = SessionContext()
    for table in table_names:
        if partitions == 1:
            # Single-file layout: one .dat in, one .parquet out.
            convert_dat_to_parquet(ctx, table, f"data/{table}.dat", "dat", f"data/{table}.parquet")
            continue

        # Partitioned layout: the output is a directory of part files.
        # exist_ok=True gives the same "create if missing" semantics as
        # `mkdir -p` without spawning a shell.
        os.makedirs(f"data/{table}.parquet", exist_ok=True)
        for part in range(1, partitions + 1):
            source_file = f"data/{table}.dat/part-{part}.dat"
            # Missing partition files are skipped rather than treated as an
            # error (presumably not every table yields every partition --
            # TODO confirm against the generator step).
            if os.path.exists(source_file):
                convert_dat_to_parquet(ctx, table, source_file, "dat", f"data/{table}.parquet/part{part}.parquet")
    end_time = time.perf_counter()
    print(f"Converted CSV to Parquet in {round(end_time - start_time, 2)} seconds")