tpch/make_data.py (23 lines of code) (raw):
import os
import sys

import duckdb
# Module-level DuckDB connection shared by make() and execute(). No database
# path is passed to connect(); per the DuckDB docs this should yield an
# in-memory database -- NOTE(review): confirm if persistence is ever expected.
conn = duckdb.connect()
def make(scale_factor: int, output_path: str):
    """Generate the TPC-H dataset and export every table to Parquet.

    Runs ``install tpch`` / ``load tpch`` on the shared connection, calls
    ``dbgen`` at the requested scale factor, then copies each table listed
    by ``show tables`` to ``<output_path>/<table>.parquet`` using zstd
    compression.

    Args:
        scale_factor: Value passed to ``dbgen(sf = ...)``.
        output_path: Directory (or path prefix) that receives one Parquet
            file per table.

    Raises:
        OSError: If a local output directory cannot be created.
    """
    # Create a local output directory up front so a missing directory is not
    # discovered only after data generation has already run. Skipped for
    # empty paths and URL-style targets (e.g. "s3://..."), which are not
    # local directories.
    if output_path and "://" not in output_path:
        os.makedirs(output_path, exist_ok=True)

    execute(
        [
            "install tpch",
            "load tpch",
            f"call dbgen(sf = {scale_factor})",
        ]
    )

    # The path is embedded in a SQL string literal, so double any single
    # quotes to keep the generated statement well-formed.
    sql_path = output_path.replace("'", "''")
    tables = [row[0] for row in conn.execute("show tables").fetchall()]
    execute(
        [
            f"copy {table} to '{sql_path}/{table}.parquet' (format parquet, compression zstd)"
            for table in tables
        ]
    )
def execute(statements):
    """Run each SQL statement, in order, on the shared connection.

    Every statement is echoed to stdout immediately before it is executed,
    so the last line printed identifies the statement that was running if
    an error is raised.
    """
    for sql in statements:
        print(f"executing: {sql}")
        conn.execute(sql)
if __name__ == "__main__":
    # Usage: make_data.py <scale_factor> <output_path>
    # Fail with a usage message (non-zero exit) instead of an IndexError
    # traceback when arguments are missing. Extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <scale_factor> <output_path>")
    make(int(sys.argv[1]), sys.argv[2])