in tpch/tpchgen.py [0:0]
def convert_tbl_to_parquet(
    ctx: SessionContext,
    table: str,
    tbl_filename: str,
    file_extension: str,
    parquet_filename: str,
):
    """Read a ``|``-delimited, header-less file and write it out as Parquet.

    The column names and types come from ``all_schemas[table]``; names are
    lower-cased and every column is declared non-nullable. The output is
    written with snappy compression.

    Args:
        ctx: Session context used to read the input file.
        table: Key into ``all_schemas`` selecting the column definitions.
        tbl_filename: Path of the delimited input file.
        file_extension: Extension passed through to ``ctx.read_csv``.
        parquet_filename: Destination path handed to ``write_parquet``.
    """
    print(f"Converting {tbl_filename} to {parquet_filename} ...")

    # Schema handling follows the DataFusion Python tpch example.
    data_fields = []
    for column_def in all_schemas[table]:
        data_fields.append(
            pyarrow.field(column_def[0].lower(), column_def[1], nullable=False)
        )

    # Remember the real column names up front: the placeholder field added
    # below must not end up in the Parquet output.
    kept_columns = [field.name for field in data_fields]

    # Each row ends with a trailing "|", which the reader sees as one more
    # (empty) column, so the read schema needs an extra nullable null field.
    trailing_field = pyarrow.field("some_null", pyarrow.null(), nullable=True)
    read_schema = pyarrow.schema([*data_fields, trailing_field])

    ctx.read_csv(
        tbl_filename,
        schema=read_schema,
        has_header=False,
        file_extension=file_extension,
        delimiter="|",
    ).select_columns(*kept_columns).write_parquet(
        parquet_filename, compression="snappy"
    )