in service/app/engine.py [0:0]
def convert_csv_to_parquet(csv_dir, parquet_dir):
    """Convert the ``.csv`` files listed for *csv_dir* into Parquet files.

    Each CSV is read with pandas and written, Snappy-compressed, under
    *parquet_dir* at the same path it has relative to *csv_dir*, with the
    trailing ``.csv`` extension replaced by ``.parquet``. Missing
    destination directories are created on the way.

    Args:
        csv_dir: Directory handed to ``absolute_file_paths`` to enumerate
            candidate files. NOTE(review): that helper is defined outside
            this block; presumably it yields the paths of all files below
            ``csv_dir`` -- confirm.
        parquet_dir: Root directory that receives the Parquet files.

    Returns:
        None. The results are the files written to disk.
    """
    csv_suffix = ".csv"
    csv_files = [c for c in absolute_file_paths(csv_dir) if c.endswith(csv_suffix)]
    logging.warning(csv_files)
    for c in csv_files:
        logging.warning(f"csv file: {c}")
        # relpath never yields a leading separator, so os.path.join below
        # cannot discard parquet_dir (it drops every component that
        # precedes an absolute one). It also copes with csv_dir being
        # relative or occurring more than once inside the path.
        relative_csv = os.path.relpath(c, csv_dir)
        # Swap only the trailing extension; str.replace would also rewrite
        # ".csv" inside directory names.
        relative_parquet = relative_csv[: -len(csv_suffix)] + ".parquet"
        parquet_path = os.path.join(parquet_dir, relative_parquet)
        partition_dir = os.path.dirname(parquet_path)
        # dirname is empty when the target sits directly in the current
        # directory; os.makedirs("") would raise in that case.
        if partition_dir:
            os.makedirs(partition_dir, exist_ok=True)
        pd.read_csv(c).to_parquet(parquet_path, compression="snappy")