def transform_data()

in assets/spark_scripts/SparkScript01.py [0:0]


def transform_data(spark, source_bucket, destination_bucket):
    """Convert the CSV data at *source_bucket* into Parquet at *destination_bucket*.

    The CSV is read through ``spark.read`` with the ``header``, ``delimiter``
    and ``inferSchema`` options set, and the resulting DataFrame is written
    out with write mode ``overwrite``.

    Args:
        spark: Object exposing a ``read`` attribute with the DataFrameReader
            interface (presumably a ``SparkSession`` -- confirm with callers).
        source_bucket: Location of the input CSV; formatted to a string
            before being handed to the reader.
        destination_bucket: Location for the Parquet output; formatted to a
            string before being handed to the writer.

    Returns:
        None.
    """
    # Reader options, applied in this order.
    csv_options = (
        ("header", "true"),
        ("delimiter", ","),
        ("inferSchema", "true"),
    )

    reader = spark.read
    for option_name, option_value in csv_options:
        reader = reader.option(option_name, option_value)

    movies_df = reader.csv(f"{source_bucket}")

    # 'overwrite' replaces whatever already exists at the destination.
    parquet_writer = movies_df.write.mode('overwrite')
    parquet_writer.parquet(f"{destination_bucket}")