in spark-application/src/main/scala/ValueZones.scala [90:105]
/**
  * Loads a yellow-taxi CSV file and reshapes it to the shared ride column layout.
  *
  * Steps, in order:
  *  1. Read `yellow` as CSV with a header row, schema inference enabled and
  *     schema enforcement disabled.
  *  2. Keep only rows whose `tpep_pickup_datetime` is strictly greater than
  *     "2016" and strictly less than "2021" (both bounds are exclusive).
  *     NOTE(review): whether this compares as timestamps or as strings depends
  *     on the type Spark infers for the column and on the Spark version —
  *     confirm the intended boundary behaviour.
  *  3. Rename the `tpep_`-prefixed pickup/dropoff columns to `pickup_datetime`
  *     and `dropoff_datetime`.
  *  4. Project `VendorID` first, followed by every other entry of `rideColumns`.
  *
  * @param yellow path passed straight to the CSV reader
  * @return the filtered, renamed and projected rows
  */
def readYellow(yellow: String): Dataset[Row] = {
  // Reader configuration, kept together so the CSV settings are visible at a glance.
  val readerOptions = Map(
    "header" -> "true",
    "inferSchema" -> "true",
    "enforceSchema" -> "false",
    "timeStampFormat" -> "yyyy-MM-dd HH:mm:ss",
    "columnNameOfCorruptRecord" -> "error"
  )

  // Source column name -> name used by the rest of the pipeline.
  val renames = Seq(
    "tpep_pickup_datetime" -> "pickup_datetime",
    "tpep_dropoff_datetime" -> "dropoff_datetime"
  )

  val pickup = col("tpep_pickup_datetime")
  val raw = session.read.options(readerOptions).csv(yellow)

  // Filtering happens before the rename, so it still refers to the tpep_ column.
  val inWindow = raw.filter(pickup > "2016" && pickup < "2021")

  val renamed = renames.foldLeft(inWindow) { case (frame, (from, to)) =>
    frame.withColumnRenamed(from, to)
  }

  // VendorID is listed explicitly first; drop it from the tail to avoid selecting it twice.
  val remainingColumns = rideColumns.filterNot(_ == "VendorID")
  renamed.select("VendorID", remainingColumns: _*)
}