def readGreen()

in spark-application/src/main/scala/ValueZones.scala [73:88]


  /**
   * Reads the CSV input at `green` and reshapes it to the column list in `rideColumns`.
   *
   * The input is read with a header row and schema inference. Rows are kept only when
   * `lpep_pickup_datetime` compares greater than `"2016"` and less than `"2021"`. The
   * `lpep_pickup_datetime` / `lpep_dropoff_datetime` columns are then renamed to
   * `pickup_datetime` / `dropoff_datetime`, and the result is projected to `VendorID`
   * followed by every other name in `rideColumns`.
   *
   * @param green path handed to the CSV reader
   * @return the filtered, renamed and projected rows
   */
  def readGreen(green: String): Dataset[Row] = {
    // Reader settings, collected in one place.
    // NOTE(review): the Spark docs spell the key `timestampFormat`; presumably option keys
    // are matched case-insensitively so this spelling still takes effect — confirm.
    val csvOptions = Map(
      "header" -> "true",
      "inferSchema" -> "true",
      "enforceSchema" -> "false",
      "timeStampFormat" -> "yyyy-MM-dd HH:mm:ss",
      "columnNameOfCorruptRecord" -> "error"
    )

    // NOTE(review): both bounds are bare string literals and exclusive; how they are coerced
    // against the inferred column type may depend on the Spark version — confirm that rows
    // exactly on the lower boundary are handled as intended.
    val pickup = col("lpep_pickup_datetime")
    val withinYears = pickup.gt("2016") && pickup.lt("2021")

    // VendorID is selected first; the remaining columns keep their order from rideColumns.
    val remainingColumns = rideColumns.filter(_ != "VendorID")

    session.read
      .options(csvOptions)
      .csv(green)
      .filter(withinYears)
      .withColumnRenamed("lpep_pickup_datetime", "pickup_datetime")
      .withColumnRenamed("lpep_dropoff_datetime", "dropoff_datetime")
      .select("VendorID", remainingColumns: _*)
  }