def sparkTransformation()

in src/main/scala/com/spotify/bdrc/pipeline/WordCount.scala [46:54]


  /**
   * Counts word occurrences in the input lines.
   *
   * Splits each line on runs of non-letter characters (apostrophes are kept so
   * contractions like "don't" survive as one token) and returns one
   * `(word, count)` pair per distinct word.
   */
  def sparkTransformation(input: RDD[String]): RDD[(String, Long)] = {
    // Tokenize: one record per non-empty word.
    val words = input.flatMap { line =>
      line.split("[^a-zA-Z']+").filter(word => word.nonEmpty)
    }
    // Spark has no `countByValue` *transformation*, so emit an initial count
    // of 1 per word and sum per key. `reduceByKey` combines map-side before
    // shuffling, keeping the aggregation efficient.
    val pairs = words.map(word => (word, 1L))
    pairs.reduceByKey((left, right) => left + right)
  }