in src/main/scala/com/spotify/bdrc/pipeline/WordCount.scala [46:54]
/**
 * Counts word occurrences in a corpus of lines using Spark RDDs.
 *
 * @param input lines of raw text
 * @return one `(word, count)` pair per distinct word
 */
def sparkTransformation(input: RDD[String]): RDD[(String, Long)] = {
  // Tokenize each line on runs of non-letter, non-apostrophe characters,
  // dropping the empty tokens the split can produce at line boundaries.
  val words = input.flatMap { line =>
    line.split("[^a-zA-Z']+").filter(_.nonEmpty)
  }
  // Spark has no `countByValue` *transformation* (only an action), so we
  // emulate it: seed every word with a count of 1, then sum per key.
  // `reduceByKey` performs map-side combining before the shuffle.
  words
    .map(word => (word, 1L))
    .reduceByKey(_ + _)
}