in src/main/scala/com/spotify/bdrc/pipeline/WordCount.scala [57:63]
/**
 * Count word occurrences using a Spark action.
 *
 * Every line is split on runs of characters that are neither ASCII letters nor apostrophes,
 * and empty tokens produced by the split are discarded. Tokens are counted exactly as they
 * appear; no case normalization is applied.
 *
 * @param input lines of text to tokenize
 * @return one `(word, count)` pair per distinct token, in the iteration order of the map
 *         returned by `countByValue`
 */
def sparkAction(input: RDD[String]): Seq[(String, Long)] = {
  val words = input.flatMap { line =>
    line.split("[^a-zA-Z']+").filter(token => token.nonEmpty)
  }
  // `countByValue` is an action: the per-word totals are collected back to the driver node
  val totals = words.countByValue()
  totals.toSeq
}