in src/main/scala/com/spotify/bdrc/pipeline/WordCount.scala [28:36]
/**
 * Counts how many times each word occurs across all input lines.
 *
 * A word is a maximal run of ASCII letters and apostrophes; every other
 * character acts as a separator. No case normalization is applied, so
 * words that differ only in case are counted separately.
 *
 * @param input pipe of text lines
 * @return pipe of `(word, occurrences)` pairs
 */
def scalding(input: TypedPipe[String]): TypedPipe[(String, Long)] = {
  // Separator pattern: one or more characters that are neither ASCII letters nor apostrophes
  val separator = "[^a-zA-Z']+"

  val words = input.flatMap { line =>
    // Discard empty tokens, e.g. the leading one produced when a line starts with a separator
    line.split(separator).filter(token => token.nonEmpty)
  }

  // `groupBy` is lazy; an aggregation such as `size` applied right after it
  // can be lifted into the map phase
  val occurrencesByWord = words.groupBy(identity).size

  occurrencesByWord.toTypedPipe
}