in src/main/scala/com/aliyun/emr/example/spark/SparkWordCount.scala [22:45]
def main(args: Array[String]): Unit = {
  if (args.length < 3) {
    System.err.println(
      """Usage: bin/spark-submit --class com.aliyun.emr.example.spark.SparkWordCount examples-1.0-SNAPSHOT-shaded.jar <inputPath> <outputPath> <numPartitions>
        |
        |Arguments:
        |
        |  inputPath      Input OSS object path, like oss://accessKeyId:accessKeySecret@bucket.endpoint/input/words.txt
        |  outputPath     Output OSS object path, like oss://accessKeyId:accessKeySecret@bucket.endpoint/output
        |  numPartitions  The number of RDD partitions.
        |
      """.stripMargin)
    System.exit(1)
  }

  val inputPath = args(0)
  val outputPath = args(1)
  val numPartitions = args(2).toInt

  // Read the input text from OSS into an RDD with the requested number of partitions.
  val input = getSparkContext.textFile(inputPath, numPartitions)
  // Split each line into words, pair each word with a count of 1, and sum the counts per word.
  val output = input.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _)
  // Write the (word, count) pairs back to OSS.
  output.saveAsTextFile(outputPath)
}
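
The method above calls getSparkContext, which is not defined in this excerpt and is presumably provided elsewhere in the examples project. A minimal sketch of what such a helper might look like, assuming a plain SparkConf-based setup (the app name here is illustrative, not taken from the source):

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical helper, not part of this excerpt: builds a SparkContext from
// a default SparkConf. The real project may configure it differently.
def getSparkContext: SparkContext = {
  val conf = new SparkConf().setAppName("SparkWordCount")
  new SparkContext(conf)
}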