Spark Word Count
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark word-count job.
 *
 * Reads the text file at `args(0)`, counts how often each whitespace-separated
 * token occurs, orders the (word, count) pairs by descending count, and saves
 * them as text under the path given by `args(1)`.
 */
object WordCount {

  /**
   * Job entry point.
   *
   * @param args `args(0)` is the input path, `args(1)` is the output path
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage hint instead of a bare ArrayIndexOutOfBoundsException.
    require(args.length >= 2, "Usage: WordCount <inputPath> <outputPath>")
    val inputPath = args(0)
    val outputPath = args(1)

    val conf = new SparkConf().setAppName("WordCount")
    val sc = new SparkContext(conf)
    try {
      val wordCounts = sc
        .textFile(inputPath)
        // Split on runs of whitespace; a leading separator still produces one
        // empty token, so empty strings are dropped rather than counted as words.
        .flatMap(_.split("\\s+"))
        .filter(_.nonEmpty)
        .map(word => (word, 1))
        .reduceByKey(_ + _)

      // Most frequent words first.
      wordCounts
        .sortBy(_._2, ascending = false)
        .saveAsTextFile(outputPath)
    } finally {
      // Stop the context even when the job fails part-way through.
      sc.stop()
    }
  }
}
# Submit the WordCount job to YARN in cluster mode.
# Positional arguments after the application jar: input path, then output path.
spark-submit --class WordCount \
  --master yarn \
  --deploy-mode cluster \
  --num-executors 10 \
  --executor-memory 6G \
  --executor-cores 4 \
  --driver-memory 1G \
  /tmp/spark_practice/sparkPrj.jar \
  /tmp/spark_practice/ghEmployees.txt \
  /tmp/spark_practice/output
spark-submit 参数配置参考：https://www.cnblogs.com/haoyy/p/6893943.html