Notes: 尚硅谷 (Atguigu) Big Data Spark 2019
Writing a WordCount program in IntelliJ IDEA and running it on Spark
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD

object WordCount {
  def main(args: Array[String]): Unit = {
    /*
    // 1. First version: read a local file via an explicit file:// URI, single-threaded local master
    val inputFile = "file:///usr/local/spark/mycode/word.txt"
    val conf = new SparkConf().setAppName("WordCount").setMaster("local")
    val sc = new SparkContext(conf)
    val textFile = sc.textFile(inputFile)
    val wordCount = textFile.flatMap(line => line.split(" "))
                            .map(word => (word, 1))
                            .reduceByKey((a, b) => a + b)
    wordCount.foreach(println)
    */

    // 2. Second version: local[*] runs locally with as many worker threads as CPU cores
    val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")
    val sc = new SparkContext(config)
    // Read the input file as an RDD of lines (path relative to the project root)
    val lines: RDD[String] = sc.textFile("in/word.txt")
    // Split each line into words
    val words: RDD[String] = lines.flatMap(_.split(" "))
    // Pair each word with an initial count of 1
    val wordToOne: RDD[(String, Int)] = words.map((_, 1))
    // Sum the counts per word
    val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _)
    // Collect the results to the driver and print them
    val result: Array[(String, Int)] = wordToSum.collect()
    result.foreach(println)
  }
}
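The object can be run directly from IDEA. As an illustration, if in/word.txt contained the two lines "hello spark" and "hello scala", the program would print (spark,1), (scala,1), (hello,2) (ordering may vary across runs). Note that setMaster("local[*]") hard-codes local mode; when submitting to a cluster, the usual practice is to drop setMaster from the code and pass the master on the spark-submit command line instead, as in the next section.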
Deploying the Spark program to run on YARN
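A minimal sketch of the submission step, assuming the project has been packaged into a jar (e.g. WordCount.jar, a placeholder name) with the main class WordCount from above, and that HADOOP_CONF_DIR points at the cluster's Hadoop configuration; the memory and executor settings are illustrative only:

  # Submit the application to YARN in cluster mode;
  # the driver runs inside an ApplicationMaster on the cluster.
  bin/spark-submit \
    --class WordCount \
    --master yarn \
    --deploy-mode cluster \
    --executor-memory 1g \
    --num-executors 2 \
    WordCount.jar

With --deploy-mode client instead, the driver stays on the submitting machine and its println output appears in the local console, which is convenient for debugging; in cluster mode the output ends up in the YARN container logs.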