一.示例
1.统计PV和UV
1.1统计PV
// PV (page views): count how many log lines share the same value at index 5
// of the line split on single whitespace characters.
// NOTE(review): index 5 is presumably the page/URL column — confirm against the pvuv.txt layout.
val conf = new SparkConf().setMaster("local").setAppName("pvuv")
val sc = new SparkContext(conf)
val lineRDD = sc.textFile("./pvuv.txt")
lineRDD
  .map(_.split("\\s"))
  // Emit (key, 1) per line so the counts can be summed per key.
  .map(fields => (fields(5), 1))
  .reduceByKey((left, right) => left + right)
  .foreach(println)
1.2统计UV
// UV (unique visitors): for each key at index 5, count the distinct values found at index 0.
// NOTE(review): index 0 is presumably the visitor identifier (e.g. IP) — confirm against the pvuv.txt layout.
lineRDD
  .map(_.split("\\s"))
  .map(fields => (fields(5), fields(0)))
  // Drop duplicate (key, visitor) pairs so each visitor is counted once per key.
  .distinct()
  // countByKey returns the per-key totals as a local Map, which is then printed.
  .countByKey()
  .foreach(println)
2.二次排序
SparkConf sparkConf = new SparkConf()
.setMaster("local")
.setAppName("SecondarySortTest");
final JavaSparkContext sc = new JavaSparkContext(sparkConf);
JavaRDD<String> secondRDD = sc.textFile("secondSort.txt");
JavaPairRDD<SecondSortKey, String> pairSecondRDD = secondRDD.mapToPair(new PairFunction<String, SecondSortKey, String>() {