spark actions 算子

package action;
 
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
 
import java.util.Arrays;
import java.util.List;
import java.util.Map;
 
/**
 * TODO
 *
 * @ClassName: actions
 * @author: DingH
 * @since: 2019/4/2 10:53
 */
public class actions {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("actions").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
 
        JavaRDD<Integer> parallelize = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
 
        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(Arrays.asList(
                new Tuple2<String, Integer>("aaaa", 111),
                new Tuple2<String, Integer>("aaaa", 111),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("ccc", 333)
        ));
 
        JavaPairRDD<String, Integer> rdd1 = rdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });
 
        Tuple2<String, Integer> reduce = rdd1.reduce(new Function2<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
            public Tuple2<String, Integer> call(Tuple2<String, Integer> stringIntegerTuple2, Tuple2<String, Integer> stringIntegerTuple22) throws Exception {
                Tuple2<String, Integer> stringIntegerTuple21 = new Tuple2<String, Integer>(stringIntegerTuple2._1 + stringIntegerTuple22._1, stringIntegerTuple2._2 + stringIntegerTuple22._2);
 
                return stringIntegerTuple21;
            }
        });
 
        System.out.println(reduce);
 
        List<Tuple2<String, Integer>> collect = rdd1.collect();
        for (Tuple2<String,Integer> tt:collect){
            System.out.println(tt);
        }
 
        long count = rdd1.count();
 
        Tuple2<String, Integer> first = rdd1.first();
 
        List<Tuple2<String, Integer>> take = rdd1.take(4);
 
        List<Tuple2<String, Integer>> tuple2s = rdd1.takeSample(false, 3);
 
        rdd1.saveAsTextFile("");
 
        Map<String, Object> stringObjectMap = rdd1.countByKey();
 
        rdd1.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                System.out.println(11);
            }
        });
 
        sc.stop();
    }
}

  

posted @   _Meditation  阅读(127)  评论(0)  编辑  收藏  举报
编辑推荐:
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
点击右上角即可分享
微信分享提示