Spark actions 算子
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | package action; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; import java.util.Arrays; import java.util.List; import java.util.Map; /** * TODO * * @ClassName: actions * @author: DingH * @since: 2019/4/2 10:53 */ public class actions { public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName( "actions" ).setMaster( "local" ); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<Integer> parallelize = sc.parallelize(Arrays.asList( 1 , 2 , 3 , 4 , 5 )); JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(Arrays.asList( new Tuple2<String, Integer>( "aaaa" , 111 ), new Tuple2<String, Integer>( "aaaa" , 111 ), new Tuple2<String, Integer>( "bbbb" , 222 ), new Tuple2<String, Integer>( "bbbb" , 222 ), new Tuple2<String, Integer>( "bbbb" , 222 ), new Tuple2<String, Integer>( "ccc" , 333 ) )); JavaPairRDD<String, Integer> rdd1 = rdd.reduceByKey( new Function2<Integer, Integer, Integer>() { public Integer call(Integer integer, Integer integer2) throws Exception { return integer + integer2; } }); Tuple2<String, Integer> reduce = rdd1.reduce( new Function2<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() { public Tuple2<String, Integer> call(Tuple2<String, Integer> stringIntegerTuple2, Tuple2<String, Integer> stringIntegerTuple22) throws Exception { Tuple2<String, Integer> stringIntegerTuple21 = new Tuple2<String, Integer>(stringIntegerTuple2._1 + stringIntegerTuple22._1, stringIntegerTuple2._2 + stringIntegerTuple22._2); return 
stringIntegerTuple21; } }); System.out.println(reduce); List<Tuple2<String, Integer>> collect = rdd1.collect(); for (Tuple2<String,Integer> tt:collect){ System.out.println(tt); } long count = rdd1.count(); Tuple2<String, Integer> first = rdd1.first(); List<Tuple2<String, Integer>> take = rdd1.take( 4 ); List<Tuple2<String, Integer>> tuple2s = rdd1.takeSample( false , 3 ); rdd1.saveAsTextFile( "" ); Map<String, Object> stringObjectMap = rdd1.countByKey(); rdd1.foreach( new VoidFunction<Tuple2<String, Integer>>() { public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception { System.out.println( 11 ); } }); sc.stop(); } } |
分类:
Spark
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)