Spark-Java-Operators
package scala.spark.Day3;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;

import java.util.Arrays;
import java.util.List;

/**
 * Created by Administrator on 2019/10/16.
 */
public class JavaRDDTest {
    public static void main(String[] args) {
        System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0");

        // JavaRDD       -> standard RDD
        // JavaPairRDD   -> pair (key/value) RDD
        // JavaDoubleRDD -> numeric RDD
        // Java has no Scala-style implicit conversions, so the concrete RDD
        // flavor must be stated explicitly when an RDD is created.

        // Instantiate the driver.
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("Java RDD");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

        // javaSparkContext.parallelize()        -> standard RDD
        // javaSparkContext.parallelizePairs()   -> pair RDD
        // javaSparkContext.parallelizeDoubles() -> numeric RDD
        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> sourceRDD = javaSparkContext.parallelize(list);

        // The map operator, implemented with an anonymous inner class:
        /*
        public interface Function<T1, R> extends Serializable {
            R call(T1 v1) throws Exception;
        }
        T1: the element type of the RDD (v1 is the input value)
        R:  the return type
        */
        JavaRDD<Integer> mapRDD = sourceRDD.map(
                new Function<Integer, Integer>() {
                    public Integer call(Integer v1) throws Exception {
                        return v1 * v1;
                    }
                }
        );

        // foreach takes a VoidFunction, which returns nothing:
        /*
        public interface VoidFunction<T> extends Serializable {
            void call(T t) throws Exception;
        }
        */
        mapRDD.foreach(
                new VoidFunction<Integer>() {
                    public void call(Integer integer) throws Exception {
                        System.out.println(integer);
                    }
                }
        );

        javaSparkContext.close();
    }
}
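The comments above list parallelizePairs() and parallelizeDoubles() without demonstrating them. As a minimal sketch of those two RDD flavors (the class name JavaPairRDDTest, the sample data, and the key/value aggregation scenario are made up for illustration; parallelizePairs, parallelizeDoubles, reduceByKey, sum, and mean are standard Spark Java API calls), the same anonymous-inner-class style might look like this:

package scala.spark.Day3;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaDoubleRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

import java.util.Arrays;

public class JavaPairRDDTest {
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("Java Pair RDD");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

        // parallelizePairs() builds a JavaPairRDD directly from Tuple2 elements.
        JavaPairRDD<String, Integer> pairRDD = javaSparkContext.parallelizePairs(
                Arrays.asList(
                        new Tuple2<>("a", 1),
                        new Tuple2<>("b", 2),
                        new Tuple2<>("a", 3)
                )
        );

        // reduceByKey merges the values of each key with the given Function2:
        // here ("a", 1) and ("a", 3) collapse to ("a", 4).
        JavaPairRDD<String, Integer> sumRDD = pairRDD.reduceByKey(
                new Function2<Integer, Integer, Integer>() {
                    public Integer call(Integer v1, Integer v2) throws Exception {
                        return v1 + v2;
                    }
                }
        );
        System.out.println(sumRDD.collect()); // e.g. [(a,4), (b,2)] (order may vary)

        // parallelizeDoubles() builds a JavaDoubleRDD, which adds numeric
        // helpers such as sum() and mean().
        JavaDoubleRDD doubleRDD = javaSparkContext.parallelizeDoubles(
                Arrays.asList(1.0, 2.0, 3.0, 4.0, 5.0)
        );
        System.out.println(doubleRDD.sum());  // 15.0
        System.out.println(doubleRDD.mean()); // 3.0

        javaSparkContext.close();
    }
}

On Java 8 and later, the anonymous inner classes in both listings can usually be replaced with lambdas, e.g. sourceRDD.map(v1 -> v1 * v1), because Function, VoidFunction, and Function2 each declare a single abstract method.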