import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * Demonstrates {@code mapPartitions} and {@code mapPartitionsWithIndex} on a Java Spark RDD.
 *
 * <p>Author: xiaoshubiao
 *
 * <p>Time: 2020/5/13 14:13
 */
public class spark_function {
    /**
     * Starts a local Spark context, distributes four strings over three partitions, and runs
     * them through {@code mapPartitions} followed by {@code mapPartitionsWithIndex}, printing
     * every element seen at each stage.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("spark_java_function");
        // JavaSparkContext is Closeable: try-with-resources shuts the context down
        // even when the job throws, instead of leaking it as the bare constructor call did.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> list = Arrays.asList("a", "b", "c", "d");
            JavaRDD<String> parallelize = sc.parallelize(list, 3);
            parallelize
                    .mapPartitions(
                            new FlatMapFunction<Iterator<String>, String>() {
                                /**
                                 * Prints each element of the partition and passes it through unchanged.
                                 *
                                 * @param stringIterator the elements of one partition
                                 * @return an iterator over the same elements, in the same order
                                 */
                                @Override
                                public Iterator<String> call(Iterator<String> stringIterator) throws Exception {
                                    List<String> seen = new ArrayList<>();
                                    while (stringIterator.hasNext()) {
                                        String next = stringIterator.next();
                                        System.out.println("mapPartitions函数的输出" + next);
                                        seen.add(next);
                                    }
                                    return seen.iterator();
                                }
                            })
                    .mapPartitionsWithIndex(
                            new Function2<Integer, Iterator<String>, Iterator<String>>() {
                                /**
                                 * Prints each element together with its partition index and
                                 * appends that index to the element.
                                 *
                                 * @param integer the index of the partition being processed
                                 * @param stringIterator the elements of that partition
                                 * @return an iterator over the elements with the index appended
                                 */
                                @Override
                                public Iterator<String> call(Integer integer, Iterator<String> stringIterator)
                                        throws Exception {
                                    // Diamond instead of the raw ArrayList the original used here.
                                    List<String> tagged = new ArrayList<>();
                                    while (stringIterator.hasNext()) {
                                        String next = stringIterator.next();
                                        System.out.println("分区id:" + integer + "--值:" + next);
                                        tagged.add(next + integer);
                                    }
                                    return tagged.iterator();
                                }
                            },
                            false)
                    // collect() is the action that makes the two transformations run;
                    // the returned list itself is not used.
                    .collect();
        }
    }
}

 

// Posted on 2020-05-13 14:47 by 7749ha | Views (538) | Comments (0) | Edit | Bookmark | Report