spark 数据分析

package com.swust.java.spark;


import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;


import java.util.Arrays;
import java.util.Iterator;
import java.util.List;


/**
 * Spark job that prints the ten base stations with the highest call drop rate.
 *
 * <p>Input is a CSV file with the following columns (indices in parentheses):
 * <pre>
 * record_time(0), imei(1), cell(2), ph_num(3), call_num(4),
 * drop_num(5), duration(6), drop_rate(7), net_type(8), erl(9)
 *
 * e.g. 2011-07-13 00:00:00+08,352024,29448-51331,0,0,0,0,0,G,0
 * </pre>
 * Field meanings, as described by the original author:
 * <ul>
 *   <li>record_time: time of the call</li>
 *   <li>imei: base station id</li>
 *   <li>cell: phone id</li>
 *   <li>drop_num: seconds of dropped calls</li>
 *   <li>duration: total call duration in seconds</li>
 * </ul>
 * For every base station the job sums {@code drop_num} and {@code duration}
 * over all records and reports {@code sum(drop_num) / sum(duration)}.
 */
public class SparkTest {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "./data/csvdata.csv";

    /** First column name of the CSV header row; used to recognise and skip it. */
    private static final String HEADER_FIRST_FIELD = "record_time";

    private static final int IMEI_INDEX = 1;
    private static final int DROP_NUM_INDEX = 5;
    private static final int DURATION_INDEX = 6;

    /** Number of base stations to print. */
    private static final int TOP_N = 10;

    /**
     * Runs the analysis and prints one {@code (imei,dropRate)} tuple per line,
     * highest drop rate first.
     *
     * @param args optional; {@code args[0]} overrides the input path,
     *             otherwise {@value #DEFAULT_INPUT_PATH} is read
     */
    public static void main(String[] args) {
        String inpath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        SparkConf conf = new SparkConf();
        conf.setAppName("wordCount");
        conf.setMaster("local");

        // JavaSparkContext is Closeable; closing it stops the context even if the job fails.
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            jsc.setLogLevel("Error");
            JavaRDD<String> lines = jsc.textFile(inpath);

            // Each valid line becomes (imei, (drop_num, duration)); the header and
            // malformed lines contribute nothing instead of being counted as zeros
            // under an arbitrary station id.
            JavaRDD<Tuple2<String, Tuple2<Double, Double>>> records = lines.flatMap(
                    new FlatMapFunction<String, Tuple2<String, Tuple2<Double, Double>>>() {
                        @Override
                        public Iterator<Tuple2<String, Tuple2<Double, Double>>> call(String line) {
                            return parseRecord(line).iterator();
                        }
                    });
            JavaPairRDD<String, Tuple2<Double, Double>> perStation = JavaPairRDD.fromJavaRDD(records);

            // Sum drop seconds and call seconds per base station.
            JavaPairRDD<String, Tuple2<Double, Double>> totals = perStation.reduceByKey(
                    new Function2<Tuple2<Double, Double>, Tuple2<Double, Double>, Tuple2<Double, Double>>() {
                        @Override
                        public Tuple2<Double, Double> call(Tuple2<Double, Double> left,
                                                           Tuple2<Double, Double> right) {
                            return new Tuple2<Double, Double>(
                                    left._1() + right._1(),
                                    left._2() + right._2());
                        }
                    });

            // Key by drop rate so that sortByKey can order the stations.
            JavaPairRDD<Double, String> rateToStation = totals.mapToPair(
                    new PairFunction<Tuple2<String, Tuple2<Double, Double>>, Double, String>() {
                        @Override
                        public Tuple2<Double, String> call(Tuple2<String, Tuple2<Double, Double>> entry) {
                            double dropped = entry._2()._1();
                            double duration = entry._2()._2();
                            // A station with no recorded call time gets rate 0 rather than NaN/Infinity.
                            double rate = duration != 0.0 ? dropped / duration : 0.0;
                            return new Tuple2<Double, String>(rate, entry._1());
                        }
                    });

            List<Tuple2<Double, String>> top = rateToStation.sortByKey(false).take(TOP_N);
            for (Tuple2<Double, String> entry : top) {
                // Swap back so the output reads (imei,dropRate).
                System.out.println(entry.swap());
            }
        }
    }

    /**
     * Parses one CSV line into an {@code (imei, (drop_num, duration))} record.
     *
     * @param line raw line from the input file
     * @return a single-element list holding the record, or an empty list when the
     *         line is the header, has fewer than seven fields, or has non-numeric
     *         {@code drop_num}/{@code duration} values
     */
    private static List<Tuple2<String, Tuple2<Double, Double>>> parseRecord(String line) {
        String[] fields = line.split(",");
        if (fields.length <= DURATION_INDEX || HEADER_FIRST_FIELD.equals(fields[0].trim())) {
            return java.util.Collections.emptyList();
        }
        try {
            double dropNum = Double.parseDouble(fields[DROP_NUM_INDEX]);
            double duration = Double.parseDouble(fields[DURATION_INDEX]);
            return java.util.Collections.singletonList(
                    new Tuple2<String, Tuple2<Double, Double>>(
                            fields[IMEI_INDEX],
                            new Tuple2<Double, Double>(dropNum, duration)));
        } catch (NumberFormatException ignored) {
            // Deliberately skipped: one unparseable row (for example a header that
            // starts with a byte-order mark) should not abort the whole job.
            return java.util.Collections.emptyList();
        }
    }
}

  

基站掉话率:找出掉话率最高的前10个基站
record_time:通话时间
imei:基站编号
cell:手机编号
drop_num:掉话的秒数
duration:通话持续总秒数

 

 

posted @ 2020-04-14 22:08  雪瞳  阅读(165)  评论(0编辑  收藏  举报