根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式一
测试数据
java代码
package com.hzf.spark.study;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;

import scala.Tuple2;

/**
 * Finds the most-viewed channels in a tab-separated user log and, for each of
 * those channels, prints the users with the most log records.
 *
 * <p>Each log line is split on tabs; this class reads the user id from field 2,
 * the channel from field 4 and the action from field 5 (all zero-based).
 */
public class HotChannel01 {

    /** How many channels, and how many users per channel, are reported. */
    private static final int TOP_N = 3;

    private static final String FIELD_SEPARATOR = "\t";
    private static final int USER_ID_INDEX = 2;
    private static final int CHANNEL_INDEX = 4;
    private static final int ACTION_INDEX = 5;

    /**
     * Runs the job locally against the {@code userLog1} input, counting only
     * records whose action equals {@code "View"} when ranking channels.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("HotChannel")
                .setMaster("local")
                .set("spark.testing.memory", "2147480000");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> logRDD = sc.textFile("userLog1");
            final Broadcast<String> broadcast = sc.broadcast("View");
            hotChannel(sc, logRDD, broadcast);
        } finally {
            // Always release the context, even when the job fails.
            sc.stop();
        }
    }

    /**
     * Prints the top channels by number of matching-action records, then for
     * each of those channels prints its top users by record count.
     *
     * <p>Note that the per-user ranking is computed over all records of
     * {@code logRDD} for the hot channels, not only those matching the
     * broadcast action. Lines that do not contain the fields being read are
     * skipped. If fewer than {@value #TOP_N} channels or users exist, only
     * the available ones are printed.
     *
     * @param sc        context used to broadcast the hot-channel list
     * @param logRDD    raw tab-separated log lines
     * @param broadcast the action value a record must have to count as a page view
     */
    private static void hotChannel(JavaSparkContext sc, JavaRDD<String> logRDD,
            final Broadcast<String> broadcast) {
        // Keep only the records whose action equals the broadcast action.
        JavaRDD<String> filteredLogRDD = logRDD.filter(new Function<String, Boolean>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Boolean call(String v1) throws Exception {
                String[] fields = v1.split(FIELD_SEPARATOR);
                // Short or blank lines carry no action field; drop them.
                return fields.length > ACTION_INDEX
                        && broadcast.value().equals(fields[ACTION_INDEX]);
            }
        });

        // Key every remaining record by channel; the value is irrelevant for counting.
        JavaPairRDD<String, String> channel2nullRDD =
                filteredLogRDD.mapToPair(new PairFunction<String, String, String>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, String> call(String val) throws Exception {
                String channel = val.split(FIELD_SEPARATOR)[CHANNEL_INDEX];
                return new Tuple2<String, String>(channel, null);
            }
        });

        Map<String, Object> channelPVMap = channel2nullRDD.countByKey();
        List<SortObj> channels = new ArrayList<>();
        for (Map.Entry<String, Object> entry : channelPVMap.entrySet()) {
            channels.add(new SortObj(entry.getKey(),
                    Integer.valueOf(String.valueOf(entry.getValue()))));
        }
        sortByValueDescending(channels);

        // Take at most TOP_N channels; there may be fewer in the input.
        List<String> hotChannelList = new ArrayList<>();
        int channelLimit = Math.min(TOP_N, channels.size());
        for (int i = 0; i < channelLimit; i++) {
            hotChannelList.add(channels.get(i).getKey());
        }
        for (String hotChannel : hotChannelList) {
            System.out.println("channle:" + hotChannel);
        }

        final Broadcast<List<String>> hotChannelListBroadcast = sc.broadcast(hotChannelList);

        // Records belonging to a hot channel whose user id is not the literal "null".
        JavaRDD<String> filtedRDD = logRDD.filter(new Function<String, Boolean>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Boolean call(String v1) throws Exception {
                String[] fields = v1.split(FIELD_SEPARATOR);
                if (fields.length <= CHANNEL_INDEX) {
                    return false;
                }
                List<String> hotChannels = hotChannelListBroadcast.value();
                return hotChannels.contains(fields[CHANNEL_INDEX])
                        && !"null".equals(fields[USER_ID_INDEX]);
            }
        });

        JavaPairRDD<String, String> channel2UserRDD =
                filtedRDD.mapToPair(new PairFunction<String, String, String>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, String> call(String v1) throws Exception {
                String[] splited = v1.split(FIELD_SEPARATOR);
                return new Tuple2<String, String>(splited[CHANNEL_INDEX], splited[USER_ID_INDEX]);
            }
        });

        channel2UserRDD.groupByKey().foreach(new VoidFunction<Tuple2<String, Iterable<String>>>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<String, Iterable<String>> tuple) throws Exception {
                String channel = tuple._1;

                // Count how many records each user has within this channel.
                Map<String, Integer> userNumMap = new HashMap<>();
                for (String userId : tuple._2) {
                    Integer count = userNumMap.get(userId);
                    userNumMap.put(userId, count == null ? 1 : count + 1);
                }

                List<SortObj> lists = new ArrayList<>();
                for (Map.Entry<String, Integer> entry : userNumMap.entrySet()) {
                    lists.add(new SortObj(entry.getKey(), entry.getValue()));
                }
                sortByValueDescending(lists);

                System.out.println("HOT_CHANNEL:" + channel);
                // A channel may have fewer than TOP_N distinct users.
                int userLimit = Math.min(TOP_N, lists.size());
                for (int i = 0; i < userLimit; i++) {
                    SortObj sortObj = lists.get(i);
                    System.out.println(sortObj.getKey() + "==" + sortObj.getValue());
                }
            }
        });
    }

    /** Sorts the entries in place, largest value first. */
    private static void sortByValueDescending(List<SortObj> entries) {
        Collections.sort(entries, new Comparator<SortObj>() {

            @Override
            public int compare(SortObj o1, SortObj o2) {
                // Integer.compare avoids the overflow that subtraction can cause.
                return Integer.compare(o2.getValue(), o1.getValue());
            }
        });
    }
}
result