23 友盟项目--sparkstreaming对接kafka、集成redis--从redis中查询月留存率

从redis中查询月留存率

StatRemainRatioMonth

 1 package com.oldboy.umeng.spark.stat;
 2 
 3 
 4 
 5 /**
 6  * 统计月留存率
 7  */
 8 public class StatRemainRatioMonth {
 9     public static void main(String[] args) throws Exception {
10         SparkConf conf = new SparkConf();
11         conf.setAppName("statNew");
12         conf.setMaster("local[4]");
13         SparkSession sess = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate();
14 
15         //sess.sql("select * from row_log");
16         //注册函数
17         ExecSQLUtil.execRegisterFuncs(sess);
18 
19         String day = "20181105" ;
20         Jedis redis = new Jedis("s101" , 6379) ;
21         redis.select(1) ;
22         Set<String> keys = redis.keys("*") ;
23         //创建Row行类型List集合
24         List<Row> rows = new ArrayList<Row>() ;
25         for(String key :keys){
26             String[] arr = key.split("#") ;
27             String value = redis.get(key) ;
28             long mintime = Long.parseLong(value.split(",")[0]) ;//redis中取出最小值
29             System.out.println(DateUtil.formatDay(mintime , 0 , "yyyyMMdd"));
30             rows.add(RowFactory.create(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6],mintime)) ;
31         }
32         //创建java rdd
33         JavaRDD<Row> rdd1 = new JavaSparkContext(sess.sparkContext()).parallelize(rows);
34         //结构化字段
35         StructField[] fields = new StructField[8];
36         fields[0] = new StructField("appid", DataTypes.StringType, false, Metadata.empty());
37         fields[1] = new StructField("appversion", DataTypes.StringType, false, Metadata.empty());
38         fields[2] = new StructField("brand", DataTypes.StringType, false, Metadata.empty());
39         fields[3] = new StructField("appplatform", DataTypes.StringType, false, Metadata.empty());
40         fields[4] = new StructField("devicestyle", DataTypes.StringType, false, Metadata.empty());
41         fields[5] = new StructField("ostype", DataTypes.StringType, false, Metadata.empty());
42         fields[6] = new StructField("deviceid", DataTypes.StringType, false, Metadata.empty());
43         fields[7] = new StructField("mintime", DataTypes.LongType, false, Metadata.empty());
44         //指定schame类型
45         StructType type = new StructType(fields);
46         //创建Dataset  数据表
47         Dataset<Row> ds1 = sess.createDataFrame(rdd1 ,type ) ;
48         //数据表创建为临时表
49         ds1.createOrReplaceTempView("_tt");
50 
51         //新增设备 各个维度下的   条件:最小值所在的月 = 给出查询的时间所在的月
52         String sql = "select appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " +
53                              "from _tt " +
54                              "where formatbymonth(mintime, 0 , 'yyyyMM') = formatbymonth('"+ day+"' , 'yyyyMMdd' , 0 , 'yyyyMM')" +
55                              "group by appid ,appversion,brand,appplatform,devicestyle,ostype,deviceid " +
56                              "with cube" ;
57 
58         sess.sql(sql).createOrReplaceTempView("_t2");//再创建一个临时表
59         System.out.println("========================");
60         //有效的新增设备,注册v1视图  //设备id 和app 不能为空   其他字段如果为null,转为NULL,因为null不参与统计
61         sess.sql("select ifnull(ss.appid ,'NULLL') appid  ," +
62                          "ifnull(ss.appversion  ,'NULLL') appversion   ," +
63                          "ifnull(ss.appplatform ,'NULLL') appplatform  ," +
64                          "ifnull(ss.brand ,'NULLL') brand  ," +
65                          "ifnull(ss.devicestyle ,'NULLL') devicestyle  ," +
66                          "ifnull(ss.ostype,'NULLL') ostype ," +
67                          "ifnull(ss.deviceid    ,'NULLL') deviceid " +
68                          " from _t2 ss" +
69                          " where ss.appid is not null and ss.deviceid is not null")
70                 .createOrReplaceTempView("v1");
71         sess.sql("select * from v1").show(1000,false);
72 //
73         String sql2 = ResourceUtil.readResourceAsString("stat_remainratio_month2.sql") ;
74         sql2 = sql2.replace("${ymd}" , day) ;
75 //
76         //执行sql语句
77         ExecSQLUtil.execSQLString(sess , sql2);
78     }
79 }

 

执行sql语句

-- 计算留存率
use big12_umeng ;

-- 查询经过一个月后的活跃用户-
CREATE OR  replace TEMPORARY view v2 as
SELECT
  ifnull(s.appid       ,'NULLL') appid        ,
  ifnull(s.appversion  ,'NULLL') appversion   ,
  ifnull(s.appplatform ,'NULLL') appplatform  ,
  ifnull(s.brand       ,'NULLL') brand        ,
  ifnull(s.devicestyle ,'NULLL') devicestyle  ,
  ifnull(s.ostype      ,'NULLL') ostype       ,
  ifnull(s.deviceid    ,'NULLL') deviceid
FROM
(
  SELECT
    appid       ,
    appversion  ,
    appplatform ,
    brand       ,
    devicestyle ,
    ostype      ,
    deviceid
  FROM
    appstartuplogs
  WHERE
    ym = formatbymonth('${ymd}' , 'yyyyMMdd' , 1 , 'yyyyMM')
  group by
    appid       ,
    appversion  ,
    appplatform ,
    brand       ,
    devicestyle ,
    ostype      ,
    deviceid
    with cube
)s
WHERE
  s.appid is not NULL
  and s.deviceid is not null ;

--
-- 查询交集()
CREATE  OR  replace TEMPORARY view v3 as
SELECT
  v1.appid          ,
  v1.appversion     ,
  v1.appplatform    ,
  v1.brand          ,
  v1.devicestyle    ,
  v1.ostype         ,
  count(v1.deviceid) cnt
FROM
  v1,v2
WHERE
      v1.appid       = v2.appid
  and v1.appversion  = v2.appversion
  and v1.appplatform = v2.appplatform
  and v1.brand       = v2.brand
  and v1.devicestyle = v2.devicestyle
  and v1.ostype      = v2.ostype
  and v1.deviceid    = v2.deviceid
GROUP BY
  v1.appid       ,
  v1.appversion  ,
  v1.appplatform ,
  v1.brand       ,
  v1.devicestyle ,
  v1.ostype ;

CREATE  OR  replace TEMPORARY view v4 as
SELECT
  v1.appid          ,
  v1.appversion     ,
  v1.appplatform    ,
  v1.brand          ,
  v1.devicestyle    ,
  v1.ostype         ,
  count(v1.deviceid) cnt
FROM
  v1
GROUP BY
  v1.appid       ,
  v1.appversion  ,
  v1.appplatform ,
  v1.brand       ,
  v1.devicestyle ,
  v1.ostype ;

select * from v4 ;

SELECT
  v4.appid      ,
  v4.appversion ,
  v4.appplatform,
  v4.brand      ,
  v4.devicestyle,
  v4.ostype     ,
  ifnull(v3.cnt , 0) / v4.cnt
FROM
  v4 left outer join v3
on
      v3.appid       = v4.appid
  and v3.appversion  = v4.appversion
  and v3.appplatform = v4.appplatform
  and v3.brand       = v4.brand
  and v3.devicestyle = v4.devicestyle
  and v3.ostype      = v4.ostype

 

posted @ 2018-11-22 19:55  star521  阅读(378)  评论(0编辑  收藏  举报