11 友盟项目--拆分日志为五个表---UDTF函数jar测试并实现转储

 

 

打包过程参照 09友盟项目

 

2.启动zk

  hdfs

  yarn

  hive

3.拆分函数
    注册函数
        hive>
        add jar /soft/hive/lib/umeng_hive.jar ;
        
   创建函数
        drop function forkstartuplogs ;
        drop function forkeventlogs ;
        drop function forkerrorlogs ;
        drop function forkusagelogs ;
        drop function forkpagelogs ;
 
    create function forkstartuplogs as 'com.oldboy.umeng.hive.udtf.ForkStartuplogsUDTF' ;
        create function forkeventlogs as 'com.oldboy.umeng.hive.udtf.ForkEventlogsUDTF' ;
        create function forkerrorlogs as 'com.oldboy.umeng.hive.udtf.ForkErrorlogsUDTF' ;
        create function forkusagelogs as 'com.oldboy.umeng.hive.udtf.ForkUsagelogsUDTF' ;
        create function forkpagelogs as 'com.oldboy.umeng.hive.udtf.ForkPagelogsUDTF' ;
 
  测试函数
    select  forkeventlogs(servertimes , clienttimems , clientip, json) from raw_logs limit10;

 注册完可在mysql中查看函数注册元数据

  cmd下

  mysql -h s101 -uroot -proot

  mysql中注册永久函数

4.分区转储s到5个表的sql语句   hive下启动的是MR

insert into appstartuplogs partition(ym , day)
select
  t.appchannel,
  t.appid,
  t.appplatform,
  t.appversion,
  t.brand,
  t.carrier,
  t.country ,
  t.createdatms,
  t.deviceid ,
  t.devicestyle,
  t.ipaddress,
  t.network ,
  t.ostype ,
  t.province,  
  t.screensize,
  t.tenantid,
  date_format(cast(t.createdatms as timestamp) , 'yyyyMM') ,
  date_format(cast(t.createdatms as timestamp) , 'dd')
from
  (
    select forkstartuplogs(servertimestr ,clienttimems ,clientip ,json) from  raw_logs limit 50
  )  t;
启动日志
insert into appeventlogs partition(ym , day)
select
   t.appchannel  ,
  t.appid  ,
  t.appplatform ,
  t.appversion ,
  t.brand ,
  t.createdatms ,
  t.deviceid ,
  t.devicestyle ,
  t.eventdurationsecs ,
  t.eventid ,
  t.ostype ,
  t.tenantid ,
  date_format(cast(t.createdatms as timestamp) , 'yyyyMM') ,
  date_format(cast(t.createdatms as timestamp) , 'dd')
from
  (
    select forkeventlogs(servertimestr ,clienttimems ,clientip ,json) from  raw_logs limit 50
  )  t;
事件日志
insert into apperrorlogs partition(ym , day)
select
   t.appchannel ,
  t.appid ,
  t.appplatform ,
  t.appversion ,
  t.brand ,
  t.createdatms ,
  t.deviceid ,
  t.devicestyle ,
  t.errorbrief ,
  t.errordetail ,
  t.ostype ,
  t.tenantid ,
  date_format(cast(t.createdatms as timestamp) , 'yyyyMM') ,
  date_format(cast(t.createdatms as timestamp) , 'dd')
from
  (
    select forkerrorlogs(servertimestr ,clienttimems ,clientip ,json) from  raw_logs limit 50
  )  t;
错误日志
insert into appusagelogs partition(ym , day)
select
  t.appchannel ,
  t.appid ,
  t.appplatform ,
  t.appversion ,
  t.brand ,
  t.createdatms ,
  t.deviceid ,
  t.devicestyle ,
  t.ostype ,
  t.singledownloadtraffic ,
  t.singleuploadtraffic ,
  t.singleusedurationsecs ,
  t.tenantid ,
  date_format(cast(t.createdatms as timestamp) , 'yyyyMM') ,
  date_format(cast(t.createdatms as timestamp) , 'dd')
from
  (
    select forkusagelogs (servertimestr ,clienttimems ,clientip ,json) from  raw_logs limit 50
  )  t;
使用页面日志
insert into apppagelogs partition(ym , day)
select
  t.appchannel ,
  t.appid ,
  t.appplatform ,
  t.appversion ,
  t.brand ,
  t.createdatms ,
  t.deviceid ,
  t.devicestyle ,
  t.nextpage ,
  t.ostype ,
  t.pageid ,
  t.pageviewcntinsession ,
  t.staydurationsecs ,
  t.tenantid ,
  t.visitindex ,
  date_format(cast(t.createdatms as timestamp) , 'yyyyMM') ,
  date_format(cast(t.createdatms as timestamp) , 'dd')
from
  (
    select forkpagelogs (servertimestr ,clienttimems ,clientip ,json) from  raw_logs limit 50
  )  t;
页面日志

 结果查看

 

posted @ 2018-11-02 15:09  star521  阅读(328)  评论(0编辑  收藏  举报