11 友盟项目--拆分日志为五个表---UDTF函数jar测试并实现转储
2.启动zk
hdfs
yarn
hive
3.拆分函数
注册函数
hive>
-- Add the UDTF jar to the Hive session's classpath.
-- Presumably this jar provides the com.oldboy.umeng.hive.udtf classes registered by the create function statements in this file — verify.
add jar /soft/hive/lib/umeng_hive.jar ;
创建函数
-- Re-register the five log-splitting UDTFs (startup, event, error, usage, page).
-- Each function is dropped first so the statements can be re-run after the jar changes.
-- "if exists" keeps the drop from failing when a function has not been created yet,
-- independent of the hive.exec.drop.ignorenonexistent setting.
drop function if exists forkstartuplogs;
drop function if exists forkeventlogs;
drop function if exists forkerrorlogs;
drop function if exists forkusagelogs;
drop function if exists forkpagelogs;

-- NOTE(review): there is no "temporary" keyword, so these are created as permanent functions,
-- while "add jar" only affects the current session. Consider "using jar '<hdfs path>'" so that
-- new sessions can load the classes — confirm against the deployment.
create function forkstartuplogs as 'com.oldboy.umeng.hive.udtf.ForkStartuplogsUDTF';
create function forkeventlogs as 'com.oldboy.umeng.hive.udtf.ForkEventlogsUDTF';
create function forkerrorlogs as 'com.oldboy.umeng.hive.udtf.ForkErrorlogsUDTF';
create function forkusagelogs as 'com.oldboy.umeng.hive.udtf.ForkUsagelogsUDTF';
create function forkpagelogs as 'com.oldboy.umeng.hive.udtf.ForkPagelogsUDTF';
测试函数
-- Smoke test: run the event-log UDTF over a handful of raw log rows.
-- "limit 10" must be written as two tokens; written as "limit10" it is not a LIMIT clause.
-- The first argument is servertimestr, the same raw_logs column the insert statements in this file pass.
select forkeventlogs(servertimestr, clienttimems, clientip, json)
from raw_logs
limit 10;
注册完可在mysql中查看函数注册元数据
cmd下
# Open a MySQL client session on host s101 as user root (the password is passed inline via -p).
mysql -h s101 -uroot -proot
mysql中注册永久函数
4.分区转储到5个表的sql语句(hive下启动的是MR作业)

-- Dump startup logs produced by the forkstartuplogs UDTF into appstartuplogs.
-- The last two select expressions supply the dynamic partition columns (ym, day),
-- both derived from createdatms.
-- NOTE(review): a fully dynamic partition(ym, day) presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict in this session — confirm.
-- NOTE(review): the inner "limit 50" caps the rows read from raw_logs; looks like a
-- trial-run setting — confirm before a full load.
insert into appstartuplogs partition (ym, day)
select
    logs.appchannel,
    logs.appid,
    logs.appplatform,
    logs.appversion,
    logs.brand,
    logs.carrier,
    logs.country,
    logs.createdatms,
    logs.deviceid,
    logs.devicestyle,
    logs.ipaddress,
    logs.network,
    logs.ostype,
    logs.province,
    logs.screensize,
    logs.tenantid,
    date_format(cast(logs.createdatms as timestamp), 'yyyyMM'),
    date_format(cast(logs.createdatms as timestamp), 'dd')
from (
    select forkstartuplogs(servertimestr, clienttimems, clientip, json)
    from raw_logs
    limit 50
) logs;

-- Dump event logs produced by the forkeventlogs UDTF into appeventlogs.
-- The last two select expressions supply the dynamic partition columns (ym, day),
-- both derived from createdatms.
-- NOTE(review): a fully dynamic partition(ym, day) presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict in this session — confirm.
-- NOTE(review): the inner "limit 50" caps the rows read from raw_logs; looks like a
-- trial-run setting — confirm before a full load.
insert into appeventlogs partition (ym, day)
select
    logs.appchannel,
    logs.appid,
    logs.appplatform,
    logs.appversion,
    logs.brand,
    logs.createdatms,
    logs.deviceid,
    logs.devicestyle,
    logs.eventdurationsecs,
    logs.eventid,
    logs.ostype,
    logs.tenantid,
    date_format(cast(logs.createdatms as timestamp), 'yyyyMM'),
    date_format(cast(logs.createdatms as timestamp), 'dd')
from (
    select forkeventlogs(servertimestr, clienttimems, clientip, json)
    from raw_logs
    limit 50
) logs;

-- Dump error logs produced by the forkerrorlogs UDTF into apperrorlogs.
-- The last two select expressions supply the dynamic partition columns (ym, day),
-- both derived from createdatms.
-- NOTE(review): a fully dynamic partition(ym, day) presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict in this session — confirm.
-- NOTE(review): the inner "limit 50" caps the rows read from raw_logs; looks like a
-- trial-run setting — confirm before a full load.
insert into apperrorlogs partition (ym, day)
select
    logs.appchannel,
    logs.appid,
    logs.appplatform,
    logs.appversion,
    logs.brand,
    logs.createdatms,
    logs.deviceid,
    logs.devicestyle,
    logs.errorbrief,
    logs.errordetail,
    logs.ostype,
    logs.tenantid,
    date_format(cast(logs.createdatms as timestamp), 'yyyyMM'),
    date_format(cast(logs.createdatms as timestamp), 'dd')
from (
    select forkerrorlogs(servertimestr, clienttimems, clientip, json)
    from raw_logs
    limit 50
) logs;

-- Dump usage logs produced by the forkusagelogs UDTF into appusagelogs.
-- The last two select expressions supply the dynamic partition columns (ym, day),
-- both derived from createdatms.
-- NOTE(review): a fully dynamic partition(ym, day) presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict in this session — confirm.
-- NOTE(review): the inner "limit 50" caps the rows read from raw_logs; looks like a
-- trial-run setting — confirm before a full load.
insert into appusagelogs partition (ym, day)
select
    logs.appchannel,
    logs.appid,
    logs.appplatform,
    logs.appversion,
    logs.brand,
    logs.createdatms,
    logs.deviceid,
    logs.devicestyle,
    logs.ostype,
    logs.singledownloadtraffic,
    logs.singleuploadtraffic,
    logs.singleusedurationsecs,
    logs.tenantid,
    date_format(cast(logs.createdatms as timestamp), 'yyyyMM'),
    date_format(cast(logs.createdatms as timestamp), 'dd')
from (
    select forkusagelogs(servertimestr, clienttimems, clientip, json)
    from raw_logs
    limit 50
) logs;

-- Dump page logs produced by the forkpagelogs UDTF into apppagelogs.
-- The last two select expressions supply the dynamic partition columns (ym, day),
-- both derived from createdatms.
-- NOTE(review): a fully dynamic partition(ym, day) presumably requires
-- hive.exec.dynamic.partition.mode=nonstrict in this session — confirm.
-- NOTE(review): the inner "limit 50" caps the rows read from raw_logs; looks like a
-- trial-run setting — confirm before a full load.
insert into apppagelogs partition (ym, day)
select
    logs.appchannel,
    logs.appid,
    logs.appplatform,
    logs.appversion,
    logs.brand,
    logs.createdatms,
    logs.deviceid,
    logs.devicestyle,
    logs.nextpage,
    logs.ostype,
    logs.pageid,
    logs.pageviewcntinsession,
    logs.staydurationsecs,
    logs.tenantid,
    logs.visitindex,
    date_format(cast(logs.createdatms as timestamp), 'yyyyMM'),
    date_format(cast(logs.createdatms as timestamp), 'dd')
from (
    select forkpagelogs(servertimestr, clienttimems, clientip, json)
    from raw_logs
    limit 50
) logs;
结果查看
分类:
大数据项目
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· 一文读懂知识蒸馏
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下