埋点-埋点模型

 模型代码,解析json和打标签

一、建表语句

create table target_tab(
  event_time bigint comment '事件时间',
  event_timestamp bigint comment '事件时间戳',
  video_time bigint comment '直播跨天时间',
  video_timestamp bigint comment '直播跨天时间戳',
  req_idx int comment '埋点请求次数序号',
  did string comment '设备id',
  client_type int comment '端类型',
  app_version string comment 'app版本号',
  sdk_version string comment 'sdk版本号',
  biz_type int comment '业务线',
  user_agent string comment '用户设备UA',
  user_number string comment '用户ID',
  gif string comment '数据源topic',
  biz_name string comment '业务线-产品标识 源自新版SDK公共字段name',
  ngx_timestamp bigint comment '日志到达ngnix的unix时间戳',
  is_video int comment '直播标签 1直播 0非直播',
  label_map map<string,string> comment '指标标签')
partitioned by (dt int comment '分区')

二、模型代码


set
hive.exec.dynamic.partition.mode = nonstrict; insert overwrite table bdg_inf.target_tab partition(dt) select event_time ,event_timestamp ,video_time ,video_timestamp ,req_idx ,case when gid is not null then gid when did is not null then did end did ,client_type ,app_version ,sdk_version ,biz_type ,user_agent ,user_number ,gif ,biz_name ,ngx_timestamp ,is_video ,map( ----丢失率 --web 'label_loss_02', IF(web直播条件,'1','0'), --ios直播 'label_loss_03', IF(ios直播条件,'1','0'), --android直播 'label_loss_04', IF(安卓直播条件,'1','0'), ----延迟率 --web 'label_delay_02', IF(web延迟率,'1','0'), --ios直播 'label_delay_03', IF(~~~~,'1','0'), --android直播 'label_delay_04', IF(~~~~,'1','0') ) as label_map ,dt from ( select from_unixtime(cast(event_timestamp / 1000 as bigint),'yyyyMMdd') as event_time ,event_timestamp ,from_unixtime(cast(video_timestamp / 1000 as bigint),'yyyyMMdd') as video_time ,video_timestamp ,req_idx ,did ,gid ,case --转成和视频一样 3:ios 4:android when is_video=0 and (client_type = 3 or client_type = 4) then 3 when is_video=0 and client_type = 2 then 4 else client_type --直播的特殊,已经提前转化 end client_type ,app_version ,sdk_version ,biz_type ,user_agent ,user_number ,gif ,biz_name ,ngx_timestamp ,is_video ,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as web_v1 ,cast(regexp_extract(sdk_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as web_v2 ,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 1) as BigInt) as app_v1 ,cast(regexp_extract(app_version, '(\\d+).(\\d+).(\\d+)', 2) as BigInt) as app_v2 ,dt from( select event_timestamp, 0 as video_timestamp, cast(get_json_object(params, '$.req_idx') as BigInt) req_idx, tid as did, get_json_object(params, '$.gid') gid, client_type, get_json_object(params, '$.app_version') app_version, event_id, user_id, sdk_version, biz_type, gif, net_type, user_agent, user_number, biz_name, ngx_timestamp, 0 as is_video, dt from dw.tab1 where dt =${dt} union all select event_timestamp, 0 as video_timestamp, cast(get_json_object(params, '$.req_idx') as BigInt) req_idx, tid as did, get_json_object(params, '$.gid') gid, client_type, get_json_object(params, '$.app_version') app_version, event_id, user_id, sdk_version, biz_type, gif, net_type, user_agent, user_number, biz_name, ngx_timestamp, 0 as is_video, dt from dw.tab2 where dt =${dt} )as union_tab )as out_tab where (web_v1>2 or (web_v1 >= 2 and web_v2 >= 1)) or (app_v1>4 or (app_v1 >= 4 and app_v2 >= 23))

 

posted @ 2021-08-26 15:21  Robots2  阅读(137)  评论(0)    收藏  举报