常用sql

正则

数组转字符串

concat_ws

替换html元素

REGEXP_REPLACE(ticket_content,'<([^>]*)>|(&nbsp;)|(&nbsp)','')

字符串去重

concat_ws(',',array_distinct(split(replace(replace(sku_info,'[',''),']',''),',')))

获取json数组中指定字段的所有值

-- [{"id": 1, "name": "张三", "nickname": ""}, {"id": 2, "name": "李四", "nickname": ""}, {"id": 3, "name": "王武", "nickname": ""}, {"id": 6, "name": "小明", "nickname": ""}]
with t1 as (
  select id , GET_JSON_OBJECT(id_json, '$.id') partner_id
  from test_table
  lateral view explode(split(
-- 替换}, {为};{
  replace(
    -- 替换方括号为空
     regexp_replace(
     dresser_members
    ,'\\[|\\]','')
    , '}, {','};{'), ';'))  views as id_json
   where ds = '20220424'  
)
select id, collect_set(partner_id) partner_ids
from t1 
group by id
;

替换文件分隔符\u0001

unicode转换

json字符串数组转数组

select 
        -- 转换成数组
        split(
            -- 替换},{ 为};{
            replace(
                -- 替换中括号为空
                replace(replace('[{"created_at":"2021-01-26 17:45:21","jxjy_num":1,"sale_type":"ds","input_staff_id":612142},{"created_at":"2021-01-26 09:45:46","jxjy_num":2,"input_staff_id":612142},{"created_at":"2021-01-26 09:45:46","jxjy_num":1,"input_staff_id":612142}]'
                               ,'[',''
                        )
                    , ']', ''
                ), '},{','};{'
            ), ';'
        )
;

计算

连续登陆天数

SELECT
 distinct_id
, DATE_SUB(date_col, rk) d_group
, min(date_col) begin_date
, max(date_col) end_date
, count(1) continuous_days
FROM
 (
  SELECT
   distinct_id
  , date_col
  , row_number() OVER (PARTITION BY distinct_id ORDER BY date_col ASC) rk
  FROM
   (
   SELECT DISTINCT
    distinct_id
    ,FROM_UNIXTIME(cast(`time` / 1000 as bigint), 'yyyy-MM-dd HH:mm:ss') date_col
--    , from_unixtime(cast("time" as bigint) / 1000) date_col
   FROM dwd_log_track_user_event_di 
   where ds >= '20220501'
   and ds <= '20220515'
  )  temp_1
)  temp_2
GROUP BY distinct_id
       ,DATE_SUB(date_col, rk)
;
posted @ 2022-02-10 13:38  dch_21  阅读(105)  评论(0编辑  收藏  举报