正则
数组转字符串
concat_ws
替换html元素
REGEXP_REPLACE(ticket_content,'<([^>]*)>|( )|( )','')
字符串去重
concat_ws(',',array_distinct(split(replace(replace(sku_info,'[',''),']',''),',')))
获取json数组中指定字段的所有值
-- [{"id": 1, "name": "张三", "nickname": ""}, {"id": 2, "name": "李四", "nickname": ""}, {"id": 3, "name": "王武", "nickname": ""}, {"id": 6, "name": "小明", "nickname": ""}]
with t1 as (
select id , GET_JSON_OBJECT(id_json, '$.id') partner_id
from test_table
lateral view explode(split(
-- 替换}, {为};{
replace(
-- 替换方括号为空
regexp_replace(
dresser_members
,'\\[|\\]','')
, '}, {','};{'), ';')) views as id_json
where ds = '20220424'
)
select id, collect_set(partner_id) partner_ids
from t1
group by id
;
替换文件分隔符\u0001
unicode转换
![](https://img2022.cnblogs.com/blog/2009668/202205/2009668-20220510154716343-1760637860.png)
![](https://img2022.cnblogs.com/blog/2009668/202205/2009668-20220510154647494-512832381.png)
json字符串数组转数组
select
-- 转换成数组
split(
-- 替换},{ 为};{
replace(
-- 替换中括号为空
replace(replace('[{"created_at":"2021-01-26 17:45:21","jxjy_num":1,"sale_type":"ds","input_staff_id":612142},{"created_at":"2021-01-26 09:45:46","jxjy_num":2,"input_staff_id":612142},{"created_at":"2021-01-26 09:45:46","jxjy_num":1,"input_staff_id":612142}]'
,'[',''
)
, ']', ''
), '},{','};{'
), ';'
)
;
计算
连续登陆天数
SELECT
distinct_id
, DATE_SUB(date_col, rk) d_group
, min(date_col) begin_date
, max(date_col) end_date
, count(1) continuous_days
FROM
(
SELECT
distinct_id
, date_col
, row_number() OVER (PARTITION BY distinct_id ORDER BY date_col ASC) rk
FROM
(
SELECT DISTINCT
distinct_id
,FROM_UNIXTIME(cast(`time` / 1000 as bigint), 'yyyy-MM-dd HH:mm:ss') date_col
-- , from_unixtime(cast("time" as bigint) / 1000) date_col
FROM dwd_log_track_user_event_di
where ds >= '20220501'
and ds <= '20220515'
) temp_1
) temp_2
GROUP BY distinct_id
,DATE_SUB(date_col, rk)
;