clickhouse获取差集交集的方式新老版本对比

1.新版本

1)intersect
求多个数据集在某一维度上的交集,适合在用户分群等类似业务场景使用

select count( 1) from (
select id as create_user from app.user_model  where 1=1  and product_count>=10 and product_count<=300   
intersect 
select create_user from app.work_basic_model  where total_uv>=100 and total_uv<=350 
intersect
select create_user  from app.work_basic_model  where total_uv>=200 and total_uv<=350 
)

--类似: with (select groupUniqArray(u_i) from (select id as u_i from (select *from app.user_model where 1=1 and product_count>=10 and product_count<=300 ) )) as u0, (select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=100 and total_uv<=350 ) )) as u1 , (select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=200 and total_uv<=350 ) )) as u2 select length(arrayIntersect(u0,u1,u2)) as u

 

2)except

用第一个查询子集与后面所有子集求差集

select arrayJoin([1,2,3,4]) except select arrayJoin([1,2]) except select arrayJoin([4,5]);

SELECT arrayJoin([1, 2, 3, 4])
EXCEPT
SELECT arrayJoin([1, 2])
EXCEPT
SELECT arrayJoin([4, 5])

Query id: 2dc2e297-b102-43de-b763-3cb9d436b18a

┌─arrayJoin([1, 2, 3, 4])─┐
│                       3 │
└─────────────────────────┘

1 rows in set. Elapsed: 0.003 sec. 

2.旧版本

1)bitmap实现

--表结构

create table q_imis_query

(

task_id UInt64,

imsi String,

insert_time DateTime

);

--1】交集查询
with

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 ,

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02,

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03

select 

  from q_imis_query

where 

  cityHash64(task_id)%5000=cityHash64(1270)%5000

  and task_id=1270

  and has(bitmapToArray(bitmapAnd(bitmapAnd(a01,a02),a03)),xxHash32(imsi))=1;

--2】差集查询
with

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 ,

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02,

  (select  bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03

select 

  from q_imis_query

where 

  cityHash64(task_id)%5000=cityHash64(1270)%5000

  and task_id=1270

  and has(bitmapToArray(bitmapAndnot(bitmapAndnot(a01,a02),a03)),xxHash32(imsi))=1;

 

2)数组实现

--1】交集查询
with

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 ,

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02,

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03

select 

  from q_imis_query

where 

  cityHash64(task_id)%5000=cityHash64(1270)%5000

  and task_id=1270

  and has(arrayConcat(arrayIntersect(a01,a02),arrayIntersect(a01,a03)),cityHash64(imsi))=1;

--2】差集查询
with

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 ,

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02,

  (select  bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03

select 

  from q_imis_query

where 

  cityHash64(task_id)%5000=cityHash64(1270)%5000

  and task_id=1270

  and has(arrayConcat(arrayIntersect(a01,a02),arrayIntersect(a01,a03)),cityHash64(imsi))=0;

 

posted @ 2022-02-28 14:36  渐逝的星光  阅读(2526)  评论(0编辑  收藏  举报