clickhouse获取差集交集的方式新老版本对比
1.新版本
1)intersect
求多个数据集在某一维度上的交集,适合在用户分群等类似业务场景使用
select count( 1) from ( select id as create_user from app.user_model where 1=1 and product_count>=10 and product_count<=300 intersect select create_user from app.work_basic_model where total_uv>=100 and total_uv<=350 intersect select create_user from app.work_basic_model where total_uv>=200 and total_uv<=350 )
--类似: with (select groupUniqArray(u_i) from (select id as u_i from (select *from app.user_model where 1=1 and product_count>=10 and product_count<=300 ) )) as u0, (select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=100 and total_uv<=350 ) )) as u1 , (select groupUniqArray(create_user) from (select create_user from (select * from app.work_basic_model where total_uv>=200 and total_uv<=350 ) )) as u2 select length(arrayIntersect(u0,u1,u2)) as u
2)except
用第一个查询子集与后面所有子集求差集
select arrayJoin([1,2,3,4]) except select arrayJoin([1,2]) except select arrayJoin([4,5]); SELECT arrayJoin([1, 2, 3, 4]) EXCEPT SELECT arrayJoin([1, 2]) EXCEPT SELECT arrayJoin([4, 5]) Query id: 2dc2e297-b102-43de-b763-3cb9d436b18a ┌─arrayJoin([1, 2, 3, 4])─┐ │ 3 │ └─────────────────────────┘ 1 rows in set. Elapsed: 0.003 sec.
2.旧版本
1)bitmap实现
--表结构 create table q_imis_query ( task_id UInt64, imsi String, insert_time DateTime ); --1】交集查询 with (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 , (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02, (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03 select from q_imis_query where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270 and has(bitmapToArray(bitmapAnd(bitmapAnd(a01,a02),a03)),xxHash32(imsi))=1; --2】差集查询 with (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 , (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02, (select bitmapBuild(groupArray(xxHash32(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03 select from q_imis_query where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270 and has(bitmapToArray(bitmapAndnot(bitmapAndnot(a01,a02),a03)),xxHash32(imsi))=1;
2)数组实现
--1】交集查询 with (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 , (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02, (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03 select from q_imis_query where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270 and has(arrayConcat(arrayIntersect(a01,a02),arrayIntersect(a01,a03)),cityHash64(imsi))=1; --2】差集查询 with (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270) as a01 , (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(1271)%5000 and task_id=1271) as a02, (select bitmapBuild(groupArray(cityHash64(imsi))) from testa where cityHash64(task_id)%5000=cityHash64(8888)%5000 and task_id=8888) as a03 select from q_imis_query where cityHash64(task_id)%5000=cityHash64(1270)%5000 and task_id=1270 and has(arrayConcat(arrayIntersect(a01,a02),arrayIntersect(a01,a03)),cityHash64(imsi))=0;