SQL算法题
现有一个含三个字段的用户表,id/email/mobile, id是自动递增的,不具有重复性。当email和mobile有一个重复时即认为是同一个user, 请用一种方法查询表中实际有几个user?
--样例数据
CREATE TABLE tb_test(
id int ,
email varchar(10),
mobile varchar(20)
);
truncate table tb_test;
insert into tb_test values(1, 'a','E');
insert into tb_test values(2, 'b','D');
insert into tb_test values(3, 'c','D');
insert into tb_test values(4, 'c','A');
insert into tb_test values(5, 'd','B');
insert into tb_test values(6, 'e','A');
insert into tb_test values(7, 'e','F');
insert into tb_test values(8, 'e','E');
-- 正确答案: 2个
解法:
-- 正解
select count(distinct email)+count(distinct mobile)-count(distinct concat(email,mobile)) from tb_test;
--但一个特殊情况:就是=0的情况下要+1(见下图)
-- 我的解法:底层思路一致
select count(1) tt, sum(cnt-1) nn, (count(1)-sum(cnt-1))/2 as result
from (
select t.col, t.val, count(1) cnt
from (
SELECT a.id, 'col1' as col, a.email as val FROM tb_test a
union all
SELECT a.id, 'col2' as col, a.mobile FROM tb_test a
) t
group by t.col, t.val) t;