SQL子查询的感悟
postgres=# create table t1(id int primary key, info text, reg_time timestamp); CREATE TABLE postgres=# create table t2(id int, name text); CREATE TABLE postgres=# insert into t1 select generate_series(1, 10000),'lottu', now(); INSERT 0 10000 postgres=# insert into t2 select (random()*1000)::int, 'lottu'||id from generate_series(1,1000) id; INSERT 0 1000 postgres=# create index ind_t1_id on t1(id); CREATE INDEX
select * from t1 where id in (select id from t2);
postgres=# explain (analyze,verbose,costs,timing) select * from t1 where id in (select id from t2); QUERY PLAN ---------------------------------------------------------------------- Merge Join (cost=54.25..99.73 rows=628 width=18) (actual time=1.319..2.365 rows=628 loops=1) Output: t1.id, t1.info, t1.reg_time Inner Unique: true Merge Cond: (t1.id = t2.id) -> Index Scan using ind_t1_id on public.t1 (cost=0.29..337.29 rows=10000 width=18) (actual time=0.014..0.421 rows=997 loops=1) Output: t1.id, t1.info, t1.reg_time -> Sort (cost=53.97..55.54 rows=628 width=4) (actual time=1.298..1.387 rows=628 loops=1) Output: t2.id Sort Key: t2.id Sort Method: quicksort Memory: 54kB -> HashAggregate (cost=18.50..24.78 rows=628 width=4) (actual time=0.730..0.877 rows=628 loops=1) Output: t2.id Group Key: t2.id -> Seq Scan on public.t2 (cost=0.00..16.00 rows=1000 width=4) (actual time=0.013..0.267 rows=1000 loops=1) Output: t2.id Planning Time: 0.454 ms Execution Time: 2.507 ms (17 rows)
从该执行计划可以看到很多信息;
- 其中获取的行数只有62
- 执行时间是
若采用join的方式
postgres=# explain (analyze,verbose,costs,timing) select t1,* from t1 , t2 where t1.id = t2.id ; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------ Merge Join (cost=66.11..117.17 rows=1000 width=72) (actual time=0.601..2.184 rows=1000 loops=1) Output: t1.*, t1.id, t1.info, t1.reg_time, t2.id, t2.name Merge Cond: (t1.id = t2.id) -> Index Scan using ind_t1_id on public.t1 (cost=0.29..337.29 rows=10000 width=60) (actual time=0.021..0.726 rows=997 loops=1) Output: t1.*, t1.id, t1.info, t1.reg_time -> Sort (cost=65.83..68.33 rows=1000 width=12) (actual time=0.573..0.721 rows=1000 loops=1) Output: t2.id, t2.name Sort Key: t2.id Sort Method: quicksort Memory: 71kB -> Seq Scan on public.t2 (cost=0.00..16.00 rows=1000 width=12) (actual time=0.013..0.226 rows=1000 loops=1) Output: t2.id, t2.name Planning Time: 0.288 ms Execution Time: 2.421 ms (13 rows)
采用array的方式改写
postgres=# explain (analyze,verbose,costs,timing) select * from t1 where id = any(array(select id from t2)); QUERY PLAN --------------------------------------------------------------------------------------------------------------------------- Index Scan using ind_t1_id on public.t1 (cost=16.29..59.03 rows=10 width=18) (actual time=0.418..1.108 rows=628 loops=1) Output: t1.id, t1.info, t1.reg_time Index Cond: (t1.id = ANY ($0)) InitPlan 1 (returns $0) -> Seq Scan on public.t2 (cost=0.00..16.00 rows=1000 width=4) (actual time=0.014..0.127 rows=1000 loops=1) Output: t2.id Planning Time: 0.106 ms Execution Time: 1.178 ms (8 rows)
select * from t1 where id in (select id from t2 where id <= 1000); 或者 with t as (select id from t2 where id <= 1000) select t1.* from t1 where id in (select id from t);
我建议可以用一个子表用来存放
select id from t2 where id <= 1000);