A PostgreSQL/LightDB technique 5x faster than window functions: optimizing top-N-per-group queries with LATERAL joins instead of window functions

  The conventional way to optimize a top-N-per-group query is to replace aggregation with window (analytic) functions, which usually yields a large speedup because the main table is scanned only once. A full sort (ORDER BY) is still unavoidable, however. For example:

CREATE TABLE tags (
  id serial PRIMARY KEY,
  name VARCHAR(255)
);

CREATE TABLE movies (
  id serial PRIMARY KEY,
  name VARCHAR(255),
  tag_id int NOT NULL,
  created_at timestamp NOT NULL DEFAULT NOW(),
  FOREIGN KEY (tag_id) REFERENCES tags(id) ON UPDATE CASCADE
);

CREATE INDEX movies_tag_id_index ON movies (tag_id);
-- Genres
INSERT INTO "tags"("name") VALUES('Action');
INSERT INTO "tags"("name") VALUES('Animation');
INSERT INTO "tags"("name") VALUES('Sci-Fi');

-- Movies
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('The Matrix', (SELECT id FROM "tags" where "name" = 'Action'), '1999-05-21');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Tenet', (SELECT id FROM "tags" where "name" = 'Action'), '2020-10-29');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Wonder Woman 1984', (SELECT id FROM "tags" where "name" = 'Action'), '2020-12-25');

INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Toy Story', (SELECT id FROM "tags" where "name" = 'Animation'), '1995-12-22');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Monsters Inc.', (SELECT id FROM "tags" where "name" = 'Animation'), '2001-11-14');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Finding Nemo', (SELECT id FROM "tags" where "name" = 'Animation'), '2003-07-04');

INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Arrival', (SELECT id FROM "tags" where "name" = 'Sci-Fi'), '2016-10-24');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('Minority Report', (SELECT id FROM "tags" where "name" = 'Sci-Fi'), '2002-08-02');
INSERT INTO "movies"("name", "tag_id", "created_at") VALUES('The Midnight Sky', (SELECT id FROM "tags" where "name" = 'Sci-Fi'), '2020-12-23');
-- Generate 3,000,000 filler movies (1,000,000 per tag; created_at defaults to NOW())
INSERT INTO "movies"("name", "tag_id")
SELECT
   generate_series(1,1000000) as "name",
   (SELECT id FROM "tags" where "name" = 'Action')
;

INSERT INTO "movies"("name", "tag_id")
SELECT
   generate_series(1,1000000) as "name",
   (SELECT id FROM "tags" where "name" = 'Animation')
;

INSERT INTO "movies"("name", "tag_id")
SELECT
   generate_series(1,1000000) as "name",
   (SELECT id FROM "tags" where "name" = 'Sci-Fi')
;
zjh@postgres=# explain analyze
zjh@postgres-# with movies_by_tags (tag_id, name, created_at, rank) as (
zjh@postgres(#   SELECT
zjh@postgres(#     tag_id,
zjh@postgres(#     name,
zjh@postgres(#     created_at,
zjh@postgres(#     ROW_NUMBER() OVER(PARTITION BY tag_id ORDER BY tag_id, created_at DESC)
zjh@postgres(#   FROM movies
zjh@postgres(# )
zjh@postgres-# select *
zjh@postgres-# from movies_by_tags mbt
zjh@postgres-# where mbt.rank < 3
zjh@postgres-# ;
                                                             QUERY PLAN                                                              
-------------------------------------------------------------------------------------------------------------------------------------
 Subquery Scan on mbt  (cost=118008.82..127372.23 rows=89175 width=536) (actual time=2348.745..7563.132 rows=6 loops=1)
   Filter: (mbt.rank < 3)
   Rows Removed by Filter: 2999994
   ->  WindowAgg  (cost=118008.82..124028.15 rows=267526 width=536) (actual time=2348.740..7169.455 rows=3000000 loops=1)
         ->  Sort  (cost=118008.82..118677.63 rows=267526 width=528) (actual time=1804.146..2247.689 rows=3000000 loops=1)
               Sort Key: movies.tag_id, movies.created_at DESC
               Sort Method: external merge  Disk: 96624kB
               ->  Seq Scan on movies  (cost=0.00..21784.26 rows=267526 width=528) (actual time=0.021..589.504 rows=3000000 loops=1)
 Planning Time: 0.205 ms
 Execution Time: 7580.399 ms
(10 rows)
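The plan shows why this is slow: all 3,000,000 rows are sorted (spilling 96MB to disk) before the filter discards everything but 6 rows. The query shape itself is portable to any engine with window functions; a minimal, self-contained sketch of the same ROW_NUMBER() technique using Python's bundled SQLite (tiny illustrative data set, not the benchmark above; requires SQLite >= 3.25):

```python
import sqlite3

# Mirror the tags/movies schema above with a handful of rows.
conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE tags (id INTEGER PRIMARY KEY, name TEXT);
CREATE TABLE movies (id INTEGER PRIMARY KEY, name TEXT,
                     tag_id INTEGER NOT NULL, created_at TEXT);
INSERT INTO tags(name) VALUES ('Action'), ('Animation');
INSERT INTO movies(name, tag_id, created_at) VALUES
  ('The Matrix', 1, '1999-05-21'),
  ('Tenet', 1, '2020-10-29'),
  ('Wonder Woman 1984', 1, '2020-12-25'),
  ('Toy Story', 2, '1995-12-22'),
  ('Monsters Inc.', 2, '2001-11-14');
""")

# Rank movies within each tag by recency, then keep the top 2 per tag.
rows = conn.execute("""
  WITH movies_by_tags AS (
    SELECT tag_id, name,
           ROW_NUMBER() OVER (PARTITION BY tag_id
                              ORDER BY created_at DESC) AS rn
    FROM movies
  )
  SELECT tag_id, name FROM movies_by_tags
  WHERE rn < 3
  ORDER BY tag_id, rn
""").fetchall()
print(rows)
# [(1, 'Wonder Woman 1984'), (1, 'Tenet'), (2, 'Monsters Inc.'), (2, 'Toy Story')]
```

The cost structure is the same at any scale: the window function must rank every row in the table before the outer filter throws almost all of them away.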
The same top-2-per-tag query can be rewritten with a LATERAL join, so that the ordered, LIMIT-ed inner query runs once per tag row:
zjh@postgres=# explain analyze
SELECT *
FROM tags t
JOIN LATERAL (
  SELECT m.*
  FROM movies m
  WHERE m.tag_id = t.id
  ORDER BY m.created_at DESC
  FETCH FIRST 2 ROWS ONLY
) e1 ON true
;
                                                                           QUERY PLAN                                                                           
----------------------------------------------------------------------------------------------------------------------------------------------------------------
 Nested Loop  (cost=1350.22..189048.15 rows=280 width=1052) (actual time=449.138..1347.527 rows=6 loops=1)
   ->  Seq Scan on tags t  (cost=0.00..11.40 rows=140 width=520) (actual time=0.010..0.014 rows=3 loops=1)
   ->  Limit  (cost=1350.22..1350.22 rows=2 width=532) (actual time=449.163..449.163 rows=2 loops=3)
         ->  Sort  (cost=1350.22..1353.56 rows=1338 width=532) (actual time=449.159..449.160 rows=2 loops=3)
               Sort Key: m.created_at DESC
               Sort Method: top-N heapsort  Memory: 25kB
               ->  Index Scan using movies_tag_id_index on movies m  (cost=0.42..1336.84 rows=1338 width=532) (actual time=0.028..302.523 rows=1000000 loops=3)
                     Index Cond: (tag_id = t.id)
 Planning Time: 0.163 ms
 Execution Time: 1347.567 ms
(10 rows)
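The Nested Loop in this plan makes the execution model explicit: the inner sort-plus-limit runs once per tag row (loops=3), and each iteration is a cheap top-N heapsort instead of one giant external sort. In engines without LATERAL, the same result can be produced by looping over tags in the application and issuing the correlated top-2 query per tag; a minimal Python/SQLite sketch of that equivalence (illustrative data, not the benchmark):

```python
import sqlite3

# Same small schema/data as the example above.
conn = sqlite3.connect(":memory:")
conn.executescript("""
CREATE TABLE tags (id INTEGER PRIMARY KEY, name TEXT);
CREATE TABLE movies (id INTEGER PRIMARY KEY, name TEXT,
                     tag_id INTEGER NOT NULL, created_at TEXT);
INSERT INTO tags(name) VALUES ('Action'), ('Animation');
INSERT INTO movies(name, tag_id, created_at) VALUES
  ('The Matrix', 1, '1999-05-21'),
  ('Tenet', 1, '2020-10-29'),
  ('Wonder Woman 1984', 1, '2020-12-25'),
  ('Toy Story', 2, '1995-12-22'),
  ('Monsters Inc.', 2, '2001-11-14');
""")

# Emulate the Nested Loop: for each tag, fetch only its own
# two most recent movies with an independent ordered query.
result = []
for (tag_id,) in conn.execute("SELECT id FROM tags ORDER BY id"):
    result.extend(conn.execute(
        "SELECT tag_id, name FROM movies "
        "WHERE tag_id = ? ORDER BY created_at DESC LIMIT 2",
        (tag_id,)))
print(result)
# [(1, 'Wonder Woman 1984'), (1, 'Tenet'), (2, 'Monsters Inc.'), (2, 'Toy Story')]
```

LATERAL pushes this loop down into the executor, so the per-group work happens in one query instead of one round trip per group.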

 

https://www.cybertec-postgresql.com/en/understanding-lateral-joins-in-postgresql/#

https://amandasposito.com/postgresql/performance/2021/01/04/postgres-lateral-join.html

https://blog.csdn.net/horses/article/details/86510905 (supported in MySQL 8.0.14+)

https://vladmihalcea.com/sql-lateral-join/ (supported since Oracle 12c)

https://blog.csdn.net/carcarrot/article/details/107174468 (supported since PostgreSQL 9.3)

SQL Server provides the equivalent functionality through CROSS APPLY / OUTER APPLY.

posted @ zhjh256