CTE Recursion Performance
CTE全名是Common Table Expression,语法基础请参考MSDN文档:https://msdn.microsoft.com/zh-cn/library/ms175972.aspx。
CTE Recursion诞生之时,着实让人惊艳了一把。被很多吃瓜群众以讹传讹之后,“慢”似乎成了CTE Recursion最大的原罪。
很多时候,用到CTE Recursion的场景,无非是千八百条的数据量,最大也不过万八千条,所以“慢”算不上个问题。直到前几天,一个群友问:500W的数据做CTE递归时,怎么做性能优化……
结论:
- 合理的索引会极大的提升CTE Recursion的性能;
- 根据实验结果猜测:Sql Server2016对CTE Recursion做了优化,缺失合理索引的前提下,性能有极大的提升(受测试样本影响,结果可能不准确。无论如何,索引可以帮到你)。
Talk is cheap,Show me the code!所以,原因如下图(样本数据:1W零1条^^):
拒绝耍流氓,测试代码如下:
1 IF OBJECT_ID('dbo.TestCte', 'U') IS NOT NULL
2 DROP TABLE dbo.TestCte;
3 GO
4 CREATE TABLE dbo.TestCte
5 (
6 Id VARCHAR(10) NOT NULL ,
7 ParentId VARCHAR(10) NULL
8 );
9 WITH cte_001
10 AS ( SELECT 1 AS a UNION ALL
11 SELECT 2 AS a UNION ALL
12 SELECT 3 AS a UNION ALL
13 SELECT 4 AS a UNION ALL
14 SELECT 5 AS a UNION ALL
15 SELECT 6 AS a UNION ALL
16 SELECT 7 AS a UNION ALL
17 SELECT 8 AS a UNION ALL
18 SELECT 9 AS a UNION ALL
19 SELECT 10 AS a )
20 INSERT dbo.TestCte
21 ( Id, ParentId )
22 SELECT RIGHT( '0000000000' + CAST ( T01.Id AS VARCHAR(10) ), 10 ) AS Id ,
23 RIGHT( '0000000000' + CAST ( CEILING( T01.Id / 10 ) AS VARCHAR(10) ), 10 ) AS ParentId
24 FROM (
25 SELECT ROW_NUMBER() OVER ( ORDER BY cte_001.a ) AS Id
26 FROM cte_001
27 CROSS JOIN cte_001 AS A
28 CROSS JOIN cte_001 AS B
29 CROSS JOIN cte_001 AS C
30 CROSS JOIN cte_001 AS D
31 CROSS JOIN cte_001 AS E
32 CROSS JOIN (SELECT TOP 5 * FROM cte_001) AS F ) AS T01;
33 GO
34
35 INSERT DBO.TestCte ( Id, ParentId )
36 VALUES ( '0000000000', NULL );
37 GO
1 --无索引版本
2 SET STATISTICS TIME ON;
3 SET STATISTICS IO ON;
4
5 IF OBJECT_ID('dbo.T', 'U') IS NOT NULL
6 DROP TABLE dbo.T;
7 GO
8 CREATE TABLE dbo.T
9 (
10 RN UNIQUEIDENTIFIER PRIMARY KEY,
11 Id VARCHAR(10) ,
12 ParentId VARCHAR(10) ,
13 Memo1 NVARCHAR(128) DEFAULT ( N'我是占位置的!我是占位置的!我是占位置的!我是占位置的!我是占位置的!' ) ,
14 Memo2 NVARCHAR(128) DEFAULT ( N'我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!' )
15 );
16 INSERT dbo.T
17 ( RN ,
18 Id ,
19 ParentId
20 )
21 SELECT N.RN ,
22 N.Id ,
23 N.ParentId
24 FROM ( SELECT NEWID() AS RN ,
25 Id ,
26 ParentId
27 FROM dbo.TestCte
28 WHERE Id < 10001--测试数据量,改这里
29 ) AS N
30 ORDER BY RN ASC;
31 GO
32 WITH cte_001
33 AS ( SELECT Id ,
34 ParentId
35 FROM dbo.T
36 WHERE ParentId IS NULL
37 UNION ALL
38 SELECT T01.Id ,
39 T01.ParentId
40 FROM T AS T01
41 INNER JOIN cte_001 AS T02 ON T02.Id = T01.ParentId
42 )
43 SELECT COUNT(*)
44 FROM cte_001;
1 --有索引版本
2 SET STATISTICS TIME ON;
3 SET STATISTICS IO ON;
4 IF OBJECT_ID('dbo.T', 'U') IS NOT NULL
5 DROP TABLE dbo.T;
6 GO
7 CREATE TABLE dbo.T
8 (
9 RN UNIQUEIDENTIFIER PRIMARY KEY,
10 Id VARCHAR(10) ,
11 ParentId VARCHAR(10) ,
12 Memo1 NVARCHAR(128) DEFAULT ( N'我是占位置的!我是占位置的!我是占位置的!我是占位置的!我是占位置的!' ) ,
13 Memo2 NVARCHAR(128) DEFAULT ( N'我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!' )
14 );
15 INSERT dbo.T ( RN , Id , ParentId )
16 SELECT N.RN , N.Id , N.ParentId
17 FROM ( SELECT NEWID() AS RN , Id , ParentId
18 FROM dbo.TestCte
19 WHERE Id < 10001--测试数据量,改这里
20 ) AS N
21 ORDER BY RN ASC;
22 GO
23
24 --创建索引
25 CREATE NONCLUSTERED INDEX IDX_DBO_T_PARENTID_ID
26 ON [dbo].[T] ([ParentId], Id)
27 GO
28
29 WITH cte_001
30 AS ( SELECT Id , ParentId
31 FROM dbo.T
32 WHERE ParentId IS NULL
33 UNION ALL
34 SELECT T01.Id , T01.ParentId
35 FROM T AS T01
36 INNER JOIN cte_001 AS T02 ON T02.Id = T01.ParentId
37 )
38 SELECT COUNT(*)
39 FROM cte_001;