mysql 之 group by 性能优化 查询与统计分离

背景介绍

记录共128W条!
 
SELECT cpe_id, COUNT(*) restarts
FROM business_log
WHERE operate_time>='2012-12-05 00:00:00' AND operate_time<'2018-01-05 00:00:00' AND operate_type=3 AND result=0
GROUP BY cpe_id
 
尝试对原SQL语句进行优化后发现,统计速度依旧没有获得满意的提升。单独运行条件查询语句(不包含GROUP BY和COUNT函数)后发现,查询的结果数据量只有6655条,耗时0.825s;加上统计语句后,时间飙升至3s。

 原理

mysql 解释:
MySQL说明文档中关于优化GROUP BY的部分指出:The most general way to satisfy a GROUP BY clause is to scan the whole table and create a new temporary table where all rows from each group are consecutive, and then use this temporary table to discover groups and apply aggregate functions (if any)。即,GROUP BY语句会扫描全表并新建一个临时表用来分组存放数据,然后根据临时表中的分组对数据执行聚合函数。现在的问题聚焦在:如果GROUP BY和WHERE在同一个语句中,这个“全表”指的是物理表还是WHERE过滤后的数据集合?
 
SELECT cpe_id, COUNT(*) restarts
FROM (
SELECT cpe_id
FROM business_log
WHERE operate_time>='2012-12-05 00:00:00' AND operate_time<'2018-01-05 00:00:00' AND operate_type=3 AND result=0
) t
GROUP BY cpe_id
---------------------
如上述语句所示,在查询语句外包了一个统计语句。执行结果:0.851s。时间消耗大幅减少!

结论

利用GROUP BY统计大数据时,应当将查询与统计分离,优化查询语句。
-
 
 

实际业务中

原查询过程
SELECT SUM(a.reportcount)reportcount,a.OrganizationId,a.organizationcode,a.organizationname,org.UpperComCode,org.organizationlevel FROM (SELECT COUNT(1) ReportCount, o.OrganizationCode ,o.organizationname, o.id OrganizationId,o.organizationlevel FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode WHERE o.OrganizationCode in ('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000') and r.ReportTime>'2019/4/25 0:00:00' and r.ReportReasonSubmitCode in ('A10006','A10007') and r.ReportTime<'2019/5/20 0:00:00' and r.LossTime>'2019/4/11 0:00:00' and r.LossTime<'2019/5/6 0:00:00' and r.InsuranceType in (5,6,7,8) AND r.CatasCollectionId=361 GROUP BY o.OrganizationCode,o.organizationname, o.id,o.organizationlevel union all SELECT COUNT(1) ReportCount, o.OrganizationCode,o.organizationname, o.id OrganizationId,o.organizationlevel FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode WHERE o.UpperComCode IN('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000')and r.ReportTime>'2019/4/25 0:00:00' and r.ReportReasonSubmitCode in ('A10006','A10007') and r.ReportTime<'2019/5/20 0:00:00' and r.LossTime>'2019/4/11 0:00:00' and r.LossTime<'2019/5/6 0:00:00' and r.InsuranceType in (5,6,7,8) AND r.CatasCollectionId=361 GROUP BY o.OrganizationCode,o.organizationname, o.id ,o.organizationlevel union all SELECT COUNT(1) ReportCount , o.UpperComCode OrganizationCode,o.ParentOrgName OrganizationName, o.ParentOrgId OrganizationId,o.organizationlevel FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode WHERE o.UpperComCode IN(SELECT OrganizationCode FROM organization WHERE UpperComCode IN ('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000' )and r.ReportTime>'2019/4/25 0:00:00' and r.ReportReasonSubmitCode in ('A10006','A10007') and r.ReportTime<'2019/5/20 0:00:00' and r.LossTime>'2019/4/11 0:00:00' and r.LossTime<'2019/5/6 0:00:00' and r.InsuranceType in (5,6,7,8) AND r.CatasCollectionId=361) GROUP BY o.UpperComCode ,o.ParentOrgName, o.ParentOrgId,o.organizationlevel union all SELECT SUM(reportcount) reportcount , organization.UpperComCode OrganizationCode, organization.ParentOrgName OrganizationName,organization.ParentOrgId OrganizationId,organization.OrganizationLevel FROM ( SELECT COUNT(1) reportcount , o.UpperComCode newcode, o.organizationlevel FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode WHERE o.UpperComCode IN( SELECT OrganizationCode FROM organization WHERE UpperComCode IN(SELECT OrganizationCode FROM organization WHERE UpperComCode IN ('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000' )and r.ReportTime>'2019/4/25 0:00:00' and r.ReportReasonSubmitCode in ('A10006','A10007') and r.ReportTime<'2019/5/20 0:00:00' and r.LossTime>'2019/4/11 0:00:00' and r.LossTime<'2019/5/6 0:00:00' and r.InsuranceType in (5,6,7,8) AND r.CatasCollectionId=361 ) ) GROUP BY o.UpperComCode, o.organizationlevel ) a JOIN organization ON a.newcode = organization.OrganizationCode GROUP BY organization.UpperComCode , organization.ParentOrgName ,organization.ParentOrgId ,organization.OrganizationLevel ) a JOIN organization org ON a.organizationcode=org.organizationcode GROUP BY a.organizationcode,a.organizationname,org.UpperComCode,org.organizationlevel,a.OrganizationId ORDER BY OrganizationLevel
优化结果
SELECT COUNT(1) AS reportcount,a.OrganizationId,a.organizationcode,a.organizationname,a.UpperComCode,a.organizationlevel FROM (
 
 
SELECT r.*,r.id AS reportid, o.OrganizationCode ,o.organizationname, o.id OrganizationId,o.organizationlevel ,o.UpperComCode
FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode
WHERE o.OrganizationCode IN ('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000') AND r.ReportTime>'2010/4/25 0:00:00' AND r.ReportReasonSubmitCode IN ('A10006','A10007') AND r.ReportTime<'2019/5/20 0:00:00' AND r.LossTime>'2010/4/11 0:00:00' AND r.LossTime<'2019/5/6 0:00:00' AND r.InsuranceType IN (5,6,7,8) AND r.CatasCollectionId=0
 
UNION ALL
SELECT r.id AS reportid , o.OrganizationCode,o.organizationname, o.id OrganizationId,o.organizationlevel ,o.UpperComCode
FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode
WHERE o.UpperComCode IN('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000')AND r.ReportTime>'2010/4/25 0:00:00' AND r.ReportReasonSubmitCode IN ('A10006','A10007') AND r.ReportTime<'2019/5/20 0:00:00' AND r.LossTime>'2010/4/11 0:00:00' AND r.LossTime<'2019/5/6 0:00:00' AND r.InsuranceType IN (5,6,7,8) AND r.CatasCollectionId=0
 
UNION ALL
SELECT r.id AS reportid , o.UpperComCode OrganizationCode,o.ParentOrgName OrganizationName, o.ParentOrgId OrganizationId,ou.organizationlevel ,ou.UpperComCode
FROM report r JOIN organization o ON r.OrganizationCode = o.OrganizationCode
JOIN organization ou ON o.UpperComCode=ou.OrganizationCode
WHERE o.UpperComCode IN
(SELECT OrganizationCode FROM organization WHERE UpperComCode IN
('44010000','44040000','44006600','44008800','44020000','44050000','44060000','44070000','44080000','44090000','44120000','44150000','44160000','44170000','44180000','44190000','44510000','44520000','44530000','44710000','44940000','44950000' )AND r.ReportTime>'2010/4/25 0:00:00' AND r.ReportReasonSubmitCode IN ('A10006','A10007') AND r.ReportTime<'2019/5/20 0:00:00' AND r.LossTime>'2010/4/11 0:00:00' AND r.LossTime<'2019/5/6 0:00:00' AND r.InsuranceType IN (5,6,7,8) AND r.CatasCollectionId=0)
 
) a
GROUP BY a.OrganizationCode,a.organizationname, a.OrganizationId,a.organizationlevel,a.UpperComCode
ORDER BY a.organizationlevel
 
posted @ 2019-04-17 09:58  赵坤坤  阅读(6255)  评论(2编辑  收藏  举报