窗口函数 mysql 8.0版本前实现方式
sql oracle mysql 数据库 基础 知识 窗口函数 lag与lead函数 LISTAGG函数 connect by 结构化查询 备份和恢复 mysql5.7 mysql8.0 oracle_11g docker
窗口函数 mysql 8.0版本前实现方式
MySQL5.7.25和 8.0.16 环境中实现类似Oracle的分析函数(8.0版本中已支持,直接使用即可)。
CREATE TABLE test1 (
cus_no VARCHAR ( 10 ) DEFAULT NULL COMMENT '客户编号', -- 客户编号
age INT ( 4 ) DEFAULT NULL COMMENT '年龄', -- 年龄
tran_month VARCHAR ( 6 ) DEFAULT NULL COMMENT '交易月份', -- 交易月份
tran_date datetime DEFAULT NULL COMMENT '交易日期', -- 交易日期
tran_amt DECIMAL ( 20, 2 ) DEFAULT NULL COMMENT '交易金额' -- 交易金额
);
-- 插入测试数据
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191012',880.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191013',69.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191014',128.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191015',12.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191016',99.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191018',199.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201910','20191020',28.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101028',28,'201911','20191101',39.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201910','20191012',33.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201910','20191013',28.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201910','20191014',120.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201910','20191015',230.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201910','20191016',129.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201911','20191102',321.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201911','20191103',25.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201911','20191104',89.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101029',null,'201911','20191105',60.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101030',35,'201910','20191015',260.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101030',35,'201910','20191016',320.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101030',35,'201910','20191016',180.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101030',35,'201910','20191017',100.00);
insert into test1(cus_no,age,tran_month,tran_date,tran_amt) values('cus_101030',35,'201910','20191018',40.00);
模拟分区子句的使用
需求:统计每个客户在保留原来数据行的基础上增加一列统计列(消费总额)
例:
客户编号 年龄 交易月份 交易日期 交易金额 消费总额(追加列) 每个月的消费总额(追加列)
-- 实现简单,但是效率有问题,数据大不建意用
SELECT
t1.cus_no '客户编号',
t1.age '年龄',
t1.tran_month '交易月份',
t1.tran_date '交易日期',
t1.tran_amt '交易金额',
( SELECT sum( tran_amt ) FROM test1 WHERE cus_no = t1.cus_no GROUP BY cus_no ) AS '消费总额',-- 每个客户的消费总额
( SELECT sum( tran_amt ) FROM test1 WHERE cus_no = t1.cus_no AND tran_month = t1.tran_month GROUP BY cus_no, tran_month ) AS '每个月的消费总额' -- 每个客户每个月的消费总额
FROM
test1 AS t1;
模拟排序子句的使用
需求:统计每个客户逐日累计的消费金额
例:
2019-10-15当日消费金额 = 0+当日
2019-10-16当日消费金额 = 2019-10-15当日消费金额 + 当日
2019-10-17当日消费金额 = 2019-10-16当日消费金额 + 当日
以此类推。
SELECT
t11.cus_no '客户编号',
t11.fmt_date '交易日期',
t11.total_amt '交易金额',
sum( t12.total_amt ) '逐日累计消费金额'
FROM
-- 按客户编号及交易日期进行合计分组生成基础数据
( SELECT cus_no, sum( tran_amt ) total_amt, DATE_FORMAT(tran_date, '%Y%m%d') fmt_date FROM test1 GROUP BY cus_no, DATE_FORMAT(tran_date, '%Y%m%d') ) AS t11
INNER JOIN
( SELECT cus_no, sum( tran_amt ) total_amt, DATE_FORMAT(tran_date, '%Y%m%d') fmt_date FROM test1 GROUP BY cus_no, DATE_FORMAT(tran_date, '%Y%m%d') ) AS t12
ON t11.cus_no = t12.cus_no AND t11.fmt_date >= t12.fmt_date
GROUP BY t11.cus_no, t11.fmt_date
ORDER BY t11.cus_no, t11.fmt_date
分析
SELECT
t11.cus_no 't11客户编号',
t11.fmt_date 't11交易日期',
t11.total_amt 't11交易金额',
t12.fmt_date 't12交易日期',
t12.total_amt 't12交易金额'
-- sum( t12.total_amt ) '逐日累计消费金额',
FROM
-- 按客户编号及交易日期进行合计分组生成基础数据
( SELECT cus_no, sum( tran_amt ) total_amt, DATE_FORMAT(tran_date, '%Y%m%d') fmt_date FROM test1 GROUP BY cus_no, DATE_FORMAT(tran_date, '%Y%m%d') ) AS t11
INNER JOIN
( SELECT cus_no, sum( tran_amt ) total_amt, DATE_FORMAT(tran_date, '%Y%m%d') fmt_date FROM test1 GROUP BY cus_no, DATE_FORMAT(tran_date, '%Y%m%d') ) AS t12
ON t11.cus_no = t12.cus_no AND t11.fmt_date >= t12.fmt_date
WHERE t11.cus_no = 'cus_101028' -- 只看当前客户编号 cus_101028 的数据
-- GROUP BY t11.cus_no, t11.fmt_date -- 去掉取合分组 分析连表数据
ORDER BY t11.cus_no, t11.fmt_date
内联接 按t11.cus_no, t11.fmt_date排序,t11.cus_no, t11.fmt_date分组,
联接条件t11.fmt_date >= t12.fmt_date
模拟窗口子句的使用
需求:统计每个客户每个交易日期及其前后一天的消费总额
例:
日期 2019-10-15、2019-10-16、2019-10-17
2019-10-15 消费总额 = 0 + 2019-10-15 消费总额 + 2019-10-16 消费总额
2019-10-16 消费总额 = 2019-10-15 消费总额 + 2019-10-16 消费总额 + 2019-10-17 消费总额
2019-10-17 消费总额 = 2019-10-16 消费总额 + 2019-10-17 消费总额 + 2019-10-18 消费总额
以此类推。
SELECT
t1.cus_no '客户编号',
t1.age '年龄',
t1.tran_month '交易月份',
t1.tran_date '交易日期',
t1.tran_amt '交易金额',
( SELECT sum( tran_amt ) FROM test1
WHERE cus_no = t1.cus_no
AND date_format(tran_date, '%Y-%m-%d') >= date_sub(date_format(t1.tran_date, '%Y-%m-%d'), interval '1' day)
AND date_format(tran_date, '%Y-%m-%d') <= date_add(date_format(t1.tran_date, '%Y-%m-%d'), interval '1' day)
GROUP BY cus_no
) AS 'range消费总额',-- 统计每个客户每个交易日期及其前后一天的消费总额
(
SELECT sum(t_11.total_amt) FROM (
-- <2> 把处理好的数据增加一个行号 rownum
SELECT *, ( @row1 := @row1 + 1 ) AS rownum FROM (
-- <1> 把所有数据按客户编号 交易日期进行排序
SELECT *, date_format( tran_date, '%Y-%m-%d' ) AS fmt_date, sum(tran_amt) total_amt
FROM test1
GROUP BY cus_no, date_format( tran_date, '%Y-%m-%d' )
ORDER BY cus_no, date_format( tran_date, '%Y-%m-%d' )
-- <1>
) t_1, ( SELECT @row1 := 0 ) AS t
-- <2>
) t_11
INNER JOIN (
SELECT * FROM (
-- <2> 把处理好的数据增加一个行号 rownum 注意:row2的变量名不能与上面的相同
SELECT *, ( @row2 := @row2 + 1 ) AS rownum FROM (
-- <1> 把所有数据按客户编号 交易日期进行排序
SELECT *, date_format( tran_date, '%Y-%m-%d' ) AS fmt_date, sum(tran_amt) total_amt
FROM test1
GROUP BY cus_no, date_format( tran_date, '%Y-%m-%d' )
ORDER BY cus_no, date_format( tran_date, '%Y-%m-%d' )
-- <1>
) t_1, ( SELECT @row2 := 0 ) AS t
-- <2>
) t_12
) t_13 ON t_13.cus_no = t_11.cus_no
WHERE
-- t1表参数只能放在最外层使用
t_11.rownum >= t_13.rownum - 1 AND t_11.rownum <= t_13.rownum + 1 and t_13.cus_no = t1.cus_no and t_13.fmt_date = date_format(t1.tran_date, '%Y-%m-%d')
) AS 'rows消费总额' -- 统计每个客户每个交易日期及其前后一条的消费总额
FROM
test1 AS t1;