MySQL 计算最大值、最小值和中位数
知识点:
- 临时表
- 字符串处理
- 视图
- Cursor(游标)
- 条件 IF ELSE
- 循环 Loop
数据库的dump脚本:Github
需求
对比指定图书在给定时间区间的订单金额的最大值、最小值和中位数。
要求使用Stored Procedure实现。
输入
- 图书列表(出版社 + 作者 + 书名)
- 时间区间 (开始日期,结束日期)
输出
- 每本图书在指定时间区间内的最大值、最小值和中位数。
思路
Stored Procedure参数定义
此SP参数定义的关键点:
- 如何传入书名的列表?
- 如果传入书的全名:
出版社 + 作者 + 书名
,并方便作为where的筛选条件(DB表里是分开的三个字段)?
方案一:拼接为一个字符串 传入
把书列表用 ,
分割;单个书的字段信息用 #
分割。
示例:
'电子工业出版社#孙卫琴#Tomcat与Java,信息出版社#孙卫#Java核心编程'
此方案的前提是 书名、出版社、作者名里面不能包含有,
和 #
。
方案二:还没找到其他比较好的方法
TBD
流程
- 定义临时表来保存处理后的书参数:
出版社 + 作者 + 书名
; - 定临时表,保存结果;
- 定义一个图书订单信息的视图,减少后续逻辑中的表join。
- 获取给定书的最大值和最小值,并存入结果表;
- 获取给定书的中位数,并更新结果表;
- 返回结果。
实现
订单视图
视图除了订单信息外,主要是把customer和book的信息也包含进来,方便后续逻辑的join。
drop view if exists view_customer_order;
create view view_customer_order as
select
co.no as order_no, -- 订单编号
co.order_date, -- 订单日期
co.total_price, -- 订单的总价
co.quantity, -- 订单里数据数量
cu.no customer_no, -- 客户编号
cu.first_name, -- 客户 名
cu.last_name, -- 客户 姓
bo.no as book_no, -- 图书 编号
bo.name as book_name, -- 书名
bo.author, -- 作者
bo.publisher, -- 出版社
bo.publish_date, -- 出版日期
bo.unit_price, -- 书 单价
bc.code as category_code, -- 书 分类码
bc.name as category_name -- 书 分类名
from customer_order co
join customer cu
on co.customer_id = cu.id
join book bo
on bo.id = co.book_id
join book_category bc
on bc.id = bo.category_id;
Stored Procedure实现
drop procedure if exists sp_get_book_price_day_analytics;
DELIMITER $$
create procedure sp_get_book_price_day_analytics
(
-- bookNames将会传入一个拼接好的字符串: 'publisher#author#book-name, publisher#author#book-name'
-- 1. 书的列表,以 ','分割
-- 2. 每一本书的格式: publisher#author#book-name,以'#'分割
-- 调用示例:call sp_get_book_price_day_analytics('邮电出版社#Richard#Java-Programming,北京出版#Richie#Compunter System', '2021-01-01', '2021-02-01');
-- 为什么这么传: 还没想到达到同样目的其他比较好的方法。
IN bookNames longtext,
IN startDay varchar(50),
IN endDay varchar(50)
)
BEGIN
DECLARE allBookNames longtext;
DECLARE books_delimiter char default ',';
DECLARE author_delimiter char default '#';
DECLARE curBookName varchar(200);
DECLARE curPublisher varchar(200);
DECLARE curAuthor varchar(200);
DECLARE done INT;
DECLARE pub_auth_book_cur CURSOR FOR
SELECT book_name as curBookName, publisher as curPublisher, author as curAuthor FROM publisher_author_book_tmp;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
set allBookNames = bookNames;
DROP TEMPORARY TABLE IF EXISTS books_day_price_result_tmp;
CREATE TEMPORARY TABLE `books_day_price_result_tmp`(
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`book_name` varchar(200),
`author` varchar(200),
`publisher` varchar(200),
`max_price` double,
`median_price` double,
`min_price` double,
PRIMARY KEY (`id`)
);
DROP TEMPORARY TABLE IF EXISTS publisher_author_book_tmp;
CREATE TEMPORARY TABLE `publisher_author_book_tmp`(
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`book_name` varchar(200),
`publisher` varchar(200),
`author` varchar(200),
PRIMARY KEY (`id`)
);
DROP TEMPORARY TABLE IF EXISTS books_with_author_tmp;
CREATE TEMPORARY TABLE books_with_author_tmp(book NVARCHAR(200));
-- 先拆分books 列表
-- 示例: '邮电出版社#Richard#Java-Programming,北京出版#Richie#Compunter System'
WHILE LOCATE(books_delimiter, allBookNames) > 1 DO
INSERT INTO books_with_author_tmp SELECT SUBSTRING_INDEX(allBookNames, books_delimiter, 1);
SET allBookNames = REPLACE (allBookNames, (SELECT LEFT(allBookNames, LOCATE(books_delimiter, allBookNames))),'');
END WHILE;
INSERT INTO books_with_author_tmp(book) VALUES(allBookNames);
-- 再拆分每个book的:book name, author 和publisher
INSERT INTO publisher_author_book_tmp (publisher, author, book_name)
SELECT
SUBSTRING_INDEX(book, author_delimiter, 1),
SUBSTRING_INDEX(SUBSTRING_INDEX(book, author_delimiter, 2), author_delimiter, -1),
SUBSTRING_INDEX(SUBSTRING_INDEX(book, author_delimiter, 3), author_delimiter, -1)
FROM books_with_author_tmp;
-- 获取max, min
--
INSERT INTO books_day_price_result_tmp(book_name, author, publisher, max_price, median_price, min_price)
SELECT co.book_name, co.author, co.publisher, max(co.total_price), 0, min(co.total_price)
from publisher_author_book_tmp tmp
join view_customer_order co
on tmp.author = co.author and tmp.publisher = co.publisher and tmp.book_name = co.book_name
where co.order_date >= startDay and co.order_date <= endDay
group by co.book_name, co.author, co.publisher;
-- 获取并更新median
--
OPEN pub_auth_book_cur;
read_loop : LOOP
FETCH pub_auth_book_cur into curBookName, curPublisher, curAuthor;
IF done=1 THEN
LEAVE read_loop;
END IF;
-- select curBookName, curPublisher, curAuthor; -- 调试用
-- 计算中位数
UPDATE books_day_price_result_tmp res set median_price =
(
SELECT AVG(vco2.total_price)
FROM
(
SELECT vco.total_price, @rownum:=@rownum+1 as `row_number`, @total_rows:=@rownum
FROM view_customer_order vco, (SELECT @rownum:=0) r
WHERE
vco.book_name = curBookName and vco.author = curAuthor and vco.publisher = curPublisher
and vco.order_date >= startDay and vco.order_date <= endDay
) as vco2
WHERE vco2.row_number IN (FLOOR((@total_rows+1)/2), FLOOR((@total_rows +2)/2))
)
WHERE res.book_name = curBookName and res.author = curAuthor and res.publisher = curPublisher;
END LOOP read_loop;
CLOSE pub_auth_book_cur;
SELECT * FROM books_day_price_result_tmp;
DROP TEMPORARY TABLE IF EXISTS books_day_price_result_tmp;
DROP TEMPORARY TABLE IF EXISTS publisher_author_book_tmp;
END $$;
DELIMITER ;