mysql json效率测试

表结构

另个表都是ID作为主键,另外5个字段类型相等,create_date 数据类型略有不同

#普通表,索引是后面的测试过程中添加的
CREATE TABLE `goods` (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `stock` int DEFAULT NULL,
  `des` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `des2` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
  `data` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci,
  `create_date` timestamp NULL DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE,
  KEY `goods_des_IDX` (`des`) USING BTREE,
  KEY `goods_name_IDX` (`name`) USING BTREE,
  KEY `goods_stock_IDX` (`stock`,`name`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=1810224621868826706 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ROW_FORMAT=DYNAMIC;

#按照json数据结构存储的表
CREATE TABLE `goods3` (
  `id` bigint NOT NULL,
  `json_data` json NOT NULL,
  `name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.name'))) VIRTUAL,
  `stock` int GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.stock'))) VIRTUAL,
  `des` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.des'))) VIRTUAL,
  `des2` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.des2'))) VIRTUAL,
  `data` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.data'))) VIRTUAL,
  `create_date` bigint GENERATED ALWAYS AS (json_unquote(json_extract(`json_data`,_utf8mb4'$.createDate'))) VIRTUAL,
  PRIMARY KEY (`id`),
  KEY `goods3_des_IDX` (`des`) USING BTREE,
  KEY `goods3_name_IDX` (`name`) USING BTREE,
  KEY `goods3_stock_IDX` (`stock`,`name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

硬件说明

​ 虚拟机,分配4核1G内存,SSD

数据说明

都是2000W+数据,其中有2000W数据是两边一模一样的,另外少量对比插入时间操作,导致两边数据略微不相等。
数据例举

占用空间对比

普通表3.3G,json表6.1G

image-20240710140205285

查询全量count 对比

#-----------------------查询count-------------------------------------------没啥区别
#23s
SELECT count(*) from goods;
#23S
SELECT count(*) from goods3;

#23s
SELECT count(id) from goods;
#23s
SELECT count(id) from goods3;
#---------------------------深分页查询对比---------------------------------------性能约差3倍
#1s
SELECT  * from goods  limit 1000000,10;
#3s
SELECT  * from goods3  limit 1000000,10;

#5s
SELECT  * from goods  limit 5000000,10;
#15s
SELECT  * from goods3  limit 5000000,10;

#10s
SELECT  * from goods  limit 10000000,10;
#31s
SELECT  * from goods3  limit 10000000,10;

#16s
SELECT  * from goods  limit 15000000,10;
#47s
SELECT  * from goods3  limit 15000000,10;

#22s
SELECT  * from goods  limit 20000000,10;
#64s
SELECT  * from goods3  limit 20000000,10;

查询普通字段对比

#---------------------------无索引字段查询对比--------------------------------------性能约差3倍
#12s
SELECT  count(*) from goods where name  = "AA0";
#34s
SELECT  count(*) from goods3 where name = "AA0";


#11s
SELECT  count(*) from goods where name  = "ZB3";
#33s
SELECT  count(*) from goods3 where name = "ZB3";


#11s
SELECT  count(*) from goods where name  = "Gi6";
#34s
SELECT  count(*) from goods3 where name = "Gi6";

#14s
SELECT  count(*) from goods where name  = "oO5";
#33s
SELECT  count(*) from goods3 where name = "oO5";
#--查询stock,没有索引,普通数字字段-------------------- 差距依旧是约3倍
#11s
SELECT  count(*)   from goods where stock  = 8888;
#33s
SELECT   count(*)    from goods3 where stock  = 8888;

#11s
SELECT   count(*)    from goods where stock  = 176;
#32s
SELECT   count(*)    from goods3 where stock  = 176;


#11s
SELECT   count(*)    from goods where stock  = 5580;
#32s
SELECT  count(*)   from goods3 where stock  = 5580;


#11s
SELECT   count(*)    from goods where stock  = 1212;
#33s
SELECT  count(*)   from goods3 where stock  = 1212;

--------------下面的测试是添加索引以后---------------

有索引字段对比

#---------------------------添加索引后查询对比-------------------------------------- 索引以后差距不大,甚至json格式还快点
#0.016s
SELECT  count(*) from goods where name  = "QQ7";
#0.001s
SELECT  count(*) from goods3 where name = "QQ7";


#0.016s
SELECT  count(*) from goods where name  = "zy0";
#0.001s
SELECT  count(*) from goods3 where name = "zy0";


#0.016s
SELECT  count(*) from goods where name  = "tf4";
#0.002s
SELECT  count(*) from goods3 where name = "tf4";

#0.049s
SELECT  count(*) from goods where name  = "hu8";
#0.017s
SELECT  count(*) from goods3 where name = "hu8";

#------------------添加索引后查询对比(查全量数据)-------------------------------------- 结论差不多
#0.122s
SELECT  *  from goods where name  = "QQ7";
#0.141s
SELECT  *  from goods3 where name = "QQ7";


#0.106s
SELECT  *  from goods where name  = "zy0";
#0.097s
SELECT  *   from goods3 where name = "zy0";


#0.101s
SELECT  *   from goods where name  = "tf4";
#0.091s
SELECT  *   from goods3 where name = "tf4";

#0.104s
SELECT  *   from goods where name  = "hu8";
#0.099s
SELECT  *   from goods3 where name = "hu8";
#--查询另外一个索引,不是唯一索引,但是数据基本唯一---------------- 没区别,都很快
#0.019s
SELECT  *   from goods where des  = "oqiVb3pUoEo4CgqV";
#0.050s
SELECT  *   from goods3 where des = "oqiVb3pUoEo4CgqV";

#0.049s
SELECT  *   from goods where des  = "lHr9AQyItN57uVrJ";
#0.016s
SELECT  *   from goods3 where des = "lHr9AQyItN57uVrJ";

#0.015s
explain SELECT  *   from goods where des  = "buWN4squ1YjZNqzJ";
#0.049s
explain SELECT  *   from goods3 where des = "buWN4squ1YjZNqzJ";


#0.022s
SELECT  *   from goods where des  = "rGDCl3tr0uGOrcOd";
#0.017s
SELECT  *   from goods3 where des = "rGDCl3tr0uGOrcOd";

联合索引字段对比


#查询有一个索引字段一个不是索引字段,正常应该和使用索引效率接近------------------------
#0.337s
SELECT  count(*)  from goods where stock  = 8888 and name = "WXR";
#0.369
SELECT  count(*)  from goods3 where stock  = 8888 and name = "WXR";

#0.160s
SELECT  count(*)  from goods where stock  = 8888 and name = "MT0" ;
#0.184s
SELECT  count(*)  from goods3 where stock  = 8888 and name = "MT0";

结论

  1. json格式+虚拟列会占用更对的存储空间,json+虚拟列接近2倍的存储
    据说json格式使用CPU占用和IO会远远大于普通字段,暂时没想好怎么验证,存储占用更多是铁定的
  2. 如果没有索引,json+虚拟列的查询耗时约是普通字段的3倍
  3. 有索引并且索引命中的情况下,几乎没有区别,都很快

posted on 2024-07-10 14:12  zhangyukun  阅读(53)  评论(0编辑  收藏  举报

导航