【GreatSQL优化器-17】DYNAMIC RANGE
【GreatSQL优化器-17】DYNAMIC RANGE
一、DYNAMIC RANGE介绍
GreatSQL 的优化器有一种扫描方式是动态范围扫描(DYNAMIC RANGE),类似于“已读乱回”模式:在表有多个索引的情况下,该表与前面的驱动表连接时,不是事先固定使用某个索引,而是针对驱动表的每一行分别选择索引。优化器没有找到好的索引可以使用,但发现在知道前面表的列值后,可能会使用某些索引。对于前面表中的每个行组合,优化器检查是否可以使用 range 或 index merge 访问方法来检索行。虽然这不是很快,但比执行完全没有索引的连接要快。
下面用一个简单的例子来说明 DYNAMIC RANGE 怎么应用在优化器。
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT,date1 DATETIME); INSERT INTO t1 VALUES (1,10,'2021-03-25 16:44:00.123456'),(2,1,'2022-03-26 16:44:00.123456'),(3,4,'2023-03-27 16:44:00.123456'),(5,5,'2024-03-25 16:44:00.123456'),(7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456'); CREATE TABLE t2 (cc1 INT PRIMARY KEY, cc2 INT); INSERT INTO t2 VALUES (1,3),(2,1),(3,2),(4,3),(5,15); CREATE TABLE t3 (ccc1 INT, ccc2 varchar(100)); INSERT INTO t3 VALUES (1,'aa1'),(2,'bb1'),(3,'cc1'),(4,'dd1'),(null,'ee'); CREATE INDEX idx1 ON t1(c2); CREATE INDEX idx2 ON t1(c2,date1); CREATE INDEX idx2_1 ON t2(cc2); CREATE INDEX idx3_1 ON t3(ccc1); greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5; +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL | | 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) | -- 这里的结果出现了Range checked,说明进行了DYNAMIC_RANGE +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ greatsql> SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5; +----+------+---------------------+------+------+ | c1 | c2 | date1 | ccc1 | ccc2 | +----+------+---------------------+------+------+ | 1 | 10 | 2021-03-25 16:44:00 | 1 | aa1 | | 2 | 1 | 2022-03-26 16:44:00 | 1 | aa1 | | 3 | 4 | 2023-03-27 16:44:00 | 1 | aa1 | | 2 | 1 | 2022-03-26 16:44:00 | 2 | bb1 | | 3 | 4 | 
2023-03-27 16:44:00 | 2 | bb1 | | 2 | 1 | 2022-03-26 16:44:00 | 3 | cc1 | | 3 | 4 | 2023-03-27 16:44:00 | 3 | cc1 | | 2 | 1 | 2022-03-26 16:44:00 | 4 | dd1 | | 3 | 4 | 2023-03-27 16:44:00 | 4 | dd1 | | 2 | 1 | 2022-03-26 16:44:00 | NULL | ee | | 3 | 4 | 2023-03-27 16:44:00 | NULL | ee | +----+------+---------------------+------+------+ greatsql> SELECT * FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; "attaching_conditions_to_tables": { "original_condition": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))", "attached_conditions_computation": [ { "table": "`t1`", "rechecking_index_usage": { -- 这里对t1进行了recheck "recheck_reason": "not_first_table", "range_analysis": { "table_scan": { "rows": 7, "cost": 3.05 }, "potential_range_indexes": [ { "index": "PRIMARY", "usable": true, "key_parts": [ "c1" ] }, { "index": "idx1", "usable": true, "key_parts": [ "c2", "c1" ] }, { "index": "idx2", "usable": true, "key_parts": [ "c2", "date1", "c1" ] } ], "best_covering_index_scan": { "index": "idx2", "cost": 0.952742, "chosen": true }, "setup_range_conditions": [ ], "group_index_range": { "chosen": false, "cause": "not_single_table" }, "skip_scan_range": { "chosen": false, "cause": "not_single_table" }, "analyzing_range_alternatives": { "range_scan_alternatives": [ ], "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" } }, "analyzing_index_merge_union": [ { "indexes_to_merge": [ { "range_scan_alternatives": [ { "index": "PRIMARY", "chosen": false, "cause": "depends_on_unread_values" } ], "chosen": false, "cause": "cost" }, { "range_scan_alternatives": [ { "index": "idx1", "ranges": [ "NULL < c2 < 5" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": true, "in_memory": 1, "rows": 2, "cost": 0.460274, "chosen": true }, { "index": "idx2", "ranges": [ "NULL < c2 < 5" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": true, "in_memory": 1, "rows": 2, "cost": 0.460457, 
"chosen": false, "cause": "cost" } ], "chosen": false, "cause": "cost" } ], "cost_of_reading_ranges": 0, "chosen": false, "cause": "cost" } ] } } } ], "attached_conditions_summary": [ { "table": "`t3`", "attached": null }, { "table": "`t1`", "attached": "((`t1`.`c1` = `t3`.`ccc1`) or (`t1`.`c2` < 5))" } ] } "join_execution": { "select#": 1, "steps": [ { "rows_estimation_per_outer_row": { -- 这里只截取t3读第一行时候t1的索引选择 "table": "`t1`", "range_analysis": { "table_scan": { "rows": 7, "cost": 3.05 }, "potential_range_indexes": [ { "index": "PRIMARY", "usable": true, "key_parts": [ "c1" ] }, { "index": "idx1", "usable": true, "key_parts": [ "c2", "c1" ] }, { "index": "idx2", "usable": true, "key_parts": [ "c2", "date1", "c1" ] } ], "best_covering_index_scan": { "index": "idx2", "cost": 0.952742, "chosen": true }, "setup_range_conditions": [ ], "group_index_range": { "chosen": false, "cause": "not_single_table" }, "skip_scan_range": { "chosen": false, "cause": "not_single_table" }, "analyzing_range_alternatives": { "range_scan_alternatives": [ ], "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" } }, "analyzing_index_merge_union": [ { "indexes_to_merge": [ { "range_scan_alternatives": [ { "index": "PRIMARY", "ranges": [ "c1 = 1" ], "index_dives_for_eq_ranges": true, "rowid_ordered": true, "using_mrr": false, "index_only": true, "in_memory": 1, "rows": 1, "cost": 0.36, "chosen": true } ], "index_to_merge": "PRIMARY", "cumulated_cost": 0.36 }, { "range_scan_alternatives": [ { "index": "idx1", "ranges": [ "NULL < c2 < 5" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": true, "in_memory": 1, "rows": 2, "cost": 0.460274, "chosen": true }, { "index": "idx2", "ranges": [ "NULL < c2 < 5" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": true, "in_memory": 1, "rows": 2, "cost": 0.460457, "chosen": false, "cause": "cost" } ], "index_to_merge": "idx1", 
"cumulated_cost": 0.820274 } ], "cost_of_reading_ranges": 0.820274, "cost_of_mapping_rowid_in_non_clustered_pk_scan": 0.1, "cost_sort_rowid_and_read_disk": 0.4375, "use_roworder_index_merge": true, -- 根据上面结果选择了主键合并idx1的结果 "cause": "cost" } ] } } }, greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5; +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL | | 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 42.85 | Range checked for each record (index map: 0x7) | -- 这里0x7指的是t1表的3条索引 +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ -- 驱动表t3一共5条数据,每行记录都进行了一次test_quick_select()来查找t1是否有可用的更优索引
详细DYNAMIC RANGE
执行过程如下表所示
t3.ccc1 | t1全表扫描cost | t1索引union | t1索引sort-union的cost | t1联合索引idx2 | t1范围扫描 | 结果 |
---|---|---|---|---|---|---|
1 | 3.8 | 排序[PRIMARY,idx1,idx2], idx2加进去的cost太大放弃 | 1.58027 | 1.71097 | 无 | PRIMARY,idx1索引合并 |
2 | 3.8 | 排序[PRIMARY,idx1,idx2], idx2的cost太大放弃 | 1.58027 | 1.71097 | 无 | PRIMARY,idx1索引合并 |
3 | 3.8 | 排序[PRIMARY,idx1,idx2], idx2的cost太大放弃 | 1.58027 | 1.71097 | 无 | PRIMARY,idx1索引合并 |
4 | 3.8 | 排序[PRIMARY,idx1,idx2], idx2的cost太大放弃 | 1.58027 | 1.71097 | 无 | PRIMARY,idx1索引合并 |
null | 3.8 | 排序[idx1,idx2],cost太大, 不用合并 | idx1的cost太大,不用合并 | 1.71097 | idx2 1.21183 | 索引idx2 |
二、相关代码解释
DYNAMIC_RANGE 的判断发生在对表 join 排序以后执行 make_join_query_block 的时候,这时候要对不是 join 第一张表的表做 recheck,recheck 以后有可能让表变为 DYNAMIC_RANGE 扫描方式。
static bool make_join_query_block(JOIN *join, Item *cond) { // 满足以下条件的需要进行RECHECK,要么是非第一张表要么是有limit语句 if ((tab->type() == JT_ALL || tab->type() == JT_RANGE || tab->type() == JT_INDEX_MERGE || tab->type() == JT_INDEX_SCAN) && tab->use_quick != QS_RANGE) { if (cond && // 1a (tab->keys() != tab->const_keys) && // 1b (i > 0 || // 1c (join->query_block->master_query_expression()->item && cond->is_outer_reference()))) recheck_reason = NOT_FIRST_TABLE; else if (!tab->const_keys.is_clear_all() && // 2a i == join->const_tables && // 2b (join->query_expression()->select_limit_cnt < (tab->position()->rows_fetched * tab->position()->filter_effect)) && // 2c !join->calc_found_rows) // 2d recheck_reason = LOW_LIMIT; // 满足这个if条件的执行QS_DYNAMIC_RANGE,详情见下面几张表 if (!tab->table()->quick_keys.is_subset(tab->checked_keys) || !tab->needed_reg.is_subset(tab->checked_keys)) { tab->keys().merge(tab->table()->quick_keys); tab->keys().merge(tab->needed_reg); if (!tab->needed_reg.is_clear_all() && (tab->table()->quick_keys.is_clear_all() || (tab->range_scan() && (tab->range_scan()->num_output_rows() >= 100.0)))) { tab->use_quick = QS_DYNAMIC_RANGE; tab->set_type(JT_ALL); } else tab->use_quick = QS_RANGE; } } } // 实际使用代码 bool DynamicRangeIterator::Init() { // 每次迭代器开始的时候先获取最佳索引组合和AccessPath AccessPath *range_scan; int rc = test_quick_select(&range_scan); }
表一:需要进行 recheck 的原因
场景 | 条件 |
---|---|
不是join连接的第一张表 | 存在针对这张表的条件 |
连接的第一张表 | 存在第一张表的列与常量值的比较(const_keys 不为空),limit值小于预估的满足条件的行数,并且 calc_found_rows 为 false |
表二:quick_type 类型
类型 | 说明 |
---|---|
QS_NONE | 不使用快速扫描 |
QS_RANGE | 使用范围扫描 |
QS_DYNAMIC_RANGE | 使用动态范围 |
表三:表用 QS_DYNAMIC_RANGE 的场合
条件(以下必须全部满足) | 说明 |
---|---|
条件列存在索引 | 有机会生成mm tree |
keyuse_array数组没有值 | 有值的话执行REF扫描,一般有OR条件时候导致该数组没有值 |
表没有对应的mm tree | 无法生成mm tree的条件 |
这张表不是join连接的第一张表 | |
对表执行recheck | recheck的原因见表一 |
recheck以后生成AccessPath但是找到的范围内行数大于等于100行 | 按代码,该条件与“表没有对应的mm tree”是“或”的关系,满足其一即可 |
三、实际例子说明
接下来看几个例子来说明上面的代码:
greatsql> EXPLAIN SELECT * FROM t1 join t3 ON t1.c1=t3.ccc1 or t1.c2<5; +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | NULL | | 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 43.67 | Range checked for each record (index map: 0x7) | +----+-------------+-------+------------+------+-------------------+------+---------+------+------+----------+------------------------------------------------+ -- 第一次对单表进行估计的时候t1.c1=t3.ccc1条件没有加入到mm tree,只对t1.c1打了一个标记MAYBE_KEY表示后面可能可以用到这个索引,因此最后没有生成mm tree,在make_join_query_block因为已经确定表连接顺序可以使用t1.c1=t3.ccc1条件,这时候可以把两个条件结合起来对t1执行mm tree,因此这个时候才生成mm tree,而这个时候才能考虑range scan和索引合并等操作。
下表是 t1 的 JOIN_TAB 的两个相关 key 变量的值,可以看到两者不相等,因此 keys() != const_keys
key | idx2(c2,date1) | idx1(c2) | primary(c1) | 说明 |
---|---|---|---|---|
keys() | 1 | 1 | 1 | 用来存放所有条件列涉及的key,这里指t1所有索引 |
const_keys | 1 | 1 | 0 | 用来存放所有列与常量对比的key,这里指t1.c2<5 |
下面删除t1的一些数据,让join方式变为t1在前t3在后,这样的话对t1就不需要执行recheck,并且最后t1也通过索引进行扫描。
greatsql> DELETE FROM t1 WHERE c1 IN (7,8,11); greatsql> EXPLAIN SELECT * FROM t1 JOIN t3 ON t1.c1=t3.ccc1 OR t1.c2<5; +----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+ | 1 | SIMPLE | t1 | NULL | index | PRIMARY,idx1,idx2 | idx2 | 11 | NULL | 3 | 100.00 | Using index | | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 10 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+-------+------------+-------+-------------------+------+---------+------+------+----------+--------------------------------------------+
下面执行一下三张表的连接,看到因为t1不是第一张表,最后用到了DYNAMIC RANGE
greatsql> INSERT INTO t1 VALUES (7,null,'2020-03-25 16:44:00.123456'),(8,10,'2020-10-25 16:44:00.123456'),(11,16,'2023-03-25 16:44:00.123456'); greatsql> EXPLAIN SELECT * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ; +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+ | 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index | | 1 | SIMPLE | t1 | NULL | ALL | PRIMARY,idx1,idx2 | NULL | NULL | NULL | 7 | 42.85 | Range checked for each record (index map: 0x7) | | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+------------------------------------------------+
如果遇到 DYNAMIC RANGE 的情况,每行都进行一次索引判断肯定比一开始就决定单纯走索引更浪费时间,可以采取以下的方法来避免这种情况。
1、强制让 DYNAMIC RANGE 的表作为驱动表
greatsql> EXPLAIN SELECT /*+ qb_name(qb1) JOIN_ORDER(@qb1 t1,t2,t3) */ * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ; +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+ | 1 | SIMPLE | t1 | NULL | index | PRIMARY,idx1,idx2 | idx2 | 11 | NULL | 7 | 100.00 | Using index | | 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using where; Using index; Using join buffer (hash join) | | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+-------+------------+-------+-------------------+--------+---------+------+------+----------+---------------------------------------------------------+
2、给 DYNAMIC RANGE 的表强制指定索引
greatsql> EXPLAIN SELECT * FROM t1 FORCE INDEX(idx1,idx2) join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1 or t1.c2<5 ; +----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+ | 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index | | 1 | SIMPLE | t1 | NULL | index | idx1,idx2 | idx2 | 11 | NULL | 7 | 42.85 | Using where; Using index; Using join buffer (hash join) | | 1 | SIMPLE | t3 | NULL | ALL | idx3_1 | NULL | NULL | NULL | 5 | 100.00 | Using where; Using join buffer (hash join) | +----+-------------+-------+------------+-------+---------------+--------+---------+------+------+----------+---------------------------------------------------------+
3、改变条件,删除or条件,使用 keyuse_array 数组来选择路径。
greatsql> EXPLAIN SELECT * FROM t1 join t3 join t2 ON t1.c1=t3.ccc1 and t1.c1=t2.cc1; +----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+ | id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+ | 1 | SIMPLE | t2 | NULL | index | PRIMARY | idx2_1 | 5 | NULL | 5 | 100.00 | Using index | | 1 | SIMPLE | t3 | NULL | ref | idx3_1 | idx3_1 | 5 | db1.t2.cc1 | 1 | 100.00 | NULL | | 1 | SIMPLE | t1 | NULL | eq_ref | PRIMARY | PRIMARY | 4 | db1.t2.cc1 | 1 | 100.00 | NULL | +----+-------------+-------+------------+--------+---------------+---------+---------+------------+------+----------+-------------+
四、总结
从上面关于 DYNAMIC RANGE 的解释我们知道这个状态需要经过一系列判断,并且只在特定条件才有可能出现,但是每行进行一次索引判断还是很消耗资源的,最好还是直接走索引,所以要尽量避免这种情况,适当的时候可以进行 SQL 命令干预。
Enjoy GreatSQL 😃
关于 GreatSQL
GreatSQL是适用于金融级应用的国内自主开源数据库,具备高性能、高可靠、高易用性、高安全等多个核心特性,可以作为MySQL或Percona Server的可选替换,用于线上生产环境,且完全免费并兼容MySQL或Percona Server。
相关链接: GreatSQL社区 Gitee GitHub Bilibili
GreatSQL社区:
社区博客有奖征稿详情:https://greatsql.cn/thread-100-1-1.html
技术交流群:
微信:扫码添加 GreatSQL社区助手 微信好友,发送验证信息加群。
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步