表数据量影响MySQL索引选择
程序员文章站
2022-05-15 08:59:49
现象 新建了一张员工表,插入了少量数据,索引中所有的字段均在where条件出现时,正确走到了idx_nap索引,但是where出现部分自左开始的索引时,却进行全表扫描,与MySQL官方所说的最左匹配原则“相悖”。 数据背景 sql CREATE TABLE ( int(11) NOT NULL AU ......
现象
新建了一张员工表,插入了少量数据,索引中所有的字段均在where条件出现时,正确走到了idx_nap索引,但是where出现部分自左开始的索引时,却进行全表扫描,与mysql官方所说的最左匹配原则“相悖”。
数据背景
create table `staffs` ( `id` int(11) not null auto_increment, `name` varchar(24) not null default '' comment '姓名', `age` int(11) not null default '0' comment '年龄', `pos` varchar(20) not null default '' comment '职位', `add_time` timestamp not null default current_timestamp comment '入职时间', primary key (`id`), key `idx_nap` (`name`,`age`,`pos`) ) engine=innodb auto_increment=8 default charset=utf8 comment='员工记录表'; 表中数据如下: id name age pos add_time 1 july 23 dev 2018-06-04 16:02:02 2 clive 22 dev 2018-06-04 16:02:32 3 cleva 24 test 2018-06-04 16:02:38 4 july 23 test 2018-06-04 16:12:22 5 july 23 pre 2018-06-04 16:12:37 6 clive 22 pre 2018-06-04 16:12:48 7 july 25 dev 2018-06-04 16:30:17
explain语句看下执行计划
-- 全匹配走了索引 explain select * from staffs where name = 'july' and age = 23 and pos = 'dev'; id select_type table partitions type possible_keys key key_len ref rows filtered extra 1 simple staffs null ref idx_nap idx_nap 140 const,const,const 1 100.00 null
开启优化器跟踪优化过程
-- 左侧部分匹配却没有走索引,全表扫描 explain select * from staffs where name = 'july' and age = 23; id select_type table partitions type possible_keys key key_len ref rows filtered extra 1 simple staffs2 null all idx_nap null null null 6 50.00 using where
-- 开启优化器跟踪 set session optimizer_trace='enabled=on'; -- 在执行完查询语句后,在执行以下的select语句可以查看具体的优化器执行过程 select * from information_schema.optimizer_trace;
trace部分的内容
{ "steps": [ { "join_preparation": { "select#": 1, "steps": [ { "expanded_query": "/* select#1 */ select `staffs`.`id` as `id`,`staffs`.`name` as `name`,`staffs`.`age` as `age`,`staffs`.`pos` as `pos`,`staffs`.`add_time` as `add_time` from `staffs` where ((`staffs`.`name` = 'july') and (`staffs`.`age` = 23))" } ] } }, { "join_optimization": { "select#": 1, "steps": [ { "condition_processing": { "condition": "where", "original_condition": "((`staffs`.`name` = 'july') and (`staffs`.`age` = 23))", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" }, { "transformation": "constant_propagation", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" }, { "transformation": "trivial_condition_removal", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" } ] } }, { "substitute_generated_columns": { } }, { "table_dependencies": [ { "table": "`staffs`", "row_may_be_null": false, "map_bit": 0, "depends_on_map_bits": [ ] } ] }, { "ref_optimizer_key_uses": [ { "table": "`staffs`", "field": "name", "equals": "'july'", "null_rejecting": false }, { "table": "`staffs`", "field": "age", "equals": "23", "null_rejecting": false } ] }, { "rows_estimation": [ { "table": "`staffs`", "range_analysis": { "table_scan": { "rows": 6, "cost": 4.3 }, "potential_range_indexes": [ { "index": "primary", "usable": false, "cause": "not_applicable" }, { "index": "idx_nap", "usable": true, "key_parts": [ "name", "age", "pos", "id" ] } ], "setup_range_conditions": [ ], "group_index_range": { "chosen": false, "cause": "not_group_by_or_distinct" }, "analyzing_range_alternatives": { "range_scan_alternatives": [ { "index": "idx_nap", "ranges": [ "july <= name <= july and 23 <= age <= 23" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": false, "rows": 3, "cost": 4.61, "chosen": false, "cause": "cost" } ], "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" } } } } ] }, { "considered_execution_plans": [ { "plan_prefix": [ ], "table": "`staffs`", "best_access_path": { "considered_access_paths": [ { //可以看到这边mysql计算得到使用索引的成本为2.6 "access_type": "ref", "index": "idx_nap", "rows": 3, "cost": 2.6, "chosen": true }, { //而全表扫描计算所得的成本为2.2 "rows_to_scan": 6, "access_type": "scan", "resulting_rows": 6, "cost": 2.2, "chosen": true } ] }, //因此选择了成本更低的scan "condition_filtering_pct": 100, "rows_for_plan": 6, "cost_for_plan": 2.2, "chosen": true } ] }, { "attaching_conditions_to_tables": { "original_condition": "((`staffs`.`age` = 23) and (`staffs`.`name` = 'july'))", "attached_conditions_computation": [ ], "attached_conditions_summary": [ { "table": "`staffs`", "attached": "((`staffs`.`age` = 23) and (`staffs`.`name` = 'july'))" } ] } }, { "refine_plan": [ { "table": "`staffs`" } ] } ] } }, { "join_execution": { "select#": 1, "steps": [ ] } } ] }
增加表数据量
-- 接下来增大表的数据量 insert into `staffs` (`name`, `age`, `pos`, `add_time`) values ('july', 25, 'dev', '2018-06-04 16:30:17'), ('july', 23, 'dev1', '2018-06-04 16:02:02'), ('july', 23, 'dev2', '2018-06-04 16:02:02'), ('july', 23, 'dev3', '2018-06-04 16:02:02'), ('july', 23, 'dev4', '2018-06-04 16:02:02'), ('july', 23, 'dev6', '2018-06-04 16:02:02'), ('july', 23, 'dev5', '2018-06-04 16:02:02'), ('july', 23, 'dev7', '2018-06-04 16:02:02'), ('july', 23, 'dev8', '2018-06-04 16:02:02'), ('july', 23, 'dev9', '2018-06-04 16:02:02'), ('july', 23, 'dev10', '2018-06-04 16:02:02'), ('clive', 23, 'dev1', '2018-06-04 16:02:02'), ('clive', 23, 'dev2', '2018-06-04 16:02:02'), ('clive', 23, 'dev3', '2018-06-04 16:02:02'), ('clive', 23, 'dev4', '2018-06-04 16:02:02'), ('clive', 23, 'dev6', '2018-06-04 16:02:02'), ('clive', 23, 'dev5', '2018-06-04 16:02:02'), ('clive', 23, 'dev7', '2018-06-04 16:02:02'), ('clive', 23, 'dev8', '2018-06-04 16:02:02'), ('clive', 23, 'dev9', '2018-06-04 16:02:02'), ('clive', 23, 'dev10', '2018-06-04 16:02:02');
执行explain
-- 再次执行同样的查询语句,会发现走到索引上了 explain select * from staffs where name = 'july' and age = 23; id select_type table partitions type possible_keys key key_len ref rows filtered extra 1 simple staffs null ref idx_nap idx_nap 78 const,const 13 100.00 null
查看新的trace内容
-- 再看下优化器执行过程 { "steps": [ { "join_preparation": { "select#": 1, "steps": [ { "expanded_query": "/* select#1 */ select `staffs`.`id` as `id`,`staffs`.`name` as `name`,`staffs`.`age` as `age`,`staffs`.`pos` as `pos`,`staffs`.`add_time` as `add_time` from `staffs` where ((`staffs`.`name` = 'july') and (`staffs`.`age` = 23))" } ] } }, { "join_optimization": { "select#": 1, "steps": [ { "condition_processing": { "condition": "where", "original_condition": "((`staffs`.`name` = 'july') and (`staffs`.`age` = 23))", "steps": [ { "transformation": "equality_propagation", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" }, { "transformation": "constant_propagation", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" }, { "transformation": "trivial_condition_removal", "resulting_condition": "((`staffs`.`name` = 'july') and multiple equal(23, `staffs`.`age`))" } ] } }, { "substitute_generated_columns": { } }, { "table_dependencies": [ { "table": "`staffs`", "row_may_be_null": false, "map_bit": 0, "depends_on_map_bits": [ ] } ] }, { "ref_optimizer_key_uses": [ { "table": "`staffs`", "field": "name", "equals": "'july'", "null_rejecting": false }, { "table": "`staffs`", "field": "age", "equals": "23", "null_rejecting": false } ] }, { "rows_estimation": [ { "table": "`staffs`", "range_analysis": { "table_scan": { "rows": 27, "cost": 8.5 }, "potential_range_indexes": [ { "index": "primary", "usable": false, "cause": "not_applicable" }, { "index": "idx_nap", "usable": true, "key_parts": [ "name", "age", "pos", "id" ] } ], "setup_range_conditions": [ ], "group_index_range": { "chosen": false, "cause": "not_group_by_or_distinct" }, "analyzing_range_alternatives": { "range_scan_alternatives": [ { "index": "idx_nap", "ranges": [ "july <= name <= july and 23 <= age <= 23" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": false, "rows": 13, "cost": 16.61, "chosen": false, "cause": "cost" } ], "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" } } } } ] }, { "considered_execution_plans": [ { "plan_prefix": [ ], "table": "`staffs`", "best_access_path": { "considered_access_paths": [ { //使用索引的成本变为了5.3 "access_type": "ref", "index": "idx_nap", "rows": 13, "cost": 5.3, "chosen": true }, { //scan的成本变为了6.4 "rows_to_scan": 27, "access_type": "scan", "resulting_rows": 27, "cost": 6.4, "chosen": false } ] }, //使用索引查询的成本更低,因此选择了走索引 "condition_filtering_pct": 100, "rows_for_plan": 13, "cost_for_plan": 5.3, "chosen": true } ] }, { "attaching_conditions_to_tables": { "original_condition": "((`staffs`.`age` = 23) and (`staffs`.`name` = 'july'))", "attached_conditions_computation": [ ], "attached_conditions_summary": [ { "table": "`staffs`", "attached": null } ] } }, { "refine_plan": [ { "table": "`staffs`" } ] } ] } }, { "join_execution": { "select#": 1, "steps": [ ] } } ] }
结论
mysql表数据量的大小,会影响索引的选择,具体的情况还是通过explain和optimizer trace来查看与分析。
上一篇: Squid代理服务器