【Oracle】SQL对某字段模糊查询,哪种方案最快?
问题:有一张表hy_test,查找其字段name中包含ufo的记录数,下面哪种方案最快?
A.select count(*) from hy_test where name like '%ufo%'
B.select count(*) from hy_test where instr(name,'ufo')> 0
C.with temp as (select rowid from hy_test t where t.name like '%ufo%')
select count(*) from hy_test where rowid in (select rowid from temp)
D.with temp as (select rowid from hy_test t where t.name like '%ufo%')
select count(*) from hy_test a where exists (select rowid from temp where a.rowid=rowid)
A是常规方案,B是网文推荐的方案,C D 是不常见但也有人推荐的方案。
你心中的答案是哪个?
我先给name加上了索引
create index hy_test_name on hy_test(name);
然后看各自的解释计划:
select count(*) from hy_test where name like '%ufo%' Plan hash value: 2970624229 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 62 | 3067 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | INDEX FAST FULL SCAN| HY_TEST_NAME | 7426 | 449K| 3067 (1)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2) select count(*) from hy_test where instr(name,'ufo')> 0 Plan hash value: 2970624229 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 62 | 3070 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | INDEX FAST FULL SCAN| HY_TEST_NAME | 7426 | 449K| 3070 (1)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter(INSTR("NAME",U'ufo')>0) Note ----- - dynamic statistics used: dynamic sampling (level=2) with temp as (select rowid from hy_test t where t.name like '%ufo%') select count(*) from hy_test where rowid in (select rowid from temp) Plan hash value: 2970624229 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 62 | 3067 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | INDEX FAST FULL SCAN| HY_TEST_NAME | 7426 | 449K| 3067 (1)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("HY_TEST"."NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2) with temp as (select rowid from hy_test t where t.name like '%ufo%') select count(*) from hy_test a where exists (select rowid from temp where a.rowid=rowid) Plan hash value: 2970624229 -------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | -------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 62 | 3067 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | INDEX FAST FULL SCAN| HY_TEST_NAME | 7426 | 449K| 3067 (1)| 00:00:01 | -------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("A"."NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2)
至少从解释计划来看,四种方案都是差不多的,Cost 都在3067左右,细究的话。instr方案还稍慢点,到了3070!
drop index hy_test_name
再把索引去掉比较:
无索引 select count(*) from hy_test where name like '%ufo%' Plan hash value: 1972112514 ------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | 62 | 3857 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | TABLE ACCESS FULL| HY_TEST | 7426 | 449K| 3857 (1)| 00:00:01 | ------------------------------------------------------------------------------ Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2) select count(*) from hy_test where instr(name,'ufo')> 0 Plan hash value: 1972112514 ------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | 62 | 3860 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | TABLE ACCESS FULL| HY_TEST | 7426 | 449K| 3860 (1)| 00:00:01 | ------------------------------------------------------------------------------ Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter(INSTR("NAME",U'ufo')>0) Note ----- - dynamic statistics used: dynamic sampling (level=2) EXPLAIN PLAN FOR with temp as (select rowid from hy_test t where t.name like '%ufo%') select count(*) from hy_test where rowid in (select rowid from temp) Plan hash value: 1972112514 ------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | 62 | 3857 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | TABLE ACCESS FULL| HY_TEST | 7426 | 449K| 3857 (1)| 00:00:01 | ------------------------------------------------------------------------------ Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("HY_TEST"."NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2) EXPLAIN PLAN FOR with temp as (select rowid from hy_test t where t.name like '%ufo%') select count(*) from hy_test a where exists (select rowid from temp where a.rowid=rowid) Plan hash value: 1972112514 ------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | 62 | 3857 (1)| 00:00:01 | | 1 | SORT AGGREGATE | | 1 | 62 | | | |* 2 | TABLE ACCESS FULL| HY_TEST | 7426 | 449K| 3857 (1)| 00:00:01 | ------------------------------------------------------------------------------ Predicate Information (identified by operation id): --------------------------------------------------- 2 - filter("A"."NAME" LIKE U'%ufo%') Note ----- - dynamic statistics used: dynamic sampling (level=2)
现在看,instr仍然是最慢的,cost为3860,其它三个都在3857!
结论:大体上四种方案都没什么差别,细究下,网红方案instr反而是最慢的,看上去最low的%ufo%并不慢。
对此,你能同意吗?面试时遇到这题你会怎么写?
如果你想重复我的实验,可以用以下SQL创建表和数据:
CREATE TABLE hy_test ( id NUMBER not null primary key, name NVARCHAR2(60) not null, score NUMBER(4,0) NOT NULL, createtime TIMESTAMP (6) not null ) Insert into hy_test select rownum,dbms_random.string('*',dbms_random.value(6,20)),dbms_random.value(0,20),sysdate from dual connect by level<=2000000 order by dbms_random.random
然后用一下Java程序改写数据:
package recordchanger; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.DecimalFormat; import java.util.Random; import java.util.Set; import java.util.TreeSet; public class RecordChanger { public boolean changeOnePencent(String table) { Connection conn = null; Statement stmt = null; try{ Class.forName(DBParam.Driver).newInstance(); conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd); stmt = conn.createStatement(); long startMs = System.currentTimeMillis(); int totalCount=fetchExistCount(table,stmt); System.out.println("There are "+toEastNumFormat(totalCount)+" records in the table:'"+table+"'."); int changeCount=totalCount/100; System.out.println("There are "+toEastNumFormat(changeCount)+" records should be changed."); Set<Integer> idSet=fetchIdSet(totalCount,changeCount,table,stmt); System.out.println("There are "+toEastNumFormat(idSet.size())+" records in idSet."); int changed=updateRecords(idSet,table,stmt); System.out.println("There are "+toEastNumFormat(changed)+" records have been changed."); long endMs = System.currentTimeMillis(); System.out.println("It takes "+ms2DHMS(startMs,endMs)+" to update 1% records of table:'"+table+"'."); } catch (Exception e) { e.printStackTrace(); } finally { try { stmt.close(); conn.close(); } catch (SQLException e) { System.out.print("Can't close stmt/conn because of " + e.getMessage()); } } return false; } private int updateRecords(Set<Integer> idSet,String tableName,Statement stmt) throws SQLException{ int updated=0; for(int id:idSet) { String sql="update "+tableName+" set name='"+getRNDName()+"' where id='"+id+"' "; updated+= stmt.executeUpdate(sql); } return updated; } private String getRNDName() { String[] arr= {"Andy","Bill","Cindy","ufo","sufo","ufoa","ufot","AufoT","BufoT","1ufoufo","钱八","岳飞","关羽","刘备","曹操","张辽","虚竹","王语嫣"}; int index=getRandom(0,arr.length); return arr[index]; } // fetch a set of id which should be changed private Set<Integer> fetchIdSet(int totalCount,int changeCount,String tableName,Statement stmt) throws SQLException{ Set<Integer> idSet=new TreeSet<Integer>(); while(idSet.size()<changeCount) { int id=getRandom(0,totalCount); if(idSet.contains(id)==false && isIdExist(id,tableName,stmt)) { idSet.add(id); } } return idSet; } private boolean isIdExist(int id,String tableName,Statement stmt) throws SQLException{ String sql="select count(*) as cnt from "+tableName+" where id='"+id+"' "; ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { int cnt = rs.getInt("cnt"); return cnt==1; } rs.close(); return false; } // get a random num between min and max private static int getRandom(int min, int max){ Random random = new Random(); int s = random.nextInt(max) % (max - min + 1) + min; return s; } // fetch exist record count of a table private int fetchExistCount(String tableName,Statement stmt) throws SQLException{ String sql="select count(*) as cnt from "+tableName+""; ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { int cnt = rs.getInt("cnt"); return cnt; } rs.close(); return 0; } // 将整数在万分位以逗号分隔表示 public static String toEastNumFormat(long number) { DecimalFormat df = new DecimalFormat("#,####"); return df.format(number); } // change seconds to DayHourMinuteSecond format private static String ms2DHMS(long startMs, long endMs) { String retval = null; long secondCount = (endMs - startMs) / 1000; String ms = (endMs - startMs) % 1000 + "ms"; long days = secondCount / (60 * 60 * 24); long hours = (secondCount % (60 * 60 * 24)) / (60 * 60); long minutes = (secondCount % (60 * 60)) / 60; long seconds = secondCount % 60; if (days > 0) { retval = days + "d" + hours + "h" + minutes + "m" + seconds + "s"; } else if (hours > 0) { retval = hours + "h" + minutes + "m" + seconds + "s"; } else if (minutes > 0) { retval = minutes + "m" + seconds + "s"; } else { retval = seconds + "s"; } return retval + ms; } public static void main(String[] args) { RecordChanger rc=new RecordChanger(); rc.changeOnePencent("hy_test"); } protected class DBParam { public final static String Driver = "oracle.jdbc.driver.OracleDriver"; public final static String DbUrl = "jdbc:oracle:thin:@dev-dm-ufo.dev.un.local:2050/ufo"; public final static String User = "ufo"; public final static String Pswd = "test01"; } }
--END-- 2020-01-06 16:43