基于pg_qualstats和hypopg的自动索引调优
pg-qualstats的安装和配置
1.安装pg-qualstats
sudo apt install postgresql-10-pg-qualstats
2.将pg_qualstats和pg_stat_statements添加到shared_preload_libraries,使得postgresql .conf文件中具有以下设置:
shared_preload_libraries = 'pg_stat_statements,pg_qualstats' # (change requires restart)
postgresql.conf文件在/etc/postgresql/10/main/目录下
执行grep 'shared_preload' postgresql.conf查看
3.重新启动PG。
service postgresql restart
4.进入PG
sudo su - postgres psql
5.查询shared_preload_libraries
show shared_preload_libraries ;
Hypopg的安装和配置
1.安装 hypopg
apt install postgresql-server-dev-10 apt install postgresql-10-hypopg
自动索引调优
1.进入PG
sudo su - postgres
2.设置采样率 pg_qualstats .sample_rate1,保证调参涉及到所有的query
psql -d postgres -c "ALTER SYSTEM SET pg_qualstats.sample_rate TO 1"
验证
psql -c "select pg_reload_conf()"
3.进入PG
psql
4.加载extension
CREATE EXTENSION hypopg; CREATE EXTENSION pg_stat_statements ; CREATE EXTENSION pg_qualstats;
5.查看配置
\dx
show shared_preload_libraries ;
6.建立测试数据库
create database testdb owner postgres;
7.复现样例测试
建表
CREATE TABLE test (id int, dept int, id2 int, id3 int, id4 int, id5 int,id6 int,id7 int,details text, zipcode int);
插入数据
INSERT INTO test SELECT (random() * 1000000)::int, (random() * 1000000)::int, (random() * 1000000)::int,(random() * 1000000)::int,(random() * 1000000)::int,(random() * 1000000)::int,(random() * 1000000)::int,(random() * 1000000)::int, md5(g::text), floor(random()* (20000-9999 + 1) + 9999) FROM generate_series(1,1*1e6) g;
执行workload
select * from test where id2 = 1 and id4 = 3; select * from test where id3 = 3; select * from test where id3 = 3 and id4 = 2; select * from test where id4 = 2 and id2 = 3;
建立函数 find_usable_indexes
CREATE OR REPLACE FUNCTION find_usable_indexes() RETURNS VOID AS $$ DECLARE l_queries record; l_querytext text; l_idx_def text; l_bef_exp text; l_after_exp text; hypo_idx record; l_attr record; /* l_err int; */ BEGIN CREATE TABLE IF NOT EXISTS public.idx_recommendations (queryid bigint, query text, current_plan jsonb, recmnded_index text, hypo_plan jsonb); FOR l_queries IN SELECT t.relid, t.relname, t.queryid, t.attnames, t.attnums, pg_qualstats_example_query(t.queryid) as query FROM ( SELECT qs.relid::regclass AS relname, qs.relid AS relid, qs.queryid, string_agg(DISTINCT attnames.attnames,',') AS attnames, qs.attnums FROM pg_qualstats_all qs JOIN pg_qualstats q ON q.queryid = qs.queryid JOIN pg_stat_statements ps ON q.queryid = ps.queryid JOIN pg_amop amop ON amop.amopopr = qs.opno JOIN pg_am ON amop.amopmethod = pg_am.oid, LATERAL ( SELECT pg_attribute.attname AS attnames FROM pg_attribute JOIN unnest(qs.attnums) a(a) ON a.a = pg_attribute.attnum AND pg_attribute.attrelid = qs.relid ORDER BY pg_attribute.attnum) attnames, LATERAL unnest(qs.attnums) attnum(attnum) WHERE NOT ( EXISTS ( SELECT 1 FROM pg_index i WHERE i.indrelid = qs.relid AND (arraycontains((i.indkey::integer[])[0:array_length(qs.attnums, 1) - 1], qs.attnums::integer[]) OR arraycontains(qs.attnums::integer[], (i.indkey::integer[])[0:array_length(i.indkey, 1) + 1]) AND i.indisunique))) GROUP BY qs.relid, qs.queryid, qs.qualnodeid, qs.attnums) t GROUP BY t.relid, t.relname, t.queryid, t.attnames, t.attnums LOOP /* RAISE NOTICE '% : is queryid',l_queries.queryid; */ execute 'explain (FORMAT JSON) '||l_queries.query INTO l_bef_exp; execute 'select hypopg_reset()'; execute 'SELECT indexrelid,indexname FROM hypopg_create_index(''CREATE INDEX on '||l_queries.relname||'('||l_queries.attnames||')'')' INTO hypo_idx; execute 'explain (FORMAT JSON) '||l_queries.query INTO l_after_exp; execute 'select hypopg_get_indexdef('||hypo_idx.indexrelid||')' INTO l_idx_def; INSERT INTO public.idx_recommendations (queryid,query,current_plan,recmnded_index,hypo_plan) VALUES (l_queries.queryid,l_querytext,l_bef_exp::jsonb,l_idx_def,l_after_exp::jsonb); END LOOP; execute 'select hypopg_reset()'; END; $$ LANGUAGE plpgsql;
执行函数find_usable_indexes
select find_usable_indexes();
查找索引
select queryid, current_plan->0->'Plan'->>'Total Cost' as "cost_without_index", hypo_plan->0->'Plan'->>'Total Cost' as "cost_with_index", round((((current_plan->0->'Plan'->>'Total Cost')::numeric-(hypo_plan->0->'Plan'->>'Total Cost')::numeric)*100/(current_plan->0->'Plan'->>'Total Cost')::numeric),2) as percent_improvd FROM idx_recommendations order by 4 desc;
select b.query, a.recmnded_index,round((((a.current_plan->0->'Plan'->>'Total Cost')::numeric-(hypo_plan->0->'Plan'->>'Total Cost')::numeric)*100/(a.current_plan->0->'Plan'->>'Total Cost')::numeric),2) as percent_improvd FROM idx_recommendations a JOIN pg_stat_statements b ON a.queryid = b.queryid WHERE round((((current_plan->0->'Plan'->>'Total Cost')::numeric-(hypo_plan->0->'Plan'->>'Total Cost')::numeric)*100/(current_plan->0->'Plan'->>'Total Cost')::numeric),2) > 0 order by 3 desc ;