Postgresql分表与优化
--1、创建主表 CREATE TABLE tbl_partition ( date_key date, hour_key smallint, client_key integer, item_key integer, account integer, expense numeric ); --2、创建多个分表。每个分区表必须继承自主表,并且正常情况下都不要为这些分区表添加任何新的列。 CREATE TABLE tbl_partition_2016_01() inherits (tbl_partition); CREATE TABLE tbl_partition_2016_02() inherits (tbl_partition); CREATE TABLE tbl_partition_2016_03() inherits (tbl_partition); --CREATE TABLE tbl_partition_2016_04() inherits (tbl_partition); --3、TODO为分区表添加限制。这些限制决定了该表所能允许保存的数据集范围。这里必须保证各个分区表之间的限制不能有重叠。 ALTER TABLE tbl_partition_2016_01 ADD CONSTRAINT tbl_partition_2016_01_check_date_key CHECK (date_Key >= '2016-01-01'::date AND date_Key < '2016-02-01'::date); ALTER TABLE tbl_partition_2016_02 ADD CONSTRAINT tbl_partition_2016_02_check_date_key CHECK (date_Key >= '2016-02-01'::date AND date_Key < '2016-03-01'::date); ALTER TABLE tbl_partition_2016_03 ADD CONSTRAINT tbl_partition_2016_03_check_date_key CHECK (date_Key >= '2016-03-01'::date AND date_Key < '2016-04-01'::date); --4、为每一个分区表,在主要的列上创建索引。该索引并不是严格必须创建的,但在大部分场景下,它都非常有用。 ---CREATE INDEX tbl_partition_all_date_key ON tbl_partition_all (date_key,client_key); CREATE INDEX tbl_partition_date_key_2016_01 ON tbl_partition_2016_01 (date_key,client_key); CREATE INDEX tbl_partition_date_key_2016_02 ON tbl_partition_2016_02 (date_key,client_key); CREATE INDEX tbl_partition_date_key_2016_03 ON tbl_partition_2016_03 (date_key,client_key); --5、定义一个trigger或者rule把对主表的数据插入操作重定向到对应的分区表。 --创建分区函数 CREATE OR REPLACE FUNCTION tbl_partition_trigger() RETURNS TRIGGER AS $$ BEGIN IF NEW.date_key >= DATE '2016-01-01' AND NEW.date_Key < DATE '2016-02-01' THEN INSERT INTO tbl_partition_2016_01 VALUES (NEW.*); ELSIF NEW.date_key >= DATE '2016-02-01' AND NEW.date_Key < DATE '2016-03-01' THEN INSERT INTO tbl_partition_2016_02 VALUES (NEW.*); ELSIF NEW.date_key >= DATE '2016-03-01' AND NEW.date_Key < DATE '2016-04-01' THEN INSERT INTO tbl_partition_2016_03 VALUES (NEW.*); END IF; RETURN NULL; END; $$ LANGUAGE plpgsql; --6、挂载分区Trigger CREATE TRIGGER insert_tbl_partition_trigger BEFORE INSERT ON tbl_partition FOR EACH ROW EXECUTE PROCEDURE tbl_partition_trigger(); --7、创建全表 CREATE TABLE tbl_partition_all ( date_key date, hour_key smallint, client_key integer, item_key integer, account integer, expense numeric ); --8、自动建表触发器 CREATE OR REPLACE FUNCTION tbl_partition_trigger() RETURNS TRIGGER AS $$ DECLARE month_text TEXT; this_month_first_day_text TEXT; next_month_first_day_text TEXT; insert_statement TEXT; BEGIN SELECT to_char(NEW.date_key, 'YYYY_MM') INTO month_text; SELECT get_month_first_day(NEW.date_key) INTO this_month_first_day_text; SELECT to_char(to_date(this_month_first_day_text,'YYYY-MM-DD') + interval '1 month', 'YYYY-MM-DD') INTO next_month_first_day_text; insert_statement := 'INSERT INTO tbl_partition_' || month_text||' VALUES ($1.*)'; EXECUTE insert_statement USING NEW; RETURN NULL; EXCEPTION WHEN UNDEFINED_TABLE THEN EXECUTE 'CREATE TABLE IF NOT EXISTS tbl_partition_' || month_text || '(CHECK (date_key >= ''' || this_month_first_day_text || ''' and date_key<''' || next_month_first_day_text || ''')) INHERITS (tbl_partition)'; RAISE NOTICE 'CREATE NON-EXISTANT TABLE tbl_partition_%', month_text; EXECUTE 'CREATE INDEX tbl_partition_date_key_' || month_text || ' ON tbl_partition_' || month_text || '(date_key)'; EXECUTE insert_statement USING NEW; RETURN NULL; END; $$ LANGUAGE plpgsql; --get_month_first_day CREATE OR REPLACE FUNCTION get_month_first_day(in in_date date,out out_date text) AS $$ BEGIN SELECT to_char(in_date, 'YYYY_MM')||'_01' INTO out_date; END; $$ LANGUAGE plpgsql; --插入数据脚本 INSERT INTO tbl_partition_all select (select array_agg(i::date) from generate_series( '2015-12-01'::date, '2015-12-30'::date, '1 day'::interval) as t(i) )[floor(random()*4)+1] as date_key, floor(random()*24) as hour_key, floor(random()*1000000)+1 as client_key, floor(random()*100000)+1 as item_key, floor(random()*20)+1 as account, floor(random()*10000)+1 as expense from generate_series(1,300000000,1); INSERT INTO tbl_partition SELECT * FROM tbl_partition_all; -------------------------------------------------主体结束----------------------------------------- -------------------------------------------------主体结束----------------------------------------- --使用rule创建分表 CREATE RULE tbl_partition_rule_2016_01 AS ON INSERT TO tbl_partition WHERE date_key >= DATE '2016-01-01' AND date_Key < DATE '2016-02-01' DO INSTEAD INSERT INTO tbl_partition_2016_01 VALUES (NEW.*); --删除继承关系 ALTER TABLE tbl_partition_2016_01 NO INHERIT tbl_partition; --查询对比测试 EXPLAIN ANALYZE select count(account) ,client_key from tbl_partition v where v.date_key >='2016-03-02' and v.date_key <='2016-03-07' group by client_key ; EXPLAIN ANALYZE select count(account) ,client_key from tbl_partition_all v where v.date_key >='2016-03-02' and v.date_key <='2016-03-12' group by client_key ;