postgresql 分区表创建及测试
1 建立分区
1.1. 创建主表
CREATE TABLE measurement (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales int
);
CREATE TABLE
这里date类型精确到天,如'2006-02-01',如果时间精确到秒,如'2013-01-09 00:00:00' 使用timestamp 类型
1.2. 创建分区表
CREATE TABLEmeasurement_y2006m01 (
CHECK (logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m02 (
CHECK (logdate >= DATE '2006-02-01' AND logdate < DATE '2006-03-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m03 (
CHECK (logdate >= DATE '2006-03-01' AND logdate < DATE '2006-04-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m04 (
CHECK (logdate >= DATE '2006-04-01' AND logdate < DATE '2006-05-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m05 (
CHECK (logdate >= DATE '2006-05-01' AND logdate < DATE '2006-06-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m06 (
CHECK (logdate >= DATE '2006-06-01' AND logdate < DATE '2006-07-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m07 (
CHECK (logdate >= DATE '2006-07-01' AND logdate < DATE '2006-08-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m08 (
CHECK (logdate >= DATE '2006-08-01' AND logdate < DATE '2006-09-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m09 (
CHECK (logdate >= DATE '2006-09-01' AND logdate < DATE '2006-10-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m10 (
CHECK (logdate >= DATE '2006-10-01' AND logdate < DATE '2006-11-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m11 (
CHECK (logdate >= DATE '2006-11-01' AND logdate < DATE '2006-12-01' )
) INHERITS (measurement);
CREATE TABLE measurement_y2006m12 (
CHECK (logdate >= DATE '2006-12-01' AND logdate < DATE '2007-01-01' )
) INHERITS (measurement);
CREATE TABLE
1.3. 分区键上建索引
CREATE INDEX measurement_y2006m02_logdateON measurement_y2006m02 (logdate);
CREATE INDEX measurement_y2006m03_logdateON measurement_y2006m03 (logdate);
CREATE INDEX measurement_y2006m04_logdateON measurement_y2006m04 (logdate);
CREATE INDEX measurement_y2006m05_logdateON measurement_y2006m05 (logdate);
CREATE INDEX measurement_y2006m06_logdateON measurement_y2006m06 (logdate);
CREATE INDEX measurement_y2006m07_logdateON measurement_y2006m07 (logdate);
CREATE INDEX measurement_y2006m08_logdateON measurement_y2006m08 (logdate);
CREATE INDEX measurement_y2006m09_logdateON measurement_y2006m09 (logdate);
CREATE INDEX measurement_y2006m10_logdateON measurement_y2006m10 (logdate);
CREATE INDEX measurement_y2006m11_logdateON measurement_y2006m11 (logdate);
CREATE INDEX measurement_y2006m12_logdateON measurement_y2006m12 (logdate);
CREATE INDEX
执行insert into measurement命令向表中插入数据,希望数据按一定的规则插入相应分区,需要前期人工处理下,这里有两种策略,建立触发器和RULE规则。建立触发器方法需要1.4、1.5两步操作,1.6是建立RULE规则方法。
1.4. 创建触发器函数
CREATE OR REPLACEFUNCTION measurement_insert_trigger()
RETURNSTRIGGER AS $$
BEGIN
IF ( NEW.logdate >= DATE '2006-01-01'AND
NEW.logdate < DATE '2006-02-01' )THEN
INSERT INTO measurement_y2006m01 VALUES(NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-02-01' AND
NEW.logdate < DATE '2006-03-01' ) THEN
INSERT INTO measurement_y2006m02 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-03-01'AND
NEW.logdate < DATE '2006-04-01' ) THEN
INSERT INTO measurement_y2006m03 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-04-01'AND
NEW.logdate < DATE '2006-05-01' ) THEN
INSERT INTO measurement_y2006m04 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-05-01'AND
NEW.logdate < DATE '2006-06-01' ) THEN
INSERT INTO measurement_y2006m05 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-06-01'AND
NEW.logdate < DATE '2006-07-01' ) THEN
INSERT INTO measurement_y2006m06 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-07-01'AND
NEW.logdate < DATE '2006-08-01' ) THEN
INSERT INTO measurement_y2006m07 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-08-01'AND
NEW.logdate < DATE '2006-09-01' ) THEN
INSERT INTO measurement_y2006m08 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-09-01' AND
NEW.logdate < DATE '2006-10-01' ) THEN
INSERT INTO measurement_y2006m09 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-10-01' AND
NEW.logdate < DATE '2006-11-01' ) THEN
INSERT INTO measurement_y2006m10 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-11-01'AND
NEW.logdate < DATE '2006-12-01' ) THEN
INSERT INTO measurement_y2006m11 VALUES (NEW.*);
ELSIF ( NEW.logdate >= DATE '2006-12-01' AND
NEW.logdate < DATE '2007-01-01' ) THEN
INSERT INTO measurement_y2006m12 VALUES (NEW.*);
ELSE
RAISE EXCEPTION 'Date out of range. Fix the measurement_insert_trigger() function!';
END IF;
RETURN NULL;
END;
$$
LANGUAGE plpgsql;
CREATE FUNCTION
说明:如果不想丢失数据,上面的ELSE 条件可以改成 INSERTINTO measurement _error_ logdate VALUES (NEW.*); 同时需要创建一张结构和measurement 一样的表measurement_error_logdate,这样,错误的logdate 数据就可以插入到这张表中而不是报错了。
1.5. 创建触发器
CREATE TRIGGERinsert_measurement_trigger
BEFORE INSERT ON measurement
FOR EACH ROW EXECUTE PROCEDUREmeasurement_insert_trigger();
CREATE TRIGGER
1.6. 改用RULE规则
也可以不用触发器,改用RULE规则,为每个分表创建规则
CREATE RULE measurement_y2006m01 AS
ON INSERT TO measurement where(NEW.logdate >= DATE '2006-01-01' AND
NEW.logdate < DATE'2006-02-01')
DO INSTEAD
INSERT INTO measurement VALUES (NEW.*);
CREATE RULE measurement_y2006m02 AS
ON INSERT TO measurement where(NEW.logdate >= DATE '2006-02-01' AND
NEW.logdate < DATE'2006-03-01')
DO INSTEAD
INSERT INTO measurement VALUES (NEW.*);
……省略
2 测试
2.1. 查看所有表
mydb=# \d
List of relations
Schema | Name | Type | Owner
--------+----------------------+-------+-------
public | measurement | table | super
public | measurement_y2006m01 | table | super
public | measurement_y2006m02 | table | super
public | measurement_y2006m03 | table | super
public | measurement_y2006m04 | table | super
public | measurement_y2006m05 | table | super
public | measurement_y2006m06 | table | super
public | measurement_y2006m07 | table | super
public | measurement_y2006m08 | table | super
public | measurement_y2006m09 | table | super
public | measurement_y2006m10 | table | super
public | measurement_y2006m11 | table | super
public | measurement_y2006m12 | table | super
(13rows)
2.2. 查看主表结构
mydb=#\d measurement
Table "public.measurement"
Column | Type | Modifiers
-----------+---------+-----------
city_id | integer | not null
logdate | date |not null
peaktemp | integer |
unitsales | integer |
Triggers:
insert_measurement_trigger BEFORE INSERT ON measurement FOR EACH ROWEXECUTE PROCEDURE measurement_insert_trigger().
Number of child tables: 12 (Use \d+ to listthem.)
2.3. 插入数据
mydb=# insert into measurement values(1,date '2006-02-10',1,1);
INSERT 0 0
mydb=# insert into measurement values(1,date '2006-03-10',1,1);
INSERT 0 0
mydb=# insert into measurement values(1,date '2006-04-10',1,1);
INSERT 0 0
2.4. 查看主表数据
mydb=# select * from measurement;
city_id | logdate | peaktemp | unitsales
---------+------------+----------+-----------
1 | 2006-02-10 | 1 | 1
1 | 2006-03-10 | 1 | 1
1 | 2006-04-10 | 1 | 1
(3 rows)
2.5. 查看分表数据
mydb=# select * frommeasurement_y2006m03;
city_id | logdate | peaktemp | unitsales
---------+------------+----------+-----------
1 | 2006-03-10 | 1 | 1
(1 row)
2.6. 分区排除和查询优化
默认情况下,是不会开启分区排除的,索引针对一个基于分区键条件的检索,要扫描所有的分区
mydb=# SET constraint_exclusion = off;
SET
mydb=# EXPLAIN SELECT count(*) FROMmeasurement WHERE logdate <= DATE '2006-02-01';
QUERYPLAN
-----------------------------------------------------------------------------------------------
Aggregate (cost=436.80..436.81 rows=1 width=0)
-> Append (cost=0.00..417.62 rows=7670 width=0)
-> Seq Scan onmeasurement (cost=0.00..32.12 rows=590width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m01 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m02 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m03 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m04 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m05 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan on measurement_y2006m06measurement (cost=0.00..32.12 rows=590width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m07 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <= '2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m08 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m09 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m10 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m11 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan onmeasurement_y2006m12 measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
(28 rows)
2.7. 开启约束排除
mydb=# SET constraint_exclusion = on;
SET
mydb=> EXPLAIN SELECT count(*) FROM measurement WHERE logdate<= DATE '2006-02-01';
QUERYPLAN
-----------------------------------------------------------------------------------------------
Aggregate (cost=100.80..100.81 rows=1 width=0)
-> Append (cost=0.00..96.38 rows=1770 width=0)
-> Seq Scan on measurement (cost=0.00..32.12 rows=590 width=0)
Filter: (logdate <='2006-02-01'::date)
-> Seq Scan on measurement_y2006m01measurement (cost=0.00..32.12 rows=590width=0)
Filter:(logdate <= '2006-02-01'::date)
-> Seq Scan on measurement_y2006m02 measurement (cost=0.00..32.12 rows=590 width=0)
Filter:(logdate <= '2006-02-01'::date)
(8 rows)
至此分区表建立完成