KingbaseES例程_普通表在线转分区表(基于规则)
KingbaseES例程_普通表在线转分区表
概述
普通表转分区表,使用视图的替换式规则,以路由方式,实现在线转移数据。
数据准备
/*普通大表*/
create table tab_single
as
select id, (random() * 100)::int + 1 c1, md5(id::text) name
from generate_series(1, 10000000) id;
alter table tab_single add primary key (id);
/*表结构一样的分区表*/
create table tab_part
( like tab_single )
partition by list (c1)
;
select format($$ create table tab_part_%s partition of tab_part for values in ( %s ) $$, id, id)
from generate_series(1, 100) id \gexec
/*分区表可以创建查询所需的索引*/
create index idx_tab_part_id on tab_part(id);
/*创建合并视图,作为操作入口*/
create or replace view tab_view as
select *
from tab_single
union all
select *
from tab_part;
/*视图不可更新*/
update tab_view
set name = 'a'
where id = 1;
[55000] ERROR: cannot update view "tab_view" 详细:Views containing UNION, INTERSECT, or EXCEPT are not automatically updatable. 建议:To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.
通过规则实现视图更新
INSERT 规则
/*NEW数据,插入到分区表*/
CREATE or replace RULE rul_ins_tab_view
AS ON INSERT TO tab_view
DO INSTEAD insert into tab_part
select new.*;
insert into tab_view
select id + 10000000, (random() * 100)::int + 1 c1, 'a' name
from generate_series(1, 10) id;
/*新数据插入到分区表*/
select count(*) from tab_part;
count
-------
10
(1 row)
DELETE 规则
CREATE or replace RULE rul_del_tab_view
AS ON DELETE TO tab_view
DO INSTEAD (
delete
from tab_single
where row (tab_single.*) = row (old.*) ;
delete
from tab_part
where row (tab_part.*) = row (old.*);
);
delete
from tab_view
where id = 10;
/*旧数据从普通表删除*/
select *
from tab_single
where id = 10;
id | c1 | name
----+----+------
(0 rows)
UPDATE 规则
/*
* 1. 更新分区表的数据
* 2. 如果分区表没有旧数据,则分区表插入新新数据
* 3. 删除普通表的旧数据
*/
CREATE or replace RULE rul_upd_tab_view
AS ON UPDATE TO tab_view
DO INSTEAD (
insert into tab_part
select new.*
where (select count(*) cnt
from tab_part
where row (tab_part.*) = row (new.*)) = 0;
delete
from tab_single
where row (tab_single.*) = row (old.*);
delete
from tab_part
where row (tab_part.*) = row (old.*)
and row (tab_part.*) != row (new.*)
and old <> new;
);
/*更新在普通表数据,可以更新主键值*/
update tab_view
set name='abcde'
where id = 200;
/*数据从普通表删除*/
select id, c1, name
from tab_single
where id = 200; id | c1 | name
----+----+------
(0 rows)
/*数据在分区表中插入或更新*/
select id, c1, name
from tab_part
where id = 200;
id | c1 | name
-----+----+-------
200 | 89 | abcde
(1 row)
/*更新在分区表数据,可以更新主键值*/
update tab_view
set id=20000000+200 ,
name='xxxxxxxx'
where id = 200;
/*在分区表的数据*/
select id, c1, name
from tab_part
where id = 200;
id | c1 | name
----------+----+----------
20000200 | 52 | xxxxxxxx
(1 row)
分步转移数据
do
$$
declare
stprow int;
chgrow int;
movrow int;
timstm timestamp;
begin
stprow := 10000; /*每步迁移10000行数据*/
chgrow := 0; /*实际影响的行*/
movrow := 0; /*累计迁移的行*/
loop
timstm := clock_timestamp();
with del as (delete from tab_single
where ctid = any (array(select ctid from tab_single limit stprow))
returning *)
insert into tab_part
select * from del;
commit;
get diagnostics chgrow = row_count;
exit when chgrow = 0;
movrow := movrow + chgrow;
raise info 'moved rows : % / % , % ms',chgrow, movrow, (date_part('sec',clock_timestamp()-timstm)*1000)::numeric(10,3);
end loop;
end;
$$
;
SHELL脚本高并发迁移数据
time seq 0 1000 | xargs -i -n1 -P10 psql -c "update tab_view set id = id where id between 10000*{}+0 and 10000*({}+1)-1"
real 0m49.630s
user 0m0.740s
sys 0m1.315s
抛弃普通表,保留分区表
/*分区表代替视图*/
do
$$
begin
drop view tab_view;
alter table tab_part
rename to tab_view;
commit;
end;
$$
;
或者
/*保留数据入口视图,删除视图的规则,修改视图定义*/
do
$$
begin
drop RULE rul_ins_tab_view TO tab_view;
drop RULE rul_del_tab_view TO tab_view;
drop RULE rul_upd_tab_view TO tab_view;
create or replace view tab_view as
select *
from tab_part;
commit;
end;
$$
;
注意
DML操作,主键值可能会出现重复,但可以通过校验语句避免。
KINGBASE研究院