写有效的历史数据迁移sql
转自:http://rdc.taobao.com/blog/dba/html/124_oracle_data_transfer.html
作者:丁原
作者:丁原
对于高并发oltp系统,生产库可能只需保留当前几个月的数据,之前的数据要全部迁移到历史库中。那么,如何处理这样的需求,如何写合适的历史迁移程序呢?
1.常规写法
begin
--迁移数据
insert into tb_users
select *
from tb_users_dbc a
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
--删除原表数据
delete from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
commit;
end;
--迁移数据
insert into tb_users
select *
from tb_users_dbc a
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
--删除原表数据
delete from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
commit;
end;
2.使用for循环
declare
v_counts number := 0;
begin
--中间表
insert into tmp_tb_users
select *
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
commit;
for i in (select id from tmp_tb_users) loop
--数据迁移
insert into tb_users
select * from tb_users_dbc where id = i.id;
delete from tb_users_dbc where id = i.id;
v_counts := v_counts + 1;
--分批提交
if mod(v_counts, 100) = 0 then
commit;
end if;
end loop;
end;
v_counts number := 0;
begin
--中间表
insert into tmp_tb_users
select *
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
commit;
for i in (select id from tmp_tb_users) loop
--数据迁移
insert into tb_users
select * from tb_users_dbc where id = i.id;
delete from tb_users_dbc where id = i.id;
v_counts := v_counts + 1;
--分批提交
if mod(v_counts, 100) = 0 then
commit;
end if;
end loop;
end;
3.使用bulk collect
declare
type rec_rids is table of varchar2(32);
v_rids rec_rids;
cursor cur_aids is
select id
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
begin
open cur_aids;
loop
fetch cur_aids BULK COLLECT INTO v_rids LIMIT 1000;
for i in 1 .. v_rids.count loop
--数据迁移
insert into tb_users
select * from tb_users_dbc where id = v_rids(i);
delete from tb_users_dbc where id = v_rids(i);
end loop;
commit;
EXIT WHEN v_rids.COUNT = 0;
end loop;
close cur_aids;
end;
type rec_rids is table of varchar2(32);
v_rids rec_rids;
cursor cur_aids is
select id
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum<5000000;
begin
open cur_aids;
loop
fetch cur_aids BULK COLLECT INTO v_rids LIMIT 1000;
for i in 1 .. v_rids.count loop
--数据迁移
insert into tb_users
select * from tb_users_dbc where id = v_rids(i);
delete from tb_users_dbc where id = v_rids(i);
end loop;
commit;
EXIT WHEN v_rids.COUNT = 0;
end loop;
close cur_aids;
end;
很少考虑常规写法,数据迁移一般会选择采用2,3种方法。
相比for迁移方式,采用bulk collect避免了上下文切换,在大批量数据迁移中还是有很大优势的,推荐采用bulk collect数据迁移方案。
Asktom上有很多关于历史迁移方案的帖子,Tom的答案一直都是常规写法,也就是上面的第一种写法,可是常规写法消耗大量的资源,出现异常时整个事务都得回滚。因此很多人都认为Tom的方案不可行,认为Tom并没有接触过大数据库等。
4.借助Tom的思想,把迁移的数据拆分成n个小表,对n个小表进行迁移
declare
pagecount number;
sumcount number;
loopcount number;
begin
--取要迁移的数据
execute immediate 'truncate table tmp_tb_users';
insert into tmp_tb_users
select id
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum < 5000000;
--计算迁移总数
select count(*) into sumcount from tmp_tb_users;
--设定每次迁移条数
pagecount = 2000;
--得到循环次数
loopcount := sumcount / pagecount + 1;
for i in 1 .. loopcount loop
begcount := (i - 1) * pagecount + 1;
endcount := i * pagecount;
--构建中间小表
execute immediate 'truncate table mid_tb_users';
insert into mid_tb_users
select id, b.rn
from tmp_tb_users b where b.rn >= begcount and b.rn <= endcount;
--小表和原表进行关联,迁移数据
insert into tb_users
select f.*
from mid_tb_users t, tb_users_dbc f
where t.biz_order_id = f.biz_order_id
and t.rn >= begcount and t.rn < = endcount;
--删除原表数据
delete from tb_users_dbc where id in (select id from mid_tb_users);
commit;
end loop;
end;
pagecount number;
sumcount number;
loopcount number;
begin
--取要迁移的数据
execute immediate 'truncate table tmp_tb_users';
insert into tmp_tb_users
select id
from tb_users_dbc
where gmt_modified <= trunc(sysdate) - v_day and rownum < 5000000;
--计算迁移总数
select count(*) into sumcount from tmp_tb_users;
--设定每次迁移条数
pagecount = 2000;
--得到循环次数
loopcount := sumcount / pagecount + 1;
for i in 1 .. loopcount loop
begcount := (i - 1) * pagecount + 1;
endcount := i * pagecount;
--构建中间小表
execute immediate 'truncate table mid_tb_users';
insert into mid_tb_users
select id, b.rn
from tmp_tb_users b where b.rn >= begcount and b.rn <= endcount;
--小表和原表进行关联,迁移数据
insert into tb_users
select f.*
from mid_tb_users t, tb_users_dbc f
where t.biz_order_id = f.biz_order_id
and t.rn >= begcount and t.rn < = endcount;
--删除原表数据
delete from tb_users_dbc where id in (select id from mid_tb_users);
commit;
end loop;
end;
1.优于常规写法,可以分批多次进行提交,加入异常处理可以避免全部数据回滚。
2.优于for迁移,借助中间小表一次迁移多条记录,大大降低了insert,delete的执行次数。
3.主要是提供给大家一种思路。
结论:
其实写法好没有好坏之分,关键在于怎么用,就大批量迁移数据来说,我觉得3,4都是比较可行的方案。欢迎大家拍砖讨论,也欢迎贡献更好的数据迁移办法。
注:上面脚本是伪代码