Oracle 去重等基础问题
-- Dedup query, method 1: by id.
-- Returns, for every username, the single row carrying the smallest id.
with first_ids as (
    select min(id) as keep_id
    from sxe
    group by username
)
select *
from sxe
where id in (select f.keep_id from first_ids f)
order by id;
-- Dedup query, method 2: by rownum.
-- ROWNUM is assigned separately in each query block, so numbering the rows in
-- one subquery and computing min(rownum) in another is not guaranteed to give
-- the same row the same number. Here the rows are numbered once (rn) and each
-- username's smallest number (first_rn) is derived from that same numbering.
-- The result carries the helper columns rn and first_rn next to sxe's columns.
select *
from (
    select n.*,
           min(n.rn) over (partition by n.username) as first_rn
    from (select s.*, rownum as rn from sxe s) n
) x
where x.rn = x.first_rn
order by id asc;
-- Dedup query, method 3: by rowid.
-- Variant A: expose rowid as the column rid through an inline view and keep
-- the rows whose rid is the smallest rowid of their username group.
with keep_rows as (
    select min(rowid) as rid
    from sxe
    group by username
)
select *
from (select s.*, rowid as rid from sxe s) x
where x.rid in (select k.rid from keep_rows k)
order by id asc;
-- Variant B: filter directly on the rowid pseudocolumn, no inline view.
select s.*, rowid
from sxe s
where rowid in (
    select min(rowid)
    from sxe
    group by username
)
order by id asc;
-- Dedup delete, method 1: by id.
-- Keeps the row with the smallest id for each username and deletes the rest.
-- NOTE(review): NOT IN matches nothing when the subquery yields a NULL;
-- this presumes id is never NULL — confirm against the table definition.
delete from sxe d
where d.id not in (
    select min(k.id)
    from sxe k
    group by k.username
);
-- Dedup delete, method 2: by rownum.
-- Left commented out. NOTE(review): presumably disabled because Oracle does not
-- allow DML through an inline view that selects ROWNUM — confirm before re-enabling.
--delete from (select s.*,rownum from sxe s) x where rownum not in (select min(rownum) from sxe group by username);
-- Dedup delete, method 3: by rowid.
-- Keeps the row with the smallest rowid for each username and deletes the rest.
delete from sxe d
where d.rowid not in (
    select min(k.rowid)
    from sxe k
    group by k.username
);
DISTINCT 关键字多列问题:
关于数据迁移,如何处理大数据量重复问题?针对Oracle
-- Create table1 with the structure of table2 and copy all of table2's rows into it.
-- Oracle's CREATE TABLE ... AS SELECT requires the AS keyword before the query.
create table table1 as select * from table2;
-- Create table1 with the structure of table2 but copy no rows:
-- the always-false predicate 1 = 2 filters out every row.
-- Oracle's CREATE TABLE ... AS SELECT requires the AS keyword before the query.
create table table1 as select * from table2 where 1 = 2;
开发过程中,如果涉及的数据量较小,删除可以用简单的 SQL 执行。但是对于数据量很大的迁移(百万、千万级的数据量),性能就是瓶颈所在;
因为查询需要时间,执行删除需要时间,删除完毕后提交事务也需要时间,因此性能基本上为零,弄不好数据库假死,甚至电脑假死。
所以我的大数据迁移经验就是:
-- Dedup a large table by rebuilding it instead of deleting rows in place:
-- copy one row per coln value (the row with the smallest id) into temp_table,
-- then swap temp_table in for table2.
-- The filter is IN, not NOT IN: IN keeps the first row of every coln group,
-- whereas NOT IN would keep exactly the duplicate rows that must be discarded.
-- The subquery reads table2 because that is the table being deduplicated.
create table temp_table as
select *
from table2
where id in (
    select min(id)
    from table2
    group by coln
)
order by coln asc;
-- CREATE TABLE ... AS SELECT does not carry over indexes, triggers, grants or
-- most constraints; recreate them on the rebuilt table after the swap.
drop table table2;
rename temp_table to table2;
关于数据库表结构编辑,针对Oracle:
增加列:
-- Add a new column named column_name of type column_type to table1.
alter table table1
    add (column_name column_type);
修改列大小:
-- Redefine column_name on table1 with the new type/size new_column_type.
alter table table1
    modify (column_name new_column_type);
修改列名称:
-- Rename column cln1 of table1 to cln2.
alter table table1
    rename column cln1 to cln2;
删除列:
-- Drop column cln1 from table1 (same table name as the other ALTER TABLE examples).
alter table table1 drop column cln1;