代码改变世界

ITPUB:字符串去冗余

2011-06-15 17:11  Tracy.  阅读(353)  评论(0编辑  收藏  举报

字符串去冗余,请教

有字符串'ABCD6F6CAAX'
要求得到 'ABCD6FX'
即对于重复的字符,只保留一个
谢谢

-------------------------------Answers-----------------------------------------------------------

1.

scott@SZTYORA> CREATE OR REPLACE FUNCTION fc_str_distinct(i_str in varchar2)
2 RETURN varchar2
3 IS
4 v_str VARCHAR2(4000);
5 CURSOR cur is
6 with a as (select level as lv, substr('ABCD6F6CAAX',level, 1) as substrs
7 from dual
8 connect by level<=length('ABCD6F6CAAX') )
9 select a1.lv, a1.substrs
10 from a a1
11 where not exists (select 1
12 from a a2
13 where a2.lv<a1.lv and a2.substrs=a1.substrs)
14 order by a1.lv;
15 BEGIN
16 v_str := '';
17 FOR i in cur LOOP
18 v_str:=v_str||i.substrs;
19 END LOOP;
20 RETURN v_str;
21 END;
22 /
函数已创建。
scott@SZTYORA> select fc_str_distinct('ABCD6F6CAAX') from dual;
FC_STR_DISTINCT('ABCD6F6CAAX')
---------------------------------------------------------------------------------------------------------------
ABCD6FX

 

2.

with t as (select level as lv, substr('ABCD6F6CAAX',level, 1) as substrs
from dual
connect by level<=length('ABCD6F6CAAX')),
t1 as (select substrs, rank()over(order by min(lv)) rk from t group by substrs)
select max(replace(sys_connect_by_path(substrs,','),',')) finalstr from t1 start with rk=1 connect by rk=prior rk+1
/

3.

just for fun, 2 more options:
15:55:14 SA @>with t0 as ( select 'ABCD6F6CAAdXX' str from dual)
15:55:14 2 SELECT str, fin_str
15:55:14 3 FROM (SELECT str, fin_str,rn,ln
15:55:14 4 FROM (SELECT str,SUBSTR(str,LEVEL,1) ch, ROWNUM nu,length(str) ln
15:55:14 5 FROM t0
15:55:14 6 CONNECT BY PRIOR str = str AND LEVEL <= LENGTH(str)
15:55:14 7 AND PRIOR DBMS_RANDOM.value IS NOT NULL
15:55:14 8 )
15:55:14 9 MODEL
15:55:14 10 PARTITION BY (str)
15:55:14 11 DIMENSION BY (row_number() over (PARTITION BY str ORDER BY nu) rn)
15:55:14 12 MEASURES (ch fin_str,ch,ln)
15:55:14 13 RULES
15:55:14 14 (fin_str[rn >1] = CASE WHEN instr(fin_str[cv()-1], ch[cv()])<1 THEN fin_str[cv()-1]||ch[cv()] ELSE fin_str[cv()-1] END )
15:55:14 15 )
15:55:14 16 WHERE rn=ln;
STR FIN_STR
-------------------------------- --------------------------------
ABCD6F6CAAdXX ABCD6FdX
Elapsed: 00:00:00.06
15:55:15 >
15:55:15 >SELECT xmlquery('fn:codepoints-to-string( fn:distinct-values(fn:string-to-codepoints(str) ) )'
15:55:15 2 passing xmlelement("str", 'ABCD6F6CAAdXX') RETURNING content).getstringval() res
15:55:15 3 FROM dual;
RES
------------------------------------------------------------------------------------------------------------------------
6ABCDFXd

4.

-- 根据10楼的代码,现在我真的很质疑 Connect by 的效率了:
-- 下面是两个函数(实现同样的功能:将一串字符串去重,只顺序选择第一次出现的字符)
-- 其中:f1函数是用的connect by,然后循环追加再输出;
f2用的是直接循环中if判断(10楼写的函数);
-- 函数 f1
CREATE OR REPLACE FUNCTION f1(i_str in varchar2)
RETURN varchar2
IS
v_str VARCHAR2(4000);
CURSOR cur is
with a as (select level as lv, substr(i_str,level, 1) as substrs
from dual
connect by level<=length(i_str) )
select a1.lv, a1.substrs
from a a1
where not exists (select 1
from a a2
where a2.lv<a1.lv and a2.substrs=a1.substrs)
order by a1.lv;
BEGIN
v_str := '';
FOR i in cur LOOP
v_str:=v_str||i.substrs;
END LOOP;
RETURN v_str;
END;
/
-- 函数 f2
create or replace function f2(pstr in varchar2) return varchar2
is
v_newstr varchar2(4000) := null;
i pls_integer:=1;
begin
for i in 1..length(pstr) loop
if instr(v_newstr,substr(pstr,i,1))<=0 or v_newstr is null then
v_newstr :=v_newstr||substr(pstr,i,1);
end if;
end loop;
return v_newstr;
end;
/
-- 效率测试:
CREATE TABLE t(
id number(18,0),
random_str varchar2(40)
);
-- 先随机插入10万条数据
BEGIN
FOR i IN 1..100000 LOOP
INSERT INTO t(id, random_str)
SELECT i, dbms_random.string('U',1)||dbms_random.string('L',39) from dual;
IF mod(i,100)=0 THEN
COMMIT;
END IF;
END LOOP;
COMMIT;
END;
/
-- 创建 t1 表,用来测试函数f1
CREATE TABLE t1 as select * from t where 1=2;
-- 创建 t2 表,用来测试函数f2
CREATE TABLE t2 as select * from t where 1=2;
-- 循环测试函数f1
DECLARE
v_begin_test date;
v_end_test date;
v_seconds number(18,0);
BEGIN
SELECT sysdate into v_begin_test from DUAL;
FOR i in(select id, random_str from t order by id) LOOP
insert into t1(id, random_str)
select i.id, f1(i.random_str) from dual;
IF mod(i.id,100)=0 THEN
COMMIT;
END IF;
END LOOP;
SELECT sysdate into v_end_test from DUAL;
SELECT (v_end_test-v_begin_test)*24*60*60 INTO v_seconds FROM DUAL;
DBMS_OUTPUT.PUT_LINE('Test function f1 ... '||chr(10)||'Begin test time: '||
to_char(v_begin_test,'YYYY-MM-DD HH24:MI:SS')||chr(10)||' End test time: '||
to_char(v_end_test,'YYYY-MM-DD HH24:MI:SS')||chr(10)||
'Test times(Seconds): '||to_char(v_seconds));
COMMIT;
END;
/
Test function f1 ...
Begin test time: 2011-06-14 11:02:35
End test time: 2011-06-14 11:07:19
Test times(Seconds): 284
PL/SQL 过程已成功完成。
-- 循环测试函数f2
DECLARE
v_begin_test date;
v_end_test date;
v_seconds number(18,0);
BEGIN
SELECT sysdate into v_begin_test from DUAL;
FOR i in(select id, random_str from t order by id) LOOP
insert into t2(id, random_str)
select i.id, f2(i.random_str) from dual;
IF mod(i.id,100)=0 THEN
COMMIT;
END IF;
END LOOP;
COMMIT;
SELECT sysdate into v_end_test from DUAL;
SELECT (v_end_test-v_begin_test)*24*60*60 INTO v_seconds FROM DUAL;
DBMS_OUTPUT.PUT_LINE('Test function f2 ... '||chr(10)||'Begin test time: '||
to_char(v_begin_test,'YYYY-MM-DD HH24:MI:SS')||chr(10)||' End test time: '||
to_char(v_end_test,'YYYY-MM-DD HH24:MI:SS')||chr(10)||
'Test times(Seconds): '||to_char(v_seconds));
END;
/
Test function f2 ...
Begin test time: 2011-06-14 11:10:04
End test time: 2011-06-14 11:10:11
Test times(Seconds): 7
PL/SQL 过程已成功完成。

5.

create or replace function f(pstr in varchar2) return varchar2
is
v_newstr varchar2(100) := null;
i pls_integer:=1;
begin
for i in 1..length(pstr) loop
if instr(v_newstr,substr(pstr,i,1))<=0 or v_newstr is null then
v_newstr :=v_newstr||substr(pstr,i,1);
end if;
end loop;
return v_newstr;
end;
/

 

Attachment link:

http://item.taobao.com/item.htm?id=8480978461