Oracle存储过程例子:运用了正则表达式、数组等

CREATE OR REPLACE PROCEDURE "SP_GET_ARTICLE_DATA"
(article_detail_id_$ number,
 Record_Md5_ID_$ varchar2,
 content_text_$ clob,
 ReturnValue out number
) is
begin
   declare
   --自定义数组
    TYPE StringArray IS table OF VARCHAR2(1000);
   
    Offset_URL number;
    Offset_Email number;
    Offset_Phone number;
    Offset_Account number;
    
    --数据下标
    URL_V_Num number:=1;
    Email_V_Num number:=1;
    Phone_V_Num number:=1;
    Account_V_Num number:=1;
    
    --数据元素存在标志
    Is_Exsit_Flag number;
    
    URL varchar2(3000);
    Email varchar2(200);
    Phone varchar2(100);
    Bank_Account varchar2(100);
    
    URL_Array StringArray:=StringArray();
    Email_Array StringArray:=StringArray();
    Phone_Array StringArray:=StringArray();
    Account_Array StringArray:=StringArray();

    --常量
    URL_RegExp constant varchar2(300):='(((file|gopher|news|nntp|telnet|http|ftp|https|ftps|sftp)://)|(www\.))+(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(/[a-zA-Z0-9\&'||'%_\./-~-]*)?';
    Email_RegExp constant varchar2(200):='[A-Za-z0-9_]+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*';
    Phone_RegExp constant varchar2(300):='[+[:digit:]]{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]*\d{4}|[+[:digit:]]?[-\.,[:blank:]]*\d{1,2}[-\.,[:blank:]]*\d{3}[-\.,[:blank:]]+\d{4}|\d{3}[-\.,[:blank:]]+\d{4}';
    Bank_Account_RegExp constant varchar2(200):='(SA)?[[:digit:]]{22}|(SA)?[[:blank:]]*([[:digit:]]{4}|\d{2})[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*[[:digit:]]{4}[-\.,[:blank:]]*(\d{4}|\d{2})';
    URL_Escape constant varchar2(100):='\.jpg|\.rar|\.zip|\.png|\.gif|\.pdf|\.doc|\.excel|\.swf|\.mpg|\.\.|\.rmvb';
    
    DUP_VAL_INDEX exception;
    pragma exception_init(DUP_VAL_INDEX,-0001);
    
  begin
   if article_detail_id_$ is not null and article_detail_id_$<>0 then
    Offset_URL:=1;
    Offset_Email:=1;
    Offset_Phone:=1;
    Offset_Account:=1;

    while Offset_URL<>0 or Offset_Email<>0 or Offset_Phone<>0 or Offset_Account<>0
    loop
       --获取URL
       if Offset_URL<>0 then
          select trim(to_char(REGEXP_SUBSTR(content_text_$,URL_RegExp, Offset_URL, 1,'i')))
          into URL from dual;
          --获取从offset位置开始的第一个符合URL表达式的字符串位置,并赋给offset
          select REGEXP_INSTR(content_text_$, URL_RegExp, Offset_URL, 1, 1,'i')
          into Offset_URL from dual;
       end if;
       --获取Email
       if Offset_Email<>0 then
          select trim(to_char(REGEXP_SUBSTR(content_text_$,Email_RegExp, Offset_Email, 1,'i')))
          into Email from dual;
          --获取从offset位置开始的第一个符合Email表达式的字符串位置,并赋给offset
          select REGEXP_INSTR(content_text_$, Email_RegExp, Offset_Email, 1, 1,'i')
          into Offset_Email from dual;
       end if;
       --获取Phone
       if Offset_Phone<>0 then
          select trim(to_char(REGEXP_SUBSTR(content_text_$,Phone_RegExp, Offset_Phone, 1,'i')))
          into Phone from dual;
          --获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
          select REGEXP_INSTR(content_text_$, Phone_RegExp, Offset_Phone, 1, 1,'i')
          into Offset_Phone from dual;
       end if;
       --获取Bank_Account
       if Offset_Account<>0 then
          select trim(to_char(REGEXP_SUBSTR(content_text_$,Bank_Account_RegExp, Offset_Account, 1,'i')))
          into Bank_Account from dual;
          --获取从offset位置开始的第一个符合Phone表达式的字符串位置,并赋给offset
          select REGEXP_INSTR(content_text_$, Bank_Account_RegExp, Offset_Account, 1, 1,'i')
          into Offset_Account from dual;
       end if;
       
       --URL插入到数组中
       if (length(URL)>0 and length(URL)<800) and REGEXP_INSTR(URL, URL_Escape, 1, 1, 0, 'i')=0 then
         Is_Exsit_Flag :=0;         
         FOR I IN 1..URL_Array.COUNT LOOP      
           if URL=URL_Array(I) then
            Is_Exsit_Flag:=1;
           end if;
         END LOOP;
         --如果数组中不存在当前值,则插入到数组
         if Is_Exsit_Flag=0 then            
            URL_Array.EXTEND(1);
            URL_Array(URL_V_Num):=URL;
            URL_V_Num:=URL_V_Num+1;
         end if;
       end if; 
       --Email插入到数组中
       if (length(Email)>0 and length(Email)<100) then
         Is_Exsit_Flag :=0;         
         FOR I IN 1..Email_Array.COUNT LOOP      
           if Email=Email_Array(I) then
            Is_Exsit_Flag:=1;
           end if;
         END LOOP;
         --如果数组中不存在当前值,则插入到数组
         if Is_Exsit_Flag=0 then            
            Email_Array.EXTEND(1);
            Email_Array(Email_V_Num):=Email;
            Email_V_Num:=Email_V_Num+1;
         end if;
       end if; 
       --Phone插入到数组中
       if (length(Phone)>6 and length(Phone)<100) then
         Is_Exsit_Flag :=0;         
         FOR I IN 1..Phone_Array.COUNT LOOP      
           if Phone=Phone_Array(I) then
            Is_Exsit_Flag:=1;
           end if;
         END LOOP;
         --如果数组中不存在当前值,则插入到数组
         if Is_Exsit_Flag=0 then            
            Phone_Array.EXTEND(1);
            Phone_Array(Phone_V_Num):=Phone;
            Phone_V_Num:=Phone_V_Num+1;
         end if;
       end if; 
       --Bank_Account插入到数组中
       if (length(Bank_Account)>0 and length(Bank_Account)<100) then
         Is_Exsit_Flag :=0;         
         FOR I IN 1..Account_Array.COUNT LOOP      
           if Bank_Account=Account_Array(I) then
            Is_Exsit_Flag:=1;
           end if;
         END LOOP;
         --如果数组中不存在当前值,则插入到数组
         if Is_Exsit_Flag=0 then            
            Account_Array.EXTEND(1);
            Account_Array(Account_V_Num):=Bank_Account;
            Account_V_Num:=Account_V_Num+1;
         end if;
       end if;       
    end loop;
    
    -- Dbms_Output.put_line('Array is:'||URL_Array(1));
    
    --插入获取到的URL
       FOR I IN 1..URL_Array.COUNT LOOP      
          insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
          values(article_detail_id_$,Record_Md5_ID_$,'URL','1',URL_Array(I));
       END LOOP;
       --插入获取到的Email
       FOR I IN 1..Email_Array.COUNT LOOP
          insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
          values(article_detail_id_$,Record_Md5_ID_$,'Email','0',Email_Array(I));
       END LOOP;
       --插入获取到的Phone
       FOR I IN 1..Phone_Array.COUNT LOOP
          insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
          values(article_detail_id_$,Record_Md5_ID_$,'Phone','0',Phone_Array(I));
       END LOOP;
       --插入获取到的Bank_Account
       FOR I IN 1..Account_Array.COUNT LOOP
          insert into article_data(article_detail_id,article_record_md5_id,data_type,data_is_url,data_value)
          values(article_detail_id_$,Record_Md5_ID_$,'Bank Account','0',Account_Array(I));
       END LOOP;
       
   end if;
  Exception
    when NO_DATA_FOUND then
      ReturnValue:=-1000;
    when DUP_VAL_INDEX then
      ReturnValue:=-1001;
    when VALUE_ERROR then
      ReturnValue:=-1002;
    when TOO_MANY_ROWS then
      ReturnValue:=-1003;
    when OTHERS then
      ReturnValue:=-1004;
      --Dbms_Output.put_line('The SQLCode is: '||SQLCODE);
      --Dbms_Output.put_line('The SQLERRM is: '||SQLERRM);
  end;
end SP_GET_ARTICLE_DATA;
posted @ 2011-07-02 19:49  jex  阅读(469)  评论(0编辑  收藏  举报