unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;

type
  TForm1 = class(TForm)
    Memo1: TMemo;
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}

uses RegularExpressions, msxml;

const
  patternUrl    = 'http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?'; //URL地址
  patternEmail  = '\w+([-+.'']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*'; //Email地址
  patternTel    = '(\(\d{3}\)|\d{3}-)?\d{8}';                      //电话号码
  patternIDCard = '\d{17}[\d|X]|\d{15}';                           //身份证号码


{获取网页源码的函数}
function GetWebPageText(const AUrl: string): string;
begin
  with CoXMLHTTP.Create do begin
    open('Get', AUrl, False, EmptyParam, EmptyParam);
    send(EmptyParam);
    Result := responseText;
  end;
end;

procedure TForm1.Button1Click(Sender: TObject);
var
  txt,url: string;
  match: TMatch;
begin
  Memo1.Clear;
  url := 'http://www.google.com.hk/search?hl=zh-TW&source=hp&biw=1440&bih=796&q=Email+%E7%94%B5%E8%AF%9D+%E8%BA%AB%E4%BB%BD%E8%AF%81&btnG=Google+%E6%90%9C%E5%B0%8B&aq=f&aqi=&aql=&oq=';
  txt := GetWebPageText(url);

  for match in TRegEx.Matches(txt, patternUrl) do Memo1.Lines.Add(match.Value);
  Memo1.Lines.Add('--------------------------');
  for match in TRegEx.Matches(txt, patternEmail) do Memo1.Lines.Add(match.Value);
  Memo1.Lines.Add('--------------------------');
  for match in TRegEx.Matches(txt, patternTel) do Memo1.Lines.Add(match.Value);
  Memo1.Lines.Add('--------------------------');
  for match in TRegEx.Matches(txt, patternIDCard) do Memo1.Lines.Add(match.Value);
  Memo1.Lines.Add('--------------------------');
end;

end.

posted on 2011-03-29 13:27  万一  阅读(4866)  评论(3编辑  收藏  举报