解析html网页及下载连接


重要代码实现:
procedure TForm1.MyWebBrswTitleChange(Sender: TObject;
  const Text: WideString);
begin
  STitle := Text;
end;

function TForm1.ChangeURL(i,j: integer): Boolean;
var
  Url : WideString;
begin
  Result := False;
  try
    Url := JKWUrlPIC + IntToStr(i) + '/index';
    if j <> 1 then Url := Url + '-' + IntToStr(j);
    Url := Url + '.html';
    MyWebBrsw.Navigate(Url);
    repeat
      Application.ProcessMessages;
    until (not MyWebBrsw.Busy);
    if (Pos('该页无法显示',STitle) > 0) or (Pos('无权访问',STitle) > 0) or (Pos('找不到服务器',STitle) > 0) or (Pos('无法找到该页',STitle) > 0) then
      Exit;
  except
    Exit;
  end;
  Result := True;
end;
procedure TForm1.BtnStartClick(Sender: TObject);
var
  IHtmDoc:IHTMLDocument2;
  IHtmElmCln:IHTMLElementCollection;
  Len,i:integer;
  IElm:IHTMLElement;
  vAttri:Variant;
  iType, ipage: integer;
Label Lab1;
begin
  Memo1.Lines.Clear;
  Memo1.Lines.Add('Please wait for a moment... ... ');
  Memo1.Lines.Add('');
  FrmHtml := TFrmHtml.Create(nil);
  try
    FrmHtml.Memo1.Lines.Clear;       //获得Webbrowser对象中的文档对象
    for iType := 1 to 4 do
    begin
      ipage := 1;
      while ChangeURL(iType,ipage) do
      begin
        Inc(ipage);                         //if ipage = 3 then  Break;  showmessage('');
        if CheckBox1.Checked then
          goto Lab1;
        IHtmDoc := MyWebBrsw.Document as IHTMLDocument2;
        IHtmElmCln := IHtmDoc.Get_all; //获得文档中所有的HTML元素集合
        len := IHtmElmCln.Get_length;
        for i := 0 to Len-1 do begin
          IElm := IHtmElmCln.item(i,varempty) as IHTMLElement;
          if IElm.Get_tagName = 'IMG' then
          begin
            vAttri := IElm.getAttribute('SRC',0);
            if pos('jpg', vattri) <> 0 then
              FrmHtml.Memo1.Lines.Add(vAttri);
          end;
        end;
      end;
    end;
    Lab1:
      ;
    FrmHtml.ShowModal;
  finally
    FrmHtml.Free;
  end;

保存部分:
var
  i: integer;
  sFileName: string;
begin
  If not SetCurrentDir(Trim(LabeledEdit2.Text)) then
  begin
    Application.MessageBox('路径不存在!','提示',MB_OK);
    exit;
  end;
  if LabeledEdit2.Text[Length(LabeledEdit2.Text)] <> '\' then
    LabeledEdit2.Text := LabeledEdit2.Text + '\';
  ProgressBar1.Max := Memo1.Lines.Count;
  for i := 0 to Memo1.Lines.Count - 1 do
  begin
    sFileName := copy(Memo1.Lines.Strings[i],1, pos('xt',Memo1.Lines.Strings[i])-1) + 'dt'+copy(Memo1.Lines.Strings[i],pos('xt',Memo1.Lines.Strings[i])+2,Length(Memo1.Lines.Strings[i]));
    sFileName := Copy(sFileName,1, length(sFileName) - 5) + 'd.jpg';
    try
      URLDownloadToFile(nil,Pchar(sFileName), Pchar(Trim(LabeledEdit2.Text + copy(sFileName,pos('200',sFileName),length(sFileName)))),0,nil);
    except
      ShowMessage('磁盘无空间,或者其他异常!');
    end;
    ProgressBar1.Position := i;
  end;
end;
posted @ 2005-05-11 16:08  JustLive  阅读(1453)  评论(0编辑  收藏  举报