解析html网页及下载连接
重要代码实现:
procedure TForm1.MyWebBrswTitleChange(Sender: TObject;
const Text: WideString);
begin
STitle := Text;
end;
function TForm1.ChangeURL(i,j: integer): Boolean;
var
Url : WideString;
begin
Result := False;
try
Url := JKWUrlPIC + IntToStr(i) + '/index';
if j <> 1 then Url := Url + '-' + IntToStr(j);
Url := Url + '.html';
MyWebBrsw.Navigate(Url);
repeat
Application.ProcessMessages;
until (not MyWebBrsw.Busy);
if (Pos('该页无法显示',STitle) > 0) or (Pos('无权访问',STitle) > 0) or (Pos('找不到服务器',STitle) > 0) or (Pos('无法找到该页',STitle) > 0) then
Exit;
except
Exit;
end;
Result := True;
end;
procedure TForm1.BtnStartClick(Sender: TObject);
var
IHtmDoc:IHTMLDocument2;
IHtmElmCln:IHTMLElementCollection;
Len,i:integer;
IElm:IHTMLElement;
vAttri:Variant;
iType, ipage: integer;
Label Lab1;
begin
Memo1.Lines.Clear;
Memo1.Lines.Add('Please wait for a moment... ... ');
Memo1.Lines.Add('');
FrmHtml := TFrmHtml.Create(nil);
try
FrmHtml.Memo1.Lines.Clear; //获得Webbrowser对象中的文档对象
for iType := 1 to 4 do
begin
ipage := 1;
while ChangeURL(iType,ipage) do
begin
Inc(ipage); //if ipage = 3 then Break; showmessage('');
if CheckBox1.Checked then
goto Lab1;
IHtmDoc := MyWebBrsw.Document as IHTMLDocument2;
IHtmElmCln := IHtmDoc.Get_all; //获得文档中所有的HTML元素集合
len := IHtmElmCln.Get_length;
for i := 0 to Len-1 do begin
IElm := IHtmElmCln.item(i,varempty) as IHTMLElement;
if IElm.Get_tagName = 'IMG' then
begin
vAttri := IElm.getAttribute('SRC',0);
if pos('jpg', vattri) <> 0 then
FrmHtml.Memo1.Lines.Add(vAttri);
end;
end;
end;
end;
Lab1:
;
FrmHtml.ShowModal;
finally
FrmHtml.Free;
end;
保存部分:
var
i: integer;
sFileName: string;
begin
If not SetCurrentDir(Trim(LabeledEdit2.Text)) then
begin
Application.MessageBox('路径不存在!','提示',MB_OK);
exit;
end;
if LabeledEdit2.Text[Length(LabeledEdit2.Text)] <> '\' then
LabeledEdit2.Text := LabeledEdit2.Text + '\';
ProgressBar1.Max := Memo1.Lines.Count;
for i := 0 to Memo1.Lines.Count - 1 do
begin
sFileName := copy(Memo1.Lines.Strings[i],1, pos('xt',Memo1.Lines.Strings[i])-1) + 'dt'+copy(Memo1.Lines.Strings[i],pos('xt',Memo1.Lines.Strings[i])+2,Length(Memo1.Lines.Strings[i]));
sFileName := Copy(sFileName,1, length(sFileName) - 5) + 'd.jpg';
try
URLDownloadToFile(nil,Pchar(sFileName), Pchar(Trim(LabeledEdit2.Text + copy(sFileName,pos('200',sFileName),length(sFileName)))),0,nil);
except
ShowMessage('磁盘无空间,或者其他异常!');
end;
ProgressBar1.Position := i;
end;
end;