Delphi:取得网页源码内容

下面给出取得网页源码内容的函数及其调用方法,供大家参考:
program geturl;
uses
wininet,
windows;
//取网页内容
{ Converts a null-terminated character pointer into a Delphi string.
  Declared locally because this program's uses clause lists only
  wininet and windows (SysUtils is not included). }
function StrPas(const Str: PChar): string;
begin
  Result := string(Str);
end;
{ Downloads the resource at Url through WinINet and returns its body.

  Parameters:
    Url - absolute URL to fetch (passed unchanged to InternetOpenUrl).

  Returns:
    The bytes received, converted to a string. An empty string is returned
    when the session or the URL cannot be opened, or when nothing was read.

  The body is read in fixed-size chunks until InternetReadFile reports zero
  bytes, so the result does not depend on a Content-Length header and is not
  truncated when the server delivers the page in several pieces. }
function GetWebPage(const Url: string): string;
const
  ReadBufferSize = 4096;
var
  Session,
  HttpFile: HINTERNET;
  Buffer: array[0..ReadBufferSize - 1] of AnsiChar;
  dwBytesRead: DWord;
  Chunk,
  Raw: AnsiString;
begin
  Result := '';

  // Access type 0 = INTERNET_OPEN_TYPE_PRECONFIG (same value the original passed).
  Session := InternetOpen('', 0, nil, nil, 0);
  if Session = nil then
    Exit;
  try
    HttpFile := InternetOpenUrl(Session, PChar(Url), nil, 0, 0, 0);
    if HttpFile = nil then
      Exit;
    try
      Raw := '';
      repeat
        dwBytesRead := 0;
        // A FALSE return means the read failed; keep whatever was received so far.
        if not InternetReadFile(HttpFile, @Buffer[0], SizeOf(Buffer), dwBytesRead) then
          Break;
        if dwBytesRead > 0 then
        begin
          // Copy exactly dwBytesRead bytes; the buffer is not null-terminated.
          SetString(Chunk, PAnsiChar(@Buffer[0]), Integer(dwBytesRead));
          Raw := Raw + Chunk;
        end;
      until dwBytesRead = 0; // success with zero bytes read marks end of data
      Result := string(Raw);
    finally
      InternetCloseHandle(HttpFile);
    end;
  finally
    // Runs on every exit path, including the early Exit above.
    InternetCloseHandle(Session);
  end;
end;
调用方法:GetWebPage(网页地址);  例如:GetWebPage('http://www.example.com/')
Delphi 取得网页源码内容的另一种办法(使用 TTcpClient 直接发送 HTTP 请求):
unit Unit1;
 
interface
 
uses
   Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
   Dialogs, StdCtrls, Sockets;
 
type
   // Form class for this unit. Its components are used by Button1Click
   // to send an HTTP request over a TCP socket and show the response.
   TForm1 = class(TForm)
     Button1: TButton;       // presumably wired to Button1Click in the .dfm - confirm
     TcpClient1: TTcpClient; // socket used by Button1Click to send the request and read the reply
     Memo1: TMemo;           // receives the response header lines, then the response body
     Edit1: TEdit;           // shows total length and bytes downloaded so far
     // Click handler that performs the whole download.
     procedure Button1Click(Sender: TObject);
   private
     { Private declarations }
   public
     { Public declarations }
   end;
 
var
   Form1: TForm1;
 
implementation
 
{$R *.dfm}
 
{ Downloads the root page of a fixed host over a raw TCP connection.

  Steps: connect to port 80, send an HTTP/1.1 GET request, read the response
  headers (picking up Content-Length), read the body line by line into Memo1
  while reporting progress in Edit1, save the memo to d:\test.txt and notify
  the user.

  Sender is not used.

  If the connection cannot be established the handler reports the failure and
  returns without touching the output file. The socket is always closed and
  the window-update lock is always released, even if an exception is raised. }
procedure TForm1.Button1Click(Sender: TObject);
const
  TargetHost = 'www.163.cn';
  LengthHeader = 'content-length:'; // compared case-insensitively
var
  Test: string;
  HttpLen: integer;
  Received: integer;
begin
  TcpClient1.RemoteHost := TargetHost;
  TcpClient1.RemotePort := '80';
  TcpClient1.Active := true;
  try
    if not TcpClient1.Connected then
    begin
      // Without this guard the code below would save an empty file and report success.
      ShowMessage('无法连接到服务器');
      Exit;
    end;

    // Send the HTTP/1.1 request; the empty line terminates the header section.
    TcpClient1.Sendln('GET / HTTP/1.1');
    TcpClient1.Sendln('Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/msword, */*');
    TcpClient1.Sendln('User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Maxthon)');
    TcpClient1.Sendln('Host: ' + TargetHost);
    TcpClient1.Sendln('Connection: Keep-Alive');
    TcpClient1.Sendln('');

    HttpLen := 0;
    // Read every response header line; an empty line ends the headers.
    while true do
    begin
      Test := TcpClient1.Receiveln();
      Memo1.Lines.Add(Test);
      if Test = '' then Break;
      // Accept the header only at the start of the line, in any letter case.
      // A malformed value leaves the length at 0 instead of raising.
      if Pos(LengthHeader, LowerCase(Test)) = 1 then
        HttpLen := StrToIntDef(Trim(Copy(Test, Length(LengthHeader) + 1, MaxInt)), 0);
    end;

    LockWindowUpdate(Memo1.Handle);
    try
      Memo1.Clear;
      Received := 0;
      // Keep reading until the announced length has arrived, or, when no length
      // was announced, for as long as data is immediately available.
      while (Received < HttpLen - 2) or (TcpClient1.WaitForData(0) and (HttpLen = 0)) do
      begin
        Memo1.Lines.Add(TcpClient1.Receiveln(#$0D#$0A));
        Application.ProcessMessages;
        // Measure the memo once per iteration and reuse the value.
        Received := Length(Memo1.Text);
        Edit1.Text := Format('总长度:%d   已下载:%d ', [HttpLen, Received]);
      end;

      // Some pages are not ANSI encoded and must be converted, otherwise
      // Chinese text shows up garbled.
      // NOTE(review): the conversion is triggered by an exact length match,
      // not by the declared charset - confirm this is intended.
      if Received = HttpLen - 2 then Memo1.Text := Utf8ToAnsi(Memo1.Text);
    finally
      LockWindowUpdate(0);
    end;

    Memo1.Lines.SaveToFile('d:\test.txt');
    ShowMessage('完成数据下载');
  finally
    // Always release the socket so repeated clicks start from a fresh connection.
    TcpClient1.Active := False;
  end;
end;
 
end.

  

posted @   星星的学习小志  阅读(1932)  评论(1)  编辑  收藏  举报
编辑推荐:
· 没有源码,如何修改代码逻辑?
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· .NET Core 托管堆内存泄露/CPU异常的常见思路
阅读排行:
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· 没有源码,如何修改代码逻辑?
· NetPad:一个.NET开源、跨平台的C#编辑器
· PowerShell开发游戏 · 打蜜蜂
· 凌晨三点救火实录:Java内存泄漏的七个神坑,你至少踩过三个!
点击右上角即可分享
微信分享提示