今天研究了怎样采集别人网站的数据,有一点小心得,在这里记录一下。以下是全部代码:
<?php
// Fetches a page from a remote site over a raw TCP socket and prints the
// response body (everything after the HTTP response headers).

$url = "www.website.com"; // replace with the site you want to fetch
$cookie = "";             // optional Cookie header value; leave empty to send none
$timeout = 80;            // seconds, used for both connecting and reading
$return = "";             // accumulated response body

// parse_url() only reports a 'host' component when the URL has a scheme,
// so a bare "www.example.com" must be given one before parsing.
if (strpos($url, '://') === false) {
    $url = "http://" . $url;
}
$parse = parse_url($url);

$host = (is_array($parse) && isset($parse['host'])) ? $parse['host'] : '';
// An empty path is not a valid request target; fall back to the site root.
$path = (is_array($parse) && isset($parse['path']) && $parse['path'] !== '') ? $parse['path'] : '/';
if (is_array($parse) && isset($parse['query'])) {
    $path .= '?' . $parse['query'];
}
$port = (is_array($parse) && isset($parse['port'])) ? $parse['port'] : 80;

if ($host === '') {
    echo "0--Invalid URL: " . $url;
} else {
    // '@' is kept here on purpose: the failure is reported through
    // $errno/$errstr below instead of through a PHP warning.
    $fp = @fsockopen($host, $port, $errno, $errstr, $timeout);
    if (!$fp) {
        echo $errno . "--" . $errstr; // connection failed: print error code and message
    } else {
        // Without this, reads use default_socket_timeout rather than $timeout.
        stream_set_timeout($fp, $timeout);

        // Build the HTTP request headers.
        $out = "POST $path HTTP/1.1\r\n";
        $out .= "Host: " . $host . ($port == 80 ? "" : ":" . $port) . "\r\n";
        $out .= "Accept: */*\r\n";
        // No request body is sent, so say so explicitly; an HTTP/1.1 POST
        // without a length may be rejected by the server.
        $out .= "Content-Length: 0\r\n";
        $out .= "Connection: Close\r\n";
        if ($cookie !== '') {
            $out .= "Cookie: $cookie\r\n";
        }
        $out .= "\r\n";

        if (fwrite($fp, $out) === false) {
            echo "0--Failed to send request";
        } else {
            // Skip the response headers: they end at the first empty line.
            // fgets() returning false (EOF, error or timeout) also ends the loop.
            while (($header = fgets($fp)) !== false) {
                if ($header === "\r\n" || $header === "\n") {
                    break;
                }
            }

            // Read the body in chunks of up to 8192 bytes until the server
            // closes the connection or a read times out.
            // NOTE: the body is collected as-is; if the server answers with
            // chunked transfer encoding the chunk framing is not decoded.
            while (!feof($fp)) {
                $data = fread($fp, 8192);
                if ($data === false) {
                    break;
                }
                $return .= $data;

                // The timed_out flag is only meaningful after a read attempt.
                $status = stream_get_meta_data($fp);
                if ($status['timed_out']) {
                    break;
                }
            }
        }

        fclose($fp);
        print_r($return);
    }
}