百度采集代码(已过期)
<?php
/**
 * Legacy scraper for Baidu "site:" search results (query uses tn=baidulaonian).
 *
 * Requests up to 64 result pages (pn = 0, 10, 20, ...), HTML-escapes each page
 * and echoes every URL on the target host found in it, one per line followed
 * by "<br />". Paging stops at the first page that cannot be fetched or that
 * no longer mentions the target host.
 */

/**
 * Fetches a URL with cURL, sending a desktop Chrome User-Agent header.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the transfer failed
 *                      (the cURL error text is echoed in that case).
 */
function gethtml($url)
{
    $header = array(
        'User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.146 Safari/537.36',
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Return the body from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Execute the request.
    $content = curl_exec($ch);
    // Strict check: an empty body ("") is a valid response, only false is a failure.
    if ($content === false) {
        echo "error:" . curl_error($ch);
    }

    // Release the handle.
    curl_close($ch);

    // Hand the result (body or false) back to the caller.
    return $content;
}

// Host whose indexed URLs are collected. The search query, the stop check and
// the match pattern are all derived from this one value so they cannot drift apart.
$site = 'www.xxxxxx.com';

$url = 'http://www.baidu.com/s?wd=site:' . $site . '+inurl:hot&tn=baidulaonian&pn=';

// Marker that must appear in a page for paging to continue.
$marker = 'http://' . $site;

// scheme://host/path up to the next whitespace; the host is quoted so its dots
// match literally, and the scheme class is letters only.
$pattern = '/[a-zA-Z]+:\/\/' . preg_quote($site, '/') . '\/[^\s]*/';

for ($page = 0; $page < 64; $page++) {
    $html = gethtml($url . ($page * 10));
    if ($html === false) {
        // gethtml() has already echoed the cURL error; nothing left to parse.
        break;
    }

    // Explicit flags keep the escaping identical across PHP versions, and
    // ENT_SUBSTITUTE avoids an empty result when the page is not valid UTF-8.
    $string = htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // Strict comparison: a match at offset 0 must not be mistaken for "not found".
    if (strpos($string, $marker) === false) {
        break;
    }

    // Collect the list of matching URLs.
    // NOTE(review): matching runs on the escaped text and stops only at
    // whitespace, so a match can carry trailing entities such as &quot;.
    preg_match_all($pattern, $string, $data);
    foreach ($data[0] as $item) {
        echo $item . '<br />';
    }
}
?>
正则写得还有些缺陷,仅适用于百度老年版(目前已经失效,仅供参考)。