PHP 抓取网页数据时遇到的问题
1. file_get_contents 无法抓取使用 HTTPS 安全协议的网站
解决办法:改用 cURL 获取数据(注意:下面的示例关闭了 SSL 证书校验,存在中间人攻击风险,生产环境应开启校验)
/**
 * Fetch the body of a URL with cURL, as a substitute for file_get_contents().
 *
 * Redirects are followed and response headers are excluded from the result.
 *
 * SECURITY NOTE: by default TLS certificate and host-name verification are
 * disabled (the historical behaviour of this helper), which leaves HTTPS
 * requests open to man-in-the-middle attacks. Pass true as the second
 * argument whenever a usable CA bundle is configured.
 *
 * @param string $url        Address to fetch.
 * @param bool   $verify_ssl Verify the peer certificate and host name.
 *                           Defaults to false for backward compatibility.
 *
 * @return string|false Response body, or false on failure (an E_USER_WARNING
 *                      carrying the cURL error message is raised as well).
 */
function file_get_contents_by_curl($url, $verify_ssl = false)
{
    $ch = curl_init();
    if ($ch === false) {
        trigger_error('file_get_contents_by_curl(): unable to initialise a cURL session', E_USER_WARNING);
        return false;
    }

    // curl_setopt_array() stops at the first option it cannot set, so the
    // option most likely to be rejected (FOLLOWLOCATION) is listed last.
    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => false,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_SSL_VERIFYPEER => (bool) $verify_ssl,
        // VERIFYHOST expects 0 (off) or 2 (full check), not a boolean.
        CURLOPT_SSL_VERIFYHOST => $verify_ssl ? 2 : 0,
        CURLOPT_FOLLOWLOCATION => true,
    ]);

    $result = curl_exec($ch);
    if ($result === false) {
        // Surface the reason instead of silently handing false to the caller.
        trigger_error(
            sprintf('file_get_contents_by_curl(%s): %s', $url, curl_error($ch)),
            E_USER_WARNING
        );
    }

    curl_close($ch);

    return $result;
}
// Load the phpQuery library. require_once keeps a repeated include of this
// snippet from loading the file twice (presumably it declares the phpQuery
// class, which would otherwise trigger a redeclaration fatal error).
require_once 'phpQuery.php';

// Download the page through the cURL helper; $url must be set by the caller.
$content = file_get_contents_by_curl($url);
if ($content === false) {
    // The helper returns false when the transfer fails; stop here rather than
    // handing a non-string to the HTML parser.
    throw new RuntimeException('Failed to fetch page content from: ' . $url);
}

// Build a phpQuery document from the fetched markup.
$html = phpQuery::newDocumentHTML($content);
2. iconv 遇到无法转换的特殊字符时会转换失败
改进方式:改用 mb_convert_encoding($str, 'GBK', 'UTF-8');(将 $str 从 UTF-8 转换为 GBK)