PHP 利用 cURL 获取网页 title 内容
/*
 * Usage:
 *   $html  = curl_get_file_contents($url);
 *   $title = get_title_contents($html);
 *   var_dump($title);
 */

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * HTTP redirects (301, 302, 303, 307, 308) are followed manually, at most
 * three times per call. The caller-supplied referer is sent on every hop.
 *
 * @param string $url     URL to fetch.
 * @param string $referer Value for the Referer request header (may be empty).
 *
 * @return string|false Response body; the body of the redirect response itself
 *                      when its Location header is missing or unusable; false
 *                      when the transfer fails or the redirect limit is hit.
 */
function curl_get_file_contents($url, $referer = '')
{
    // The redirect budget is tracked per call. A function-level static counter
    // would never be reset and would make every call fail once the process had
    // followed three redirects in total.
    $maxRedirects = 3;
    $redirectCodes = array(301, 302, 303, 307, 308);
    $userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36";

    for ($redirects = 0; ; $redirects++) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, true);
        // NOTE(security): certificate and host verification are disabled so that
        // sites with broken TLS setups can still be read. This allows
        // man-in-the-middle tampering; enable both checks for trusted data.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_REFERER, $referer);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        curl_close($ch);

        if (!is_string($response)) {
            return false;
        }

        // Split on the header size reported by cURL rather than on the first
        // blank line: "100 Continue" or proxy CONNECT responses put several
        // header blocks in front of the body.
        $header = (string) substr($response, 0, $headerSize);
        $body = (string) substr($response, $headerSize);

        if (!in_array($httpCode, $redirectCodes, true)) {
            return $body;
        }

        // Anchored and case-insensitive: HTTP/2 header names are lowercase, and
        // "Content-Location:" must not be mistaken for "Location:".
        if (!preg_match_all('/^Location:(.*)$/mi', $header, $matches)) {
            return $body;
        }

        // The last Location header belongs to the final header block.
        $nextUrl = _curl_resolve_redirect_url(trim(end($matches[1])), $url);
        if ($nextUrl === null) {
            return $body;
        }

        if ($redirects >= $maxRedirects) {
            return false;
        }

        $url = $nextUrl;
    }
}

/**
 * Turns a Location header value into an absolute URL, resolving relative
 * values against the URL that produced the redirect.
 *
 * User info and fragments are not carried over.
 *
 * @param string $location Raw (already trimmed) Location header value.
 * @param string $baseUrl  URL of the request that returned the redirect.
 *
 * @return string|null Absolute URL, or null when none can be built.
 */
function _curl_resolve_redirect_url($location, $baseUrl)
{
    if ($location === '') {
        return null;
    }

    $target = parse_url($location);
    if (!is_array($target)) {
        return null;
    }

    $base = parse_url($baseUrl);
    if (!is_array($base)) {
        $base = array();
    }

    $hasHost = isset($target['host']);
    if (isset($target['scheme']) && !$hasHost) {
        // e.g. "mailto:..." - not something that can be fetched here.
        return null;
    }

    $scheme = isset($target['scheme'])
        ? $target['scheme']
        : (isset($base['scheme']) ? $base['scheme'] : null);
    $authority = $hasHost ? $target : $base;
    if ($scheme === null || !isset($authority['host'])) {
        return null;
    }

    $host = $authority['host'];
    if (isset($authority['port'])) {
        $host .= ':' . $authority['port'];
    }

    $path = isset($target['path']) ? $target['path'] : '';
    $query = isset($target['query']) ? $target['query'] : null;

    if (!$hasHost) {
        // A base URL that has a host always has a path starting with "/" (or none).
        $basePath = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

        if ($path === '') {
            // Query-only (or fragment-only) reference: keep the current path.
            $path = $basePath;
            if ($query === null && isset($base['query'])) {
                $query = $base['query'];
            }
        } elseif ($path[0] !== '/') {
            // Relative path: resolve against the directory of the current path.
            $path = substr($basePath, 0, strrpos($basePath, '/') + 1) . $path;
        }
    }

    return $scheme . '://' . $host . $path . ($query !== null ? '?' . $query : '');
}

/**
 * Extracts the page title from an HTML document and returns it as UTF-8 text.
 *
 * The charset is taken from a <meta> tag inside <head>, e.g.
 *   <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
 *   <meta content="text/html; charset=gb2312" http-equiv="Content-Type">
 *   <meta charset="gb2312">
 * When a non-UTF-8 charset is declared the title is converted with iconv; if
 * the conversion fails (or iconv is unavailable) the raw bytes are kept.
 *
 * @param string|false $html HTML source, typically the result of
 *                           curl_get_file_contents().
 *
 * @return string The trimmed title with HTML entities decoded; an empty string
 *                when <head> contains no <title>; a fixed message when no
 *                <head> section can be found.
 */
function get_title_contents($html)
{
    $html = (string) $html;

    // "(?:\s[^>]*)?" allows attributes on the tag without also matching <header>.
    if (!preg_match('/<head(?:\s[^>]*)?>(.*?)<\/head>/si', $html, $htmlHeaders)) {
        return "无法解析数据中的 <head> 区段";
    }
    $head = $htmlHeaders[1];

    // Charset declared by a <meta> tag; the value may be unquoted or wrapped in
    // single or double quotes.
    $charset = 'None';
    if (preg_match('/<meta[^>]*charset\s*=\s*["\']?\s*([a-zA-Z0-9_:.-]+)/i', $head, $results)) {
        $charset = $results[1];
    }

    // The "s" flag lets the title span several lines.
    if (!preg_match('/<title(?:\s[^>]*)?>(.*?)<\/title>/si', $head, $htmlTitles)) {
        return '';
    }
    $title = trim($htmlTitles[1]);

    if ($charset !== 'None' && strcasecmp($charset, 'UTF-8') !== 0 && function_exists('iconv')) {
        // iconv reports unknown charsets and invalid byte sequences through
        // PHP's error handler; silence that locally and check the result instead.
        set_error_handler(function () {
            return true;
        });
        $converted = iconv($charset, 'UTF-8', $title);
        restore_error_handler();

        if ($converted !== false) {
            $title = $converted;
        }
    }

    // Explicit flags and charset so the result does not depend on PHP-version defaults.
    return html_entity_decode($title, ENT_QUOTES | ENT_HTML5, 'UTF-8');
}
支持 HTTPS 和 302 跳转