php 采集常用代码

/**
 * Fetch a URL with cURL and return the response body.
 *
 * The request uses a 10 second connect timeout and sends a Baiduspider
 * User-Agent string.
 *
 * @param string $url  Address to request.
 * @param bool   $gzip When true, ask the server for gzip-compressed content
 *                     and let cURL decode it transparently.
 * @return string|false Response body, or false when the handle could not be
 *                      created or the transfer failed.
 */
function curl_get($url, $gzip=false){
    $curl = curl_init($url);
    if ($curl === false) {
        // No handle was created; report failure the same way curl_exec() does.
        return false;
    }

    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");

    if ($gzip) {
        // Sends "Accept-Encoding: gzip" and enables automatic decoding of the reply.
        curl_setopt($curl, CURLOPT_ENCODING, "gzip");
    }

    $content = curl_exec($curl);
    curl_close($curl);

    return $content;
}

/**
 * Return the trimmed text found between two delimiters.
 *
 * The search starts at the first occurrence of $before and ends at the first
 * occurrence of $after that follows it. A repeated $before inside the
 * captured region does not cut the result short.
 *
 * @param string $before Opening delimiter.
 * @param string $after  Closing delimiter.
 * @param string $str    Text to search.
 * @return string The trimmed text between the delimiters, or '' when either
 *                delimiter is empty or cannot be found.
 */
function get_middle($before,$after,$str)/*{{{*/
{
    // An empty delimiter cannot mark a position; treat it as "not found".
    if ($before === '' || $after === '') {
        return '';
    }

    $start = strpos($str, $before);
    if ($start === false) {
        return '';
    }
    // Move past the opening delimiter so it is not part of the result.
    $start += strlen($before);

    // Only look for the closing delimiter after the opening one.
    $end = strpos($str, $after, $start);
    if ($end === false) {
        return '';
    }

    return trim(substr($str, $start, $end - $start));
}/*}}}*/

用法

// Download the page source.
$html = curl_get($url);

// Extract the text between <title> and </title>.
$title = get_middle('<title>', '</title>', $html);

常用的一些过滤代码

// Remove every <svg>...</svg> element: "i" ignores case, "s" lets "." span newlines.
$content = preg_replace("@<svg(.*?)</svg>@is", "", $content);

提取某个变量

<a href="/cat/2546">我是大侠</a>

// Capture the part of the href after /cat/ (group 1) and the link text (group 2).
// The U modifier makes both (.*) groups non-greedy.
preg_match_all("|<a href=\"/cat/(.*)\">(.*)</a>|isU", $html, $daijiejue);

// Use '' when nothing matched instead of reading a missing array index.
$c1 = isset($daijiejue[1][0]) ? $daijiejue[1][0] : ''; // 2546 for the sample anchor (the /cat/ prefix is not captured)
$c2 = isset($daijiejue[2][0]) ? $daijiejue[2][0] : ''; // inner text of the sample anchor

  

 

posted @ 2018-10-27 10:04  私家菜地  阅读(178)  评论(0)  编辑  收藏  举报