简单爬虫,查博客浏览量

简单的字符串操作,稍微有些不严谨,但还可以修改。直接运行下面的yeshu()方法就可以。

yeshu("http://www.cnblogs.com/cyrfr/"); // Home page URL of the cnblogs blog; it must be the page that actually lists the posts

/**
 * Works out how many list pages a cnblogs blog has, then calls zongshu()
 * to total the view counts across those pages.
 *
 * The first list page only carries a "下一页" (next page) link, so the
 * total page count is read from the pager block of the page that link
 * points to. When the home page cannot be fetched or has no next-page
 * link, a single page is assumed.
 *
 * @param string $n Blog home URL, e.g. "http://www.cnblogs.com/cyrfr/".
 *                  A missing trailing slash is tolerated.
 *
 * @return void The result is printed by zongshu().
 */
function yeshu($n)
{
	// Page URLs are built by appending "default.html?page=N" directly to
	// the base, so make sure it ends with exactly one slash.
	$base = rtrim($n, '/') . '/';
	$xunhuan = 1;

	$html = file_get_contents($n);

	// Capture the page number straight from the next-page link instead of
	// slicing the document with fixed byte offsets. No "u" modifier: the
	// needles are literal UTF-8 byte sequences, and byte-wise matching keeps
	// working even if the page contains malformed UTF-8.
	if ($html !== false && preg_match('/default\.html\?page=(\d+)">下一页/', $html, $next) === 1) {
		// At least this many pages exist; used when the pager cannot be read.
		$xunhuan = max(1, (int) $next[1]);

		$htm = file_get_contents($base . "default.html?page=" . $next[1]);

		// The pager shows the total page count right before "页: <a href".
		// NOTE(review): the original skipped 6 bytes after the opening tag;
		// the exact prefix is not visible here, so any run of non-digit,
		// non-tag bytes is accepted before the number.
		if ($htm !== false && preg_match('/<div class="pager">[^<\d]*(\d+)\s*页: <a href/', $htm, $total) === 1) {
			$xunhuan = max(1, (int) $total[1]);
		}
	}

	zongshu($xunhuan, $base);
}

/**
 * Sums the view counts shown on every list page of a blog and prints the
 * total.
 *
 * Each page is read from "{$main}default.html?page={$i}" for $i = 1..$nn.
 * Every occurrence of "阅读(<digits>) 评论" on a page contributes its
 * number to the total. Pages that cannot be fetched and entries whose
 * count is not purely digits are skipped.
 *
 * @param int|string $nn   Number of list pages to scan. Non-numeric values
 *                         are treated as 0 so the loop always terminates.
 * @param string     $main Base URL (or path) ending with a slash.
 *
 * @return void The total is written to the output with echo.
 */
function zongshu($nn, $main)
{
	// A non-numeric string compared with an int is compared as strings on
	// PHP 8, which would make "$i <= $nn" true forever. Normalise to int.
	$pageText = is_string($nn) ? trim($nn) : $nn;
	$pages = is_numeric($pageText) ? (int) $pageText : 0;

	$num = 0;
	for ($i = 1; $i <= $pages; $i++) {
		$url = $main . "default.html?page=" . $i;
		$html = file_get_contents($url);
		if ($html === false) {
			// Fetch failed (a warning has already been emitted); skip it.
			continue;
		}

		// One pass over the page instead of repeated strpos()/substr()
		// slicing. Only all-digit counts match, so nothing non-numeric is
		// ever added. No "u" modifier: the needles are literal UTF-8 byte
		// sequences, so byte-wise matching is sufficient and tolerant of
		// malformed input.
		if (preg_match_all('/阅读\((\d+)\) 评论/', $html, $matches)) {
			$num += array_sum($matches[1]);
		}
	}

	echo $num;
}

  

posted @ 2017-04-26 21:22  发瑞  阅读(336)  评论(0)  编辑  收藏  举报