抓取 Google 搜索结果链接的 PHP 代码

<?php
// Declare the response body as UTF-8 encoded HTML.
header("Content-Type: text/html;charset=utf-8");
// A limit of 0 removes PHP's execution time limit; presumably so that the
// multi-page scrape performed by this script is not aborted part-way through.
set_time_limit(0);
/**
 * Fetch one page of Google search results and return the distinct site roots
 * (scheme + host) linked from the result headings (<h3 class="r">).
 *
 * @param string $keywords Search query, inserted into the request URL as an
 *                         already URL-encoded fragment (for example
 *                         "site:example.com+inurl:upload"); literal spaces
 *                         are converted to "+".
 * @param int    $page     1-based result page number.
 * @param int    $num      Number of results requested per page.
 *
 * @return array Sequentially indexed list of unique "http(s)://host" strings
 *               in order of first appearance; empty when the request fails
 *               or no result heading is found.
 */
function geturl($keywords, $page, $num)
{
    $num = (int) $num;

    // The "start" offset has to advance by the requested page size; a fixed
    // step of 10 makes consecutive pages overlap whenever $num is not 10.
    $start = max(0, ((int) $page - 1) * $num);

    // A raw space would produce an invalid request line.
    $query = str_replace(' ', '+', trim($keywords));

    $url = 'http://www.google.com/search?sclient=psy-ab&hl=en&start=' . $start
        . '&source=hp&q=' . $query . '&pbx=1&oq=' . $query
        . '&num=' . $num . '&aq=f&aqi=g4';

    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        // Request failed (file_get_contents already raised a warning) or
        // the response was empty: there is nothing to parse.
        return array();
    }

    // Without the "s" modifier "." stops at a newline, so each match is the
    // remainder of a line that contains a result heading. Keep every such
    // line instead of only the first one.
    if (!preg_match_all('/<h3\s*class="r"\s*>.*/im', $content, $headings)) {
        return array();
    }
    $resultHtml = implode("\n", $headings[0]);

    // Capture the attribute text of each anchor tag; "\b" keeps tags such as
    // <abbr> or <address> from being treated as links.
    if (!preg_match_all('/<a\b([^>]*)>/i', $resultHtml, $anchors)) {
        return array();
    }

    $list = array();
    foreach ($anchors[1] as $attributes) {
        // First absolute http/https URL in the tag, truncated to its host.
        if (preg_match('/https?:\/\/[a-zA-Z0-9._-]+/', $attributes, $matches)) {
            $list[] = $matches[0];
        }
    }

    // array_unique() preserves the original keys; reindex so callers get a
    // plain 0..n-1 list.
    return array_values(array_unique($list));
}

// Driver: walk up to $page result pages for the query and print the links
// found on each one.
$keywords = 'site:kugou.com+inurl:upload';
$page = 10;
$num = 20;
for ($i = 1; $i <= $page; $i++) {
    $url = geturl($keywords, $i, $num);

    // The response is served as text/html, so each entry needs an explicit
    // line break; without one the header and all links run together.
    echo 'Page: ' . $i . ' Results Count: ' . count($url) . "<br />\n";
    foreach ($url as $u) {
        echo htmlspecialchars($u, ENT_QUOTES, 'UTF-8') . "<br />\n";
    }

    // Stop once a page comes back short.
    // NOTE(review): geturl() returns de-duplicated hosts, so this count can
    // fall below $num even when more result pages exist - confirm whether
    // stopping early here is intended.
    if (count($url) < $num) {
        break;
    }
}
?>

 

posted on 2013-10-01 22:40  =_=!  阅读(320)  评论(0)  编辑  收藏  举报

导航