抓取 Google 链接的 PHP 代码
<?php
/**
 * Google result scraper.
 *
 * Requests Google web-search result pages for a fixed query and prints, page
 * by page, the distinct "scheme://host" prefixes found in the result links.
 */

// Any text that precedes the opening PHP tag is emitted as output first, so the
// header can only be set when output buffering is on; calling header() after
// output has started would just raise a warning.
if (!headers_sent()) {
    header("Content-Type: text/html;charset=utf-8");
}
set_time_limit(0);

/**
 * Extracts the distinct "scheme://host" prefixes from result headings.
 *
 * Every `<h3 class="r">` fragment (from the tag to the end of its line) is
 * scanned for `<a ...>` tags, and the first http/https URL inside each tag's
 * attributes contributes its scheme and host.
 *
 * @param string $html Raw HTML of a result page.
 *
 * @return array List of unique prefixes (e.g. "http://example.com"), in order
 *               of first appearance, indexed from 0.
 */
function geturl_extract_hosts($html)
{
    // preg_match_all() yields 0 on "no match" and false on a PCRE error;
    // both mean there is nothing to extract.
    if (!preg_match_all('/<h3\s*class="r"\s*>.*/im', $html, $headings)) {
        return [];
    }

    $seen = [];
    foreach ($headings[0] as $fragment) {
        if (!preg_match_all('/<a(.*?)>/', $fragment, $anchors)) {
            continue;
        }
        foreach ($anchors[1] as $attributes) {
            // "+" (not "*") so that a bare "http://" without a host is skipped.
            if (preg_match('/https?:\/\/[a-zA-Z0-9._-]+/', $attributes, $found)) {
                // Keyed by the prefix itself: de-duplicates while keeping order.
                $seen[$found[0]] = true;
            }
        }
    }

    return array_keys($seen);
}

/**
 * Fetches one Google result page and returns the hosts it links to.
 *
 * @param string        $keywords Query string, already URL-encoded by the
 *                                caller (e.g. "+" between terms). Literal
 *                                spaces are converted to "+"; nothing else is
 *                                encoded, so existing pre-encoded queries keep
 *                                working.
 * @param int           $page     1-based page number.
 * @param int           $num      Results requested per page.
 * @param callable|null $fetcher  Optional replacement for the HTTP request:
 *                                receives the request URL and returns the
 *                                response body, or false on failure. Defaults
 *                                to file_get_contents().
 *
 * @return array List of unique "scheme://host" strings; empty when the request
 *               fails or the page contains no recognisable result headings.
 */
function geturl($keywords, $page, $num, $fetcher = null)
{
    $num = (int) $num;
    // "start" is a result offset, so it has to advance by the page size;
    // a fixed step of 10 makes pages overlap whenever $num is not 10.
    $start = max(0, ((int) $page - 1) * $num);
    $query = str_replace(' ', '+', (string) $keywords);

    $url = 'http://www.google.com/search?sclient=psy-ab&hl=en'
        . "&start={$start}&source=hp&q={$query}&pbx=1&oq={$query}"
        . "&num={$num}&aq=f&aqi=g4";

    $content = is_callable($fetcher)
        ? call_user_func($fetcher, $url)
        : file_get_contents($url);

    // file_get_contents() returns false on failure (network error, HTTP error).
    if (!is_string($content) || $content === '') {
        return [];
    }

    return geturl_extract_hosts($content);
}

$keywords = 'site:kugou.com+inurl:upload';
$page = 10;
$num = 20;

for ($i = 1; $i <= $page; $i++) {
    $url = geturl($keywords, $i, $num);
    echo 'Page: ' . $i . ' Results Count: ' . count($url) . "<br />\n";
    foreach ($url as $u) {
        echo htmlspecialchars($u, ENT_QUOTES, 'UTF-8') . "<br />\n";
    }
    // NOTE(review): the count is taken after de-duplication, so this can stop
    // before the last page when several results share a host -- confirm that
    // stopping early is the intended behaviour.
    if (count($url) < $num) {
        break;
    }
}
?>