利用 PHP 记录搜索引擎蜘蛛爬虫访问痕迹的示例代码

/**
 * Identify which known search-engine crawler, if any, sent the request.
 *
 * The User-Agent is lower-cased and searched for each signature below in
 * declaration order; the label of the first signature found is returned.
 *
 * @param string|null $userAgent User-Agent string to inspect. When omitted,
 *                               $_SERVER['HTTP_USER_AGENT'] is used.
 * @return string|false Crawler label (e.g. 'Google', 'Baidu'), or false when
 *                      the User-Agent is missing/empty or matches no signature.
 */
function isSpider($userAgent = null){
    // Label => lower-case signature expected somewhere inside the User-Agent.
    $bots = array(
        'Google'    => 'googlebot',
        'Baidu'     => 'baiduspider',
        'Yahoo'     => 'yahoo slurp',
        'Soso'      => 'sosospider',
        'Msn'       => 'msnbot',
        // NOTE(review): the trailing space is kept from the original; confirm
        // it is intentional, since it requires a space right after "scooter".
        'Altavista' => 'scooter ',
        'Sogou'     => 'sogou spider',
        'Yodao'     => 'yodaobot'
    );

    if ($userAgent === null) {
        // The header is optional (and absent under CLI), so guard the lookup.
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }
    $userAgent = strtolower((string) $userAgent);

    // An empty User-Agent cannot belong to any listed crawler.
    if ($userAgent === '') {
        return false;
    }

    foreach ($bots as $name => $signature) {
        // Haystack is the User-Agent, needle is the signature. Compare with
        // !== false because a match at offset 0 is still a match.
        if (strpos($userAgent, $signature) !== false) {
            return $name;
        }
    }
    return false;
}

// Record crawler visits: when isSpider() recognises the current request,
// append one line (time, crawler label, User-Agent, requested URL) to robot.txt.
// NOTE(review): the original comment also mentions blocking scrapers that send
// an empty HTTP_USER_AGENT; nothing in this block does that.
$spi = isSpider();
if ($spi) {
    // Both values come from the client. Collapse control characters (CR/LF
    // included) so a crafted request cannot inject extra lines into the log.
    $tlc_thispage = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    $tlc_thispage = addslashes(preg_replace('/[\x00-\x1F\x7F]+/', ' ', $tlc_thispage));
    $PR = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    $PR = preg_replace('/[\x00-\x1F\x7F]+/', ' ', $PR);

    $file = 'robot.txt';
    // date() defaults to the current time; a bare mktime() is deprecated and
    // raises ArgumentCountError on PHP 8.
    $time = date('Y-m-d H:i:s');

    // FILE_APPEND + LOCK_EX keeps concurrent requests from interleaving lines,
    // and the return value is checked instead of writing to a failed handle.
    $line = "Time:{$time} ROBOT:{$spi} AGENT:{$tlc_thispage} URL:{$PR} \r\n";
    if (file_put_contents($file, $line, FILE_APPEND | LOCK_EX) === false) {
        error_log("Unable to append crawler visit to {$file}");
    }
}

 

posted @ 2017-03-07 11:11  chenjiacheng  阅读(520)  评论(0)  编辑  收藏  举报