写一个百度url收录检测的web_php小工具

为了网站做SEO的需要,统计网站的页面收录率,写个小工具,目前虽然还不完善,但很想跟大家分享一下。

使用方法:可以先用 sitemapX 软件生成网站链接的列表。这个列表比较靠谱,因为网站内部能链接到的页面基本都是比较重要的页面。

把生成的列表粘贴进小工具,查询即可。--目前如果一次请求过多,会出现被百度屏蔽的情况,目前还没解决,大家有啥好的方案可以分享来哈!

测试地址:http://zhidong10.com/site_rs_baidu/baidu.php

以后会增加:收录、未收录筛选,手动设置请求参数,数据导出,未收录重新检测等。2013-03-23

废话不多说,上代码。

发送请求、数据展现:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        <title>百度收录查询</title>
        <link href="css/reset.css" rel="stylesheet" type="text/css" />
        <link href="css/ui.css" rel="stylesheet" type="text/css" />
        <script type="text/javascript" src="js/jquery-1.7.1.min.js"></script>
    </head>

    <body>
        <h1 class="tit">百度收录查询</h1>
        <div class="enterbox">
            <textarea id="TextBox1" ></textarea>
        </div>
        <div class="sub_btn"><a href="javascript: testGG()" >提交</a></div>
        <div class="resutl_tit">查询结果:</div>
        <div id="maindiv">
                  <div id="div3" style="width: 750px;"><!--<a href="javascript:void(0);" title="点击重新查询"><img src="image/rst.gif"></a><span id="rehome">←重新查询</span>--> 
                <!--<div onclick="_export()" id="div_export"></div>-->
                <div id="div4"><span id="num_total">总:</span><span id="num_total2">0</span><span id="num_sl">收录:</span><span id="num_sl2">0</span><span id="num_nosl">未收录:</span><span id="num_nosl2">0</span><span id="percent">收录率:</span><span id="num_pct"></span></div>
            </div>
            <table class="tb1">
                <colgroup>
                    <col id="col1" />
                        <col id="col2" />
                        <col id="col3" />
                        <col id="col4" />
                        <col id="col5" />
                </colgroup>
                <thead id="thead">
                    <tr>
                        <th scope="col"><span>No.</span></th>
                        <th scope="col"><span>标题</span></th>
                        <th scope="col"><span>网址</span></th>
                        <th scope="col" class="th2"><div id="div5"><img src="image/sort.gif" id="s1"></div>
                            <span id="sp">收录</span></th>
                        <th style="border-bottom:1px solid #a5a5a5;border-right:1px solid #a5a5a5" scope="col" class="kz"><span>快照</span></th>
                    </tr>
                </thead>
            </table>
        </div>
        <script type="text/javascript">
            // Shared state for the current query batch; testGG() replaces it on every submit.
            var count = {
                total:0,       // number of URLs in the batch (denominator of the inclusion rate)
                time : '',     // holds the setInterval handle while the batch is being dispatched
                url_list:"",   // replaced by the array of textarea lines once a batch is submitted
                star:0         // index into url_list of the next URL that run() will dispatch
            };
            /**
             * Ask rs.php whether `url` is indexed by Baidu and render the answer into
             * the result row whose id is "tr" + num.
             *
             * @param {string} url  Absolute URL to check.
             * @param {number} num  Zero-based row index of the row to update.
             */
            function get_rs(url, num){
                // Resolve the row now rather than when the reply arrives: if the table is
                // cleared and refilled in the meantime, a late reply then updates the old
                // (detached) row instead of a new row that happens to reuse the same id.
                var row = $("#tr"+num);
                var title = row.find(".title");
                var rs = row.find(".yo div");
                var kz = row.find(".kz div");

                // Stop the loading spinners and flag the row when no usable answer came back.
                function markFailed(){
                    title.text("数据异常");
                    rs.attr("class","");
                    kz.attr("class","");
                    kz.text("-");
                }

                $.ajax({
                    url: "rs.php",
                    // Passing an object makes jQuery percent-encode the values; concatenating
                    // the raw URL would cut it off at its first "&" or "#".
                    // "time" is a cache-buster.
                    data: { site: url, time: new Date().getTime() },
                    dataType:"json",
                    success: function(data){
                        if(!data) {
                            markFailed();
                            if(window.console){
                                console.log("data:"+data);
                            }
                            return;
                        }
                        // The title comes from an arbitrary remote page, so it is inserted as
                        // text, never as markup.
                        if(data.error == 1){
                            title.text(data.title);
                            rs.attr("class","iconwarn");
                            kz.attr("class","");
                            kz.text("-");
                        }
                        if(data.error == 0){
                            title.text(data.title);
                            rs.attr("class","iconok");
                            kz.attr("class","");
                            kz.text(data.date);
                        }
                        // Recompute the summary counters from the rows currently in the table.
                        var included = $(".yo .iconok").length;
                        $("#num_sl2").html(included);
                        $("#num_nosl2").html($(".yo .iconwarn").length);
                        var per = count.total ? Math.floor((included/count.total)*100) : 0;
                        $("#num_pct").html(per+"%");
                    },
                    // Network failure, HTTP error or a reply that is not valid JSON.
                    error:function(){
                        markFailed();
                    }
                });
            }

            /**
             * Click handler of the submit link: reset the result table and counters, read
             * the URLs from the textarea and start dispatching them one per second.
             */
            function testGG(){
                // A previous batch may still be dispatching; stop its timer first so that
                // two timers never feed rows into the table at the same time.
                clearInterval(count.time);

                // Clear the previous results.
                $(".tb1 tbody tr").remove();
                count = {
                    total:0,
                    time : '',
                    url_list:"",
                    star:0
                };
                $("#num_total2").html("0");
                $("#num_sl2").html("0");
                $("#num_nosl2").html("0");
                $("#num_pct").html("");

                var txt = document.getElementById("TextBox1");
                // Drop blank lines so they are neither counted in the total nor sent off
                // as a bare "http://" query.
                count.url_list = $.grep(txt.value.split("\n"), function(line){
                    return $.trim(line) !== "";
                });
                count.total = count.url_list.length;
                if(count.total === 0){
                    return; // nothing to query; the counters already show 0
                }
                $("#num_total2").html(count.total);
                // One request per second, to avoid being treated as an attack and blocked by Baidu.
                count.time = setInterval(function(){
                    run();
                }, 1000);
            }
            /**
             * Timer tick: append a result row for the next pending URL and start its
             * lookup; once every URL has been dispatched, stop the timer.
             */
            function run(){
                var len = count.url_list.length;
                if(count.star >= len){
                    clearInterval(count.time);
                    return;
                }

                var order =  (count.star%2==0?"odd1":"odd");
                var url = $.trim(count.url_list[count.star]);
                // Prepend a scheme only when none is present. Comparing just the first
                // seven characters with "http://" turned "https://..." into
                // "http://https://...".
                if(!/^https?:\/\//i.test(url)){
                    url = "http://"+url;
                }

                var row = $('<tr class="'+order+'" id="tr'+count.star+'"> <td class="num">'+(count.star+1)+'</td><td class="title">-</td><td class="url2"><a target="_blank"></a></td><td class="yo"><div class="iconloading"></div></td><td class="kz" style="border:1px;"><div class="iconloading">  </div></td></tr>');
                // Fill in the link through the DOM API so that quotes or "<" in the pasted
                // text cannot break out of the attribute or inject markup.
                row.find(".url2 a").attr("href", url).text(url);
                $(".tb1").append(row);

                get_rs(url,count.star);
                count.star++;
            }
        </script>
    </body>
</html>

请求处理

<?php

// URL to check, supplied by the page script as ?site=...
// Example value: http://xsh.changyou.com/dhsh/events/events.shtml
$site = (isset($_GET['site']) && is_string($_GET['site'])) ? trim($_GET['site']) : '';

// The value is later handed to file_get_contents(), which also opens local paths and
// non-HTTP stream wrappers, so anything that is not an http(s) URL is refused here.
// The reply uses the same shape as the "page not reachable" answer further down.
if (!preg_match('#^https?://#i', $site)) {
    echo json_encode(array('error' => 1, 'title' => '路径不存在'));
    return;
}

/**
 * Produce the "not indexed" JSON reply when a match result has no usable first match.
 *
 * @param mixed  $r Match array as filled in by preg_match_all().
 * @param string $t Page title to include in the reply.
 * @return string|null JSON {"error":1,"title":$t} when $r, $r[0] or $r[0][0] is falsy;
 *                     null when $r[0][0] holds a usable value.
 */
function fs($r, $t) {
    // Checked left to right, stopping at the first falsy level.
    $hasFirstMatch = $r && $r[0] && $r[0][0];
    if ($hasFirstMatch) {
        return null;
    }
    return json_encode(array('error' => 1, 'title' => $t));
}

/**
 * Return the text between the first $start marker and the next $end marker in $str.
 *
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @param string $str   Text to search.
 * @return string The text between the markers; everything after $start when $end does
 *                not follow it; '' when $start does not occur or a marker is empty.
 */
function str_substr($start, $end, $str) {
    // explode() rejects an empty delimiter, so there is nothing to cut on.
    if ($start === '' || $end === '') {
        return '';
    }

    $temp = explode($start, (string) $str, 2);
    if (count($temp) < 2) {
        // $start never occurs. Reading $temp[1] here would raise an "undefined
        // offset/key" warning, which ends up in the response body when errors are displayed.
        return '';
    }

    $content = explode($end, $temp[1], 2);
    return $content[0];
}

/**
 * Download $durl with cURL and hand back whatever curl_exec() produced.
 *
 * @param string $durl URL to request.
 * @return string|bool Result of curl_exec() with CURLOPT_RETURNTRANSFER enabled
 *                     (the response body, or false on failure).
 */
function curl_file_get_contents($durl) {
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $durl,
        CURLOPT_TIMEOUT        => 20, // seconds
        CURLOPT_RETURNTRANSFER => 1,  // return the body instead of printing it
    ));
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}

// Request handling. Replies with one JSON object:
//   {"error":1,"title":...}             page unreachable, or no dated Baidu result found
//   {"error":0,"date":...,"title":...}  a dated Baidu result was found

// Declare the body as JSON so that a browser opening this script directly does not
// render the fetched page title as HTML.
if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
}

// Fetch the page itself first: an unreachable page is reported right away without
// spending a Baidu request on it.
// "@" keeps PHP warnings out of the JSON body; failure shows up as a falsy result.
$d = @file_get_contents($site);
if (!$d) {
    echo json_encode(array('error' => 1, 'title' => "路径不存在"));
    return;
}
$s = str_substr("<title>", "</title>", $d);

// The URL is the search term, so it must be percent-encoded; sent raw, any "&", "#"
// or "+" inside it would truncate or alter the query.
$buffer = @file_get_contents("http://www.baidu.com/s?wd=" . urlencode($site));

// Narrow the result page down step by step; a step without a match means "not indexed".
// Step 1: first <table> of the result page.
preg_match_all("/<table[^>]+>.+?<\/table>/", (string) $buffer, $tables);
$tmp = fs($tables, $s);
if ($tmp) {
    echo $tmp;
    return;
}

// Step 2: first <span> inside that table, e.g.
// <span class="g">  <b>xsh.changyou.com</b>/<b>dhsh</b>/<b>events</b>/events... 2013-1-27  </span>
preg_match_all("/<span[^>]+>.+?<\/span>/", $tables[0][0], $spans);
$tmp = fs($spans, $s);
if ($tmp) {
    echo $tmp;
    return;
}

// Step 3: a yyyy-m-d date inside that span (shown as the snapshot date by the page script).
preg_match_all("/(\d{4}-\d+-\d+)/", $spans[0][0], $dates);
$tmp = fs($dates, $s);
if ($tmp) {
    echo $tmp;
    return;
}

echo json_encode(array('error' => 0, 'date' => $dates[0][0], 'title' => $s));
?>

 

svn: http://php-rss-ajax.googlecode.com/svn/trunk/site_rs_baidu

源码下载:猛击此处

反馈可以留言拍砖,或者发邮箱里:zhidong10@foxmail.com

 

 

 

posted on 2013-03-17 00:11  靖儿  阅读(1226)  评论(2)  编辑  收藏  举报

导航