微信公众号文章列表抓取

最近需要做一个公众号文章阅读奖励积分的功能,首先就得获取到文章列表。网上查了资料后,感觉通过微信公众平台图文编辑页面所用的接口来抓取,限制会少一点,可以满足需求,现在记录一下。

一:首先创建一个登录表单,包含账户和密码(即微信公众平台的账号密码)。点击登录后,后台请求对应接口获得一个二维码图片,然后 js 循环刷新显示二维码,直到扫码成功

<!DOCTYPE HTML>
<html>
<head>
    <meta charset="utf-8">
</head>
<body>
    <input id="name" type="text" placeholder="账户">
    <input id="pwd" type="password" placeholder="密码"> 
    <input onclick="showimg()" type="button" value="登录">
    <div id="showimg" style="display:none">正在获取...</div>
<script  src="http://libs.baidu.com/jquery/1.7.2/jquery.min.js"></script>
<script>
    /**
     * Click handler of the login button.
     *
     * Shows the QR-code area, starts refreshing the QR image every 3 seconds
     * and posts the credentials to the backend. The backend request only
     * returns once the login attempt has finished, so polling is stopped as
     * soon as any response (or a transport error) arrives.
     */
    function showimg() {
        // Locals instead of implicit globals: a global `name` is window.name,
        // which would be overwritten and survive page navigation.
        var account = $("#name").val();
        var password = $("#pwd").val();

        // A second click must not stack another interval on top of the first.
        if (showimg.timer) {
            clearInterval(showimg.timer);
        }
        $("#showimg").show();
        // Function reference instead of a string that would be eval'ed.
        showimg.timer = setInterval(function () { img(); }, 3000);

        function stopPolling() {
            clearInterval(showimg.timer);
            showimg.timer = null;
        }

        $.post("./wx.php?method=login", {'name': account, 'pwd': password}, function (rsg) {
            stopPolling();

            var data = null;
            try {
                data = jQuery.parseJSON(rsg);
            } catch (e) {
                // The backend answered with something that is not JSON.
                data = null;
            }

            // code === 0 means the login succeeded.
            if (data && !data.code) {
                window.location.href = "./wx.php?method=getwx";
                return;
            }
            $("#showimg").hide();
            alert(data && data.msg ? data.msg : "登录失败");
        }).fail(function () {
            stopPolling();
            $("#showimg").hide();
            alert("请求失败");
        });
    }
    /**
     * Re-renders the QR-code image inside #showimg.
     *
     * A timestamp is appended as query string so that every refresh uses a
     * new URL and the browser cannot serve a cached (stale) QR code. The
     * previous Math.round(Math.random()) only ever produced 0 or 1.
     */
    function img() {
        var stamp = new Date().getTime();
        $("#showimg").html("<img src='./wx/qrcode_tmall.png?" + stamp + "'>");
    }
</script>
</body>
</html>

二:控制器接收请求处理(Wx.php)

<?php
namespace app;

use app\Wxlogin;

class Wx
{
    /**
     * Entries per page. Wxlogin requests count=5 in its URLs, so this value
     * has to stay in sync with it.
     */
    const PAGE_SIZE = 5;

    /**
     * Log in to the WeChat Official Accounts Platform.
     *
     * Reads `name` and `pwd` from $_POST and passes them to Wxlogin::init().
     * Success is decided by whether a token is stored afterwards.
     *
     * @return string JSON object with `code` (0 ok, 1 login failed, 2 bad input) and `msg`
     */
    public function login()
    {
        $name = $this->stringParam($_POST, 'name', '');
        if ($name === '') {
            return $this->ajax(['code' => 2, 'msg' => '账号不能为空']);
        }
        $pwd = $this->stringParam($_POST, 'pwd', '');
        if ($pwd === '') {
            return $this->ajax(['code' => 2, 'msg' => '密码不能为空']);
        }

        $wx = new Wxlogin();
        $wx->init([
            'account'  => $name,
            'password' => $pwd,
            'key'      => 'tmall',
        ]);
        if (!$wx->getToken()) {
            return $this->ajax(['code' => 1, 'msg' => '登陆失败']);
        }
        return $this->ajax(['code' => 0, 'msg' => '登陆成功']);
    }

    /**
     * Search official accounts by keyword (optional feature).
     *
     * Not needed when only one account with a fixed fakeid is listed. Reads
     * `search` and `page` from $_GET.
     *
     * @return string|null JSON list of results; null after a redirect or
     *                     after an error message has been echoed
     */
    public function getfackid()
    {
        $search = $this->stringParam($_GET, 'search', '内涵段子');
        $wx = new Wxlogin();

        // Without a token the user has to log in and scan the QR code first.
        if (!$wx->getToken()) {
            header('location:/');
            // Stop here: continuing would start a login without credentials.
            return null;
        }

        $begin = ($this->currentPage() - 1) * self::PAGE_SIZE;
        $wx->init(['key' => 'tmall']);
        $data = $wx->getfackid($begin, $search);
        if (!$this->responseOk($wx, $data)) {
            return null;
        }

        // NOTE(review): the original read `app_msg_list`; the search endpoint
        // presumably returns its rows under `list` - confirm against a live
        // response. Both keys are accepted here.
        if (isset($data['app_msg_list'])) {
            $list = $data['app_msg_list'];
        } elseif (isset($data['list'])) {
            $list = $data['list'];
        } else {
            $list = [];
        }
        return $this->ajax($list);
    }

    /**
     * List the articles of the configured official account, paginated.
     *
     * A page with fewer entries than PAGE_SIZE means the publisher deleted
     * some articles. Reads `search` and `page` from $_GET.
     *
     * @return string|null JSON array [total, search, page, pageCount, list];
     *                     null after a redirect or after an error message
     *                     has been echoed
     */
    public function getwx()
    {
        $wx = new Wxlogin();

        // Without a token the user has to log in and scan the QR code first.
        if (!$wx->getToken()) {
            header('location:/');
            // Stop here: continuing would start a login without credentials.
            return null;
        }

        // The QR code is no longer needed once the login has succeeded.
        if (is_file('./wx/qrcode_tmall.png')) {
            unlink('./wx/qrcode_tmall.png');
        }

        $nowpage = $this->currentPage();
        $begin = ($nowpage - 1) * self::PAGE_SIZE;
        $wx->init(['key' => 'tmall']);

        // NOTE(review): the default query is 1, kept from the original; an
        // empty string may have been intended - confirm.
        $search = $this->stringParam($_GET, 'search', 1);
        $data = $wx->getwx($begin, $search);
        if (!$this->responseOk($wx, $data)) {
            return null;
        }

        $total = isset($data['app_msg_cnt']) ? (int) $data['app_msg_cnt'] : 0;
        // ceil() returns a float; cast so JSON carries 3 instead of 3.0.
        $psum = (int) ceil($total / self::PAGE_SIZE);
        $list = isset($data['app_msg_list']) ? $data['app_msg_list'] : [];

        return $this->ajax([$total, $search, $nowpage, $psum, $list]);
    }

    /**
     * Encode a value as JSON without escaping non-ASCII characters.
     *
     * @param mixed $data
     * @return string|false
     */
    public function ajax($data)
    {
        return json_encode($data, JSON_UNESCAPED_UNICODE);
    }

    /**
     * Read a string request parameter, falling back to $default when the key
     * is missing or not a string (e.g. `?search[]=x`).
     *
     * @param array  $source  $_GET or $_POST
     * @param string $key
     * @param mixed  $default
     * @return mixed
     */
    private function stringParam(array $source, $key, $default)
    {
        return isset($source[$key]) && is_string($source[$key]) ? $source[$key] : $default;
    }

    /**
     * Current 1-based page number from $_GET['page'], never below 1.
     *
     * The cast keeps non-numeric input from reaching the arithmetic, which
     * would raise a TypeError on PHP 8.
     *
     * @return int
     */
    private function currentPage()
    {
        $page = (int) $this->stringParam($_GET, 'page', '1');
        return $page < 1 ? 1 : $page;
    }

    /**
     * Check the `base_resp.ret` code of an API response.
     *
     * On failure a human-readable message is echoed. When the API itself
     * reported an error the stored token is cleared, as the original code
     * did, so the next request goes through the login again.
     *
     * @param Wxlogin $wx
     * @param mixed   $data decoded response, or null when the request failed
     * @return bool true when the response signals success
     */
    private function responseOk($wx, $data)
    {
        // No decodable response at all (network error, invalid JSON). The
        // token is left alone because nothing says it is invalid.
        if (!isset($data['base_resp']['ret'])) {
            echo '请求错误';
            return false;
        }

        $ret = (int) $data['base_resp']['ret'];
        if ($ret === 0) {
            return true;
        }

        $wx->setToken('');
        if ($ret === 200013) {
            echo '请求太频繁了';
        } elseif ($ret === 200003) {
            echo '无效会话';
        } else {
            echo '请求错误';
        }
        return false;
    }
}

// Autoloader: maps classes of the `app` namespace to files located next to
// this script, e.g. app\Wxlogin -> <this dir>/Wxlogin.php.
spl_autoload_register(function ($class) {
    $segments = explode('\\', ltrim($class, '\\'));
    // Only classes below the `app` namespace are handled here.
    if (count($segments) < 2 || array_shift($segments) !== 'app') {
        return;
    }
    // Namespace separators are not path separators on Unix, so the path is
    // built with DIRECTORY_SEPARATOR and anchored at this file's directory.
    $file = __DIR__ . DIRECTORY_SEPARATOR . implode(DIRECTORY_SEPARATOR, $segments) . '.php';
    // A missing file is left to the next autoloader instead of raising an
    // include warning.
    if (is_file($file)) {
        require_once $file;
    }
});

// Front controller: ?method=<name> selects the controller action to run.
$wx = new Wx();
// Method names are case-insensitive in PHP, so normalise before comparing.
$method = isset($_GET['method']) && is_string($_GET['method']) ? strtolower($_GET['method']) : '';
// Only the known actions may be invoked; the name comes straight from the
// request and must never be used to call arbitrary methods.
if (in_array($method, ['login', 'getfackid', 'getwx'], true)) {
    echo $wx->$method();
} else {
    http_response_code(404);
    echo json_encode(['code' => 2, 'msg' => 'unknown method'], JSON_UNESCAPED_UNICODE);
}

 

三:请求微信接口,供Wx.php调用(Wxlogin.php)

<?php
namespace app;

class Wxlogin
{
    //--------------------------------------------------------LOGIN START
    /**
     * Directory holding the token file, cookie jar, QR-code image and log.
     *
     * NOTE(review): the QR code is served to the browser from this directory,
     * so the token, cookies and log stored next to it are presumably
     * reachable over HTTP as well - confirm and restrict access.
     */
    const DATA_DIR = './wx';

    /** Maximum number of scan-status polls before the login is abandoned. */
    const MAX_POLLS = 20;

    /** Seconds to wait between two scan-status polls. */
    const POLL_INTERVAL = 2;

    /** Endpoints of the WeChat Official Accounts Platform used by this class. */
    private $_apis = [
        "host" => "https://mp.weixin.qq.com",
        "login" => "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=startlogin",
        "qrcode" => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=getqrcode&m=4300",
        "loginqrcode" => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=ask&token=&lang=zh_CN&f=json&ajax=1",
        "loginask" => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=ask&token=&lang=zh_CN&f=json&ajax=1&random=",
        "loginauth" => "https://mp.weixin.qq.com/cgi-bin/loginauth?action=ask&token=&lang=zh_CN&f=json&ajax=1",
        "bizlogin" => "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=login&lang=zh_CN"
    ];

    /** fakeid of the official account whose articles getwx() lists. */
    public $fakeid = "MzIxMTU3MDA4Ng==";

    /**
     * Whether fetch() verifies the TLS certificate of the remote host.
     * Set to false only where no CA bundle is available.
     */
    public $verify_ssl = true;

    private $_redirect_url = "";
    private $_key = "";

    /**
     * Path of the cookie jar shared by all requests of this key.
     */
    private function _getCookieFile()
    {
        return self::DATA_DIR . "/cookie_{$this->_key}.text";
    }

    /**
     * Path the QR-code image is saved to.
     */
    private function _getSavePath()
    {
        return self::DATA_DIR . '/' . $this->_qrcodeName();
    }

    /**
     * File name of the QR-code image for the current key.
     */
    private function _qrcodeName()
    {
        return "qrcode_{$this->_key}.png";
    }

    /**
     * Create the data directory when it does not exist yet.
     */
    private function _ensureDataDir()
    {
        !is_dir(self::DATA_DIR) && mkdir(self::DATA_DIR, 0755, true);
    }

    /**
     * Append a timestamped entry to the log file in the data directory.
     *
     * @param string $log_content
     */
    private function _log($log_content)
    {
        $this->_ensureDataDir();

        file_put_contents(self::DATA_DIR . '/wx.log', '[' . date("Y-m-d H:i:s") . ']' . PHP_EOL . $log_content . PHP_EOL . "------------------------ --------------------------" . PHP_EOL, FILE_APPEND);
    }

    /**
     * Read the stored session token.
     *
     * @return string the token, or '' when none is stored (the file may not
     *                exist yet before the first login)
     */
    public function getToken()
    {
        $file = self::DATA_DIR . '/token.txt';
        if (!is_file($file)) {
            return '';
        }
        $token = file_get_contents($file);
        return $token === false ? '' : $token;
    }

    /**
     * Store the session token; an empty string clears it.
     *
     * @param string $token
     */
    public function setToken($token)
    {
        $this->_ensureDataDir();
        file_put_contents(self::DATA_DIR . '/token.txt', $token, LOCK_EX);
    }

    /**
     * Prepare the client for the given key and log in when no token exists.
     *
     * @param array $options `key` is required; `account` and `password` are
     *                       needed only when a login has to be performed
     */
    public function init($options)
    {
        if (!isset($options["key"])) {
            die("Key is Null!");
        }
        $this->_key = $options["key"];
        // The cookie jar lives in the data directory, so it has to exist
        // before the first request is sent.
        $this->_ensureDataDir();

        if ($this->getToken()) {
            $this->_log("HAS Token !");
            return;
        }
        // Load the home page first so that the initial cookies end up in the jar.
        $this->fetch("https://mp.weixin.qq.com/", "", "text");
        $this->_log("start login!!");
        $this->start_login($options);
    }

    /**
     * Run the login flow: submit credentials, save the QR code, wait for the
     * scan to be confirmed, then store the token from the final redirect URL.
     *
     * @param array $options must contain `account` and `password`
     */
    private function start_login($options)
    {
        if (!isset($options["account"], $options["password"])) {
            $this->_log("login skipped: account or password missing");
            return;
        }
        $_res = $this->_login($options["account"], $options["password"]);
        if ($_res["code"]) {
            $this->_log($_res["msg"]);
            return;
        }
        if (!$this->_saveQRcode()) {
            return;
        }
        // Without a confirmed scan the final step cannot succeed.
        if (!$this->_waitForScan()) {
            return;
        }

        $this->_log("开始验证");
        $_res = $this->fetch($this->_apis["bizlogin"], [
            "post"  => ["lang" => "zh_CN", "f" => "json", "ajax" => 1, "random" => $this->getWxRandomNum(), "token" => ""],
            "refer" => $this->_redirect_url,
        ]);
        $this->_log(print_r($_res, true));
        if (!isset($_res["base_resp"]["ret"])) {
            $this->_log("error = no valid response");
            return;
        }
        if ($_res["base_resp"]["ret"] != 0) {
            $this->_log("error = " . (isset($_res["base_resp"]["err_msg"]) ? $_res["base_resp"]["err_msg"] : ''));
            return;
        }
        // The token is carried in the query string of the redirect URL.
        $redirect_url = isset($_res["redirect_url"]) ? (string) $_res["redirect_url"] : '';
        if (!preg_match('/token=([\d]+)/i', $redirect_url, $match)) {
            $this->_log("error = token not found in redirect_url");
            return;
        }
        $this->setToken($match[1]);
        $this->_log("验证成功,token: " . $match[1]);
    }

    /**
     * Poll the scan status until the login is confirmed, rejected, expired
     * or MAX_POLLS attempts have been made.
     *
     * @return bool true when the login was confirmed
     */
    private function _waitForScan()
    {
        $_ask_api = $this->_apis["loginask"];
        $_input = ["refer" => $this->_redirect_url];

        for ($_index = 1; $_index <= self::MAX_POLLS; $_index++) {
            $_res = $this->fetch($_ask_api . $this->getWxRandomNum(), $_input);
            // A failed request counts as "still waiting".
            $_status = isset($_res["status"]) ? (int) $_res["status"] : 0;

            switch ($_status) {
                case 1:
                    if (isset($_res["user_category"]) && $_res["user_category"] == 1) {
                        // Keep polling on the auth endpoint. It has no trailing
                        // "&random=", so add it; otherwise the random number
                        // would be glued onto "ajax=1".
                        $_ask_api = $this->_apis["loginauth"] . "&random=";
                        break;
                    }
                    $this->_log("Login success");
                    return true;
                case 4:
                    $this->_log("已经扫码");
                    break;
                case 2:
                    $this->_log("管理员拒绝");
                    return false;
                case 3:
                    $this->_log("登录超时");
                    return false;
                default:
                    if ($_ask_api == $this->_apis["loginask"]) {
                        $this->_log("请打开" . $this->_getSavePath() . ",用微信扫码");
                    } else {
                        $this->_log("等待确认");
                    }
            }
            sleep(self::POLL_INTERVAL);
        }
        $this->_log("超时");
        return false;
    }

    /**
     * Download the login QR code and save it to the data directory.
     *
     * @return bool false when the download or the write failed
     */
    private function _saveQRcode()
    {
        $_image = $this->fetch($this->_apis["qrcode"], ["refer" => $this->_redirect_url], "text");
        if (!is_string($_image) || $_image === '') {
            $this->_log("QR code download failed");
            return false;
        }
        $this->_ensureDataDir();
        if (file_put_contents($this->_getSavePath(), $_image, LOCK_EX) === false) {
            $this->_log("QR code could not be written to " . $this->_getSavePath());
            return false;
        }
        return true;
    }

    /**
     * Submit account and password to start a login.
     *
     * @param string $_username
     * @param string $_password plain-text password
     * @return array ['code' => '0'|'1', 'msg' => string]; on success the
     *               redirect URL is remembered for the following requests
     */
    private function _login($_username, $_password)
    {
        $_input["post"] = [
            'username' => $_username,
            // NOTE(review): presumably the endpoint expects the md5 digest of
            // the password; this is an API format, not password storage.
            'pwd' => md5($_password),
            'f' => 'json',
            'imgcode' => ""
        ];
        $_input["refer"] = "https://mp.weixin.qq.com";
        $_res = $this->fetch($this->_apis["login"], $_input);
        if (!isset($_res["base_resp"]["ret"])) {
            return ['code' => '1', 'msg' => 'login request failed'];
        }
        if ((int) $_res["base_resp"]["ret"] !== 0) {
            return ['code' => '1', 'msg' => isset($_res["base_resp"]["err_msg"]) ? $_res["base_resp"]["err_msg"] : ''];
        }
        $this->_redirect_url = "https://mp.weixin.qq.com" . (isset($_res["redirect_url"]) ? $_res["redirect_url"] : '');
        return ['code' => '0', 'msg' => 'ok'];
    }

    /**
     * Random value for the `random` request parameter: "0." plus 16 digits.
     *
     * @return string
     */
    public function getWxRandomNum()
    {
        return "0." . mt_rand(1000000000000000, 9999999999999999);
    }

    /**
     * Build a request URL on the platform host with an encoded query string.
     *
     * @param string $path   path below the host, starting with "/"
     * @param array  $params query parameters, unencoded
     * @return string
     */
    private function _buildUrl($path, array $params)
    {
        return $this->_apis["host"] . $path . '?' . http_build_query($params, '', '&', PHP_QUERY_RFC3986);
    }

    /**
     * Search official accounts by keyword.
     *
     * @param int    $begin  offset of the first result
     * @param string $search keyword; it is URL-encoded here
     * @return mixed decoded JSON response, null when the request failed
     */
    public function getfackid($begin, $search)
    {
        $url = $this->_buildUrl('/cgi-bin/searchbiz', [
            'action' => 'search_biz',
            'begin'  => $begin,
            'count'  => 5,
            'query'  => $search,
            'token'  => $this->getToken(),
            'lang'   => 'zh_CN',
            'f'      => 'json',
            'ajax'   => 1,
        ]);
        return $this->fetch($url);
    }

    /**
     * List articles of the account identified by $this->fakeid.
     *
     * @param int    $begin  offset of the first article
     * @param string $search keyword; it is URL-encoded here
     * @return mixed decoded JSON response, null when the request failed
     */
    public function getwx($begin, $search)
    {
        $url = $this->_buildUrl('/cgi-bin/appmsg', [
            'action' => 'list_ex',
            'begin'  => $begin,
            'count'  => 5,
            'fakeid' => $this->fakeid,
            'type'   => 9,
            'query'  => $search,
            'token'  => $this->getToken(),
            'lang'   => 'zh_CN',
            'f'      => 'json',
            'ajax'   => 1,
        ]);
        return $this->fetch($url);
    }

    /**
     * Send an HTTP request with the shared cookie jar.
     *
     * @param string     $url
     * @param array|null $_input    optional settings: `post` (fields; makes the
     *                              request a POST), `refer`, `useragent`,
     *                              `timeout` (connect timeout, seconds),
     *                              `total_timeout` (whole request, seconds),
     *                              `cookiefile`, `verify_ssl`; any non-array
     *                              value means "no settings"
     * @param string     $data_type 'json' decodes the body into an array,
     *                              anything else returns the raw body
     * @return mixed decoded array or raw body; null (json) or false (raw)
     *               when the request failed
     */
    public function fetch($url, $_input = null, $data_type = 'json')
    {
        if (!is_array($_input)) {
            $_input = [];
        }
        $cookie_file = isset($_input['cookiefile']) ? $_input['cookiefile'] : $this->_getCookieFile();
        $useragent = isset($_input['useragent']) ? $_input['useragent'] : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
        // Credentials and session cookies travel over this connection, so the
        // certificate is verified unless explicitly switched off.
        $verify = isset($_input['verify_ssl']) ? (bool) $_input['verify_ssl'] : (bool) $this->verify_ssl;

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_POST           => isset($_input['post']),
            CURLOPT_SSL_VERIFYPEER => $verify,
            CURLOPT_SSL_VERIFYHOST => $verify ? 2 : 0,
            CURLOPT_USERAGENT      => $useragent,
            CURLOPT_CONNECTTIMEOUT => isset($_input['timeout']) ? $_input['timeout'] : 5,
            // Upper bound for the whole transfer so a stalled response cannot
            // block the request forever.
            CURLOPT_TIMEOUT        => isset($_input['total_timeout']) ? $_input['total_timeout'] : 30,
            CURLOPT_COOKIEJAR      => $cookie_file,
            CURLOPT_COOKIEFILE     => $cookie_file,
        ]);
        if (isset($_input['post'])) {
            curl_setopt($ch, CURLOPT_POSTFIELDS, $_input['post']);
        }
        if (isset($_input['refer'])) {
            curl_setopt($ch, CURLOPT_REFERER, $_input['refer']);
        }

        $result = curl_exec($ch);
        if ($result === false) {
            $this->_log("request failed: " . curl_error($ch));
        }
        curl_close($ch);

        if ($data_type == 'json') {
            return $result === false ? null : json_decode($result, true);
        }
        return $result;
    }
}

注:fakeid是要抓取的公众号的标识(调用本文的getfackid方法也可以获取),也可在公众平台的图文编辑里获取,步骤如下:

1:草稿箱内的写新图文

 

 

2:先打开浏览器的开发者工具(要从请求中查看),然后点击超链接,输入要查询的公众号,点击搜索

3:在开发者工具中查看Network一栏中点击searchbiz...这一栏查看fakeid

补充:

因为我只获取单个公众号的文章,所以没有用到getfackid方法;需要做公众号搜索模式的,可以调用上面的getfackid方法获取公众号的fakeid
思路:获取到公众号列表后展现给前端,前端点击选择后提交给后端,后端将对应的fakeid存入session;获取文章列表的接口先判断session中是否存在fakeid,存在则用它获取文章列表

posted @ 2021-01-15 17:45  夜色0510  阅读(2084)  评论(0)  编辑  收藏  举报