微信公众号文章列表抓取
最近需要做一个公众号文章阅读奖励积分的功能,首先就得获取到文章列表。在网上查了资料后,感觉微信公众平台图文编辑页面所用接口的限制会少一点,可以满足需求,现在记录一下。
一:首先创建一个登录表单,包含账户和密码。点击登录后,后台请求对应接口获得一张二维码图片,然后前端用 js 定时刷新显示二维码,直到扫码成功(账号密码为微信公众平台的账号密码)
<!DOCTYPE HTML>
<html>
<head>
    <meta charset="utf-8">
</head>
<body>
    <!-- Login form: credentials are posted to the backend, which downloads a QR code
         that this page keeps re-displaying until the login request finishes. -->
    <input id="name" type="text" placeholder="账户">
    <input id="pwd" type="password" placeholder="密码">
    <input onclick="showimg()" type="button" value="登录">
    <div id="showimg" style="display:none">正在获取...</div>

    <script src="http://libs.baidu.com/jquery/1.7.2/jquery.min.js"></script>
    <script>
        // Handle of the QR-refresh timer; null while no refresh loop is running.
        var qrTimer = null;

        // Stops the QR-refresh loop if one is running.
        function stopQrRefresh() {
            if (qrTimer !== null) {
                clearInterval(qrTimer);
                qrTimer = null;
            }
        }

        // Click handler of the login button: starts the QR-refresh loop and posts the
        // credentials. The POST only returns once the backend's login flow has ended,
        // so the loop runs for the whole duration of the request.
        function showimg() {
            // Local variables on purpose: an implicit global called `name` would
            // overwrite window.name.
            var account = $("#name").val();
            var password = $("#pwd").val();

            // A second click must not stack another timer on top of the first one.
            stopQrRefresh();
            $("#showimg").text("正在获取...").show();
            qrTimer = setInterval(img, 3000);

            // NOTE(review): the article names the controller file "Wx.php" while this URL
            // uses "wx.php" - confirm the real file name on case-sensitive file systems.
            $.post("./wx.php?method=login", {'name': account, 'pwd': password}, null, "json")
                .done(function (data) {
                    stopQrRefresh();
                    // The backend answers {code: 0} on success and a non-zero code otherwise.
                    if (data && !data.code) {
                        window.location.href = "./wx.php?method=getwx";
                    } else {
                        $("#showimg").hide();
                        alert(data ? data.msg : "请求错误");
                    }
                })
                .fail(function () {
                    // Network error or a response that is not valid JSON.
                    stopQrRefresh();
                    $("#showimg").hide();
                    alert("请求错误");
                });
        }

        // Re-renders the QR code image. The timestamp query string changes on every call,
        // which forces the browser to re-download the file instead of using its cache.
        function img() {
            var cacheBuster = new Date().getTime();
            $("#showimg").html("<img src='./wx/qrcode_tmall.png?" + cacheBuster + "'>");
        }
    </script>
</body>
</html>
二:控制器接收请求处理(Wx.php)
<?php
namespace app;

use app\Wxlogin;

/**
 * HTTP entry points for the login page and the list endpoints.
 *
 * Every public action returns the string to be sent to the client; the dispatcher at
 * the bottom of this file echoes it.
 */
class Wx
{
    /** Items per page. Must match the `count=5` that Wxlogin puts into its request URLs. */
    const PAGE_SIZE = 5;

    /** Key passed to Wxlogin::init(); it names the cookie and QR code files. */
    const LOGIN_KEY = "tmall";

    /**
     * Logs in with the posted account/password.
     *
     * Wxlogin::init() runs the whole login flow (including waiting for the QR code to be
     * scanned), so this call only returns once that flow has finished.
     *
     * @return string JSON `{code, msg}`; code 0 = success, 1 = login failed, 2 = missing input
     */
    public function login()
    {
        $name = isset($_POST['name']) ? $_POST['name'] : '';
        if (!$name) {
            return $this->ajax(['code' => 2, 'msg' => '账号不能为空']);
        }

        $pwd = isset($_POST['pwd']) ? $_POST['pwd'] : '';
        if (!$pwd) {
            return $this->ajax(['code' => 2, 'msg' => '密码不能为空']);
        }

        $wx = new Wxlogin();
        $wx->init([
            'account'  => $name,
            'password' => $pwd,
            'key'      => self::LOGIN_KEY,
        ]);

        if (!$wx->getToken()) {
            return $this->ajax(['code' => 1, 'msg' => '登陆失败']);
        }

        return $this->ajax(['code' => 0, 'msg' => '登陆成功']);
    }

    /**
     * Searches official accounts by name (optional feature: not needed when only a single,
     * fixed account is listed; when used, getwx() has to be given the chosen fakeid, e.g.
     * through the session or a request parameter).
     *
     * Reads `search` (default '内涵段子') and `page` (default 1) from the query string.
     *
     * @return string|null JSON list of results, '' after a redirect to the login page,
     *                     or null after an error message has been echoed
     */
    public function getfackid()
    {
        $search = isset($_GET['search']) ? $_GET['search'] : '内涵段子';
        $wx = new Wxlogin();

        if ($this->_redirectIfLoggedOut($wx)) {
            return '';
        }

        $begin = ($this->_currentPage() - 1) * self::PAGE_SIZE;

        $wx->init(['key' => self::LOGIN_KEY]);
        $data = $wx->getfackid($begin, $search);

        if ($this->_reportApiError($wx, $data)) {
            return;
        }

        // NOTE(review): the original code read `app_msg_list` here, which is the key used by
        // the article-list response in getwx(). The account search response presumably
        // carries its results under `list` - confirm against a real response.
        if (isset($data['app_msg_list'])) {
            $list = $data['app_msg_list'];
        } elseif (isset($data['list'])) {
            $list = $data['list'];
        } else {
            $list = [];
        }

        return $this->ajax($list);
    }

    /**
     * Lists the articles of the configured account, one page at a time. A page holding
     * fewer items than the page size means the publisher deleted some articles.
     *
     * Reads `search` (default 1) and `page` (default 1) from the query string.
     *
     * @return string|null JSON array `[total, search, page, pageCount, articles]`, '' after a
     *                     redirect to the login page, or null after an error message has
     *                     been echoed
     */
    public function getwx()
    {
        $wx = new Wxlogin();

        if ($this->_redirectIfLoggedOut($wx)) {
            return '';
        }

        // The QR code is no longer needed once a token exists.
        $qrcode = './wx/qrcode_' . self::LOGIN_KEY . '.png';
        if (file_exists($qrcode)) {
            unlink($qrcode);
        }

        $nowpage = $this->_currentPage();
        $begin = ($nowpage - 1) * self::PAGE_SIZE;

        $wx->init(['key' => self::LOGIN_KEY]);

        // NOTE(review): the default of 1 is sent as the `query` filter; an empty string may
        // have been intended - kept as in the original.
        $search = isset($_GET['search']) ? $_GET['search'] : 1;
        $data = $wx->getwx($begin, $search);

        if ($this->_reportApiError($wx, $data)) {
            return;
        }

        $total = isset($data['app_msg_cnt']) ? $data['app_msg_cnt'] : 0;
        $list = isset($data['app_msg_list']) ? $data['app_msg_list'] : [];
        $psum = (int) ceil($total / self::PAGE_SIZE);

        return $this->ajax([$total, $search, $nowpage, $psum, $list]);
    }

    /**
     * Encodes a value as JSON without escaping non-ASCII characters.
     *
     * @param mixed $data
     * @return string|false
     */
    public function ajax($data)
    {
        return json_encode($data, JSON_UNESCAPED_UNICODE);
    }

    /**
     * Sends a redirect to the login page when no token is stored.
     *
     * The caller must stop when this returns true: continuing would make Wxlogin::init()
     * start a login attempt without credentials.
     *
     * @return bool true when the redirect header was sent
     */
    private function _redirectIfLoggedOut(Wxlogin $wx)
    {
        if ($wx->getToken()) {
            return false;
        }

        header('location:/');
        return true;
    }

    /**
     * Returns the requested page number as an integer >= 1.
     *
     * @return int
     */
    private function _currentPage()
    {
        $page = isset($_GET['page']) ? (int) $_GET['page'] : 1;
        return $page < 1 ? 1 : $page;
    }

    /**
     * Echoes an error message when an API response is not a success.
     *
     * A non-zero `base_resp.ret` also clears the stored token so that the next request
     * goes through the login again. A response without `base_resp.ret` (for example a
     * failed request) is reported as an error but leaves the token untouched.
     *
     * @param mixed $data decoded API response
     * @return bool true when an error was reported
     */
    private function _reportApiError(Wxlogin $wx, $data)
    {
        if (!is_array($data) || !isset($data['base_resp']['ret'])) {
            echo '请求错误';
            return true;
        }

        $ret = (int) $data['base_resp']['ret'];
        if ($ret === 0) {
            return false;
        }

        $wx->setToken('');

        switch ($ret) {
            case 200013:
                echo '请求太频繁了';
                break;
            case 200003:
                echo '无效会话';
                break;
            default:
                echo '请求错误';
                break;
        }

        return true;
    }
}

// Autoloader: maps app\Foo\Bar to Foo/Bar.php, preferring a file next to this script.
spl_autoload_register(function ($class) {
    $parts = explode('\\', $class);
    if (array_shift($parts) != 'app') {
        return;
    }

    $relative = implode(DIRECTORY_SEPARATOR, $parts) . '.php';
    $local = __DIR__ . DIRECTORY_SEPARATOR . $relative;
    include is_file($local) ? $local : $relative;
});

// Dispatcher: only the listed actions may be selected through ?method=...
$wx = new Wx();
$method = isset($_GET['method']) ? (string) $_GET['method'] : '';

if (in_array($method, ['login', 'getfackid', 'getwx'], true)) {
    echo $wx->$method();
} else {
    http_response_code(400);
    echo $wx->ajax(['code' => 2, 'msg' => '请求错误']);
}
三:请求微信接口,供Wx.php调用(Wxlogin.php)
<?php
namespace app;

/**
 * Scripted login to the mp.weixin.qq.com web console plus two list queries that reuse
 * the resulting session.
 *
 * State is kept in files under ./wx (relative to the current working directory):
 *   - token.txt           token parsed from the redirect URL returned after login
 *   - cookie_{key}.text   curl cookie jar
 *   - qrcode_{key}.png    QR code that has to be scanned to confirm the login
 *   - wx.log              append-only log
 */
class Wxlogin
{
    /** Directory holding token, cookies, QR code and log. */
    const DATA_DIR = './wx';

    /** Maximum number of status polls while waiting for the QR code to be scanned. */
    const MAX_POLLS = 20;

    //--------------------------------------------------------LOGIN START
    private $_apis = [
        "host"        => "https://mp.weixin.qq.com",
        "login"       => "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=startlogin",
        "qrcode"      => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=getqrcode&m=4300",
        "loginqrcode" => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=ask&token=&lang=zh_CN&f=json&ajax=1",
        "loginask"    => "https://mp.weixin.qq.com/cgi-bin/loginqrcode?action=ask&token=&lang=zh_CN&f=json&ajax=1&random=",
        "loginauth"   => "https://mp.weixin.qq.com/cgi-bin/loginauth?action=ask&token=&lang=zh_CN&f=json&ajax=1",
        "bizlogin"    => "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=login&lang=zh_CN"
    ];

    /** Identifier of the account whose articles getwx() lists. */
    public $fakeid = "MzIxMTU3MDA4Ng==";

    private $_redirect_url = "";
    private $_key = "";

    /** Path of the cookie jar used for every request. */
    private function _getCookieFile()
    {
        return self::DATA_DIR . "/cookie_{$this->_key}.text";
    }

    /** Path the QR code image is saved to. */
    private function _getSavePath()
    {
        return self::DATA_DIR . '/' . $this->_qrcodeName();
    }

    /** File name of the QR code image. */
    private function _qrcodeName()
    {
        return "qrcode_{$this->_key}.png";
    }

    /** Path of the token file. */
    private function _getTokenFile()
    {
        return self::DATA_DIR . '/token.txt';
    }

    /** Creates the data directory when it does not exist yet. */
    private function _ensureDataDir()
    {
        !is_dir(self::DATA_DIR) && mkdir(self::DATA_DIR, 0755, true);
    }

    /** Appends a timestamped entry to wx.log. */
    private function _log($log_content)
    {
        $this->_ensureDataDir();
        file_put_contents(
            self::DATA_DIR . '/wx.log',
            '[' . date("Y-m-d H:i:s") . ']' . PHP_EOL . $log_content . PHP_EOL
                . "------------------------ --------------------------" . PHP_EOL,
            FILE_APPEND
        );
    }

    /**
     * Returns the stored token.
     *
     * @return string the token, or '' when none is stored (callers test it for truthiness)
     */
    public function getToken()
    {
        $file = $this->_getTokenFile();
        if (!is_file($file)) {
            return '';
        }

        $token = file_get_contents($file);
        return $token === false ? '' : trim($token);
    }

    /**
     * Stores the token; pass '' to clear it.
     *
     * @param string $token
     */
    public function setToken($token)
    {
        $this->_ensureDataDir();
        file_put_contents($this->_getTokenFile(), $token, LOCK_EX);
    }

    /**
     * Sets the key and, when no token is stored yet, runs the login flow.
     *
     * @param array $options `key` is required; `account` and `password` are needed when a
     *                       login has to be performed
     */
    public function init($options)
    {
        if (!isset($options["key"])) {
            die("Key is Null!");
        }
        $this->_key = $options["key"];

        // The cookie jar lives in the data directory, so it has to exist before the
        // first request is made.
        $this->_ensureDataDir();

        if ($this->getToken()) {
            $this->_log("HAS Token !");
            return;
        }

        // Fetch the start page first; the response itself is not used.
        $this->fetch("https://mp.weixin.qq.com/", null, "text");
        $this->_log("start login!!");
        $this->start_login($options);
    }

    /**
     * Login flow: submit the credentials, save the QR code, wait until it has been
     * scanned and confirmed, then request the redirect URL and store its token.
     * Every failure is logged and ends the flow without a token being stored.
     */
    private function start_login($options)
    {
        $account = isset($options["account"]) ? $options["account"] : '';
        $password = isset($options["password"]) ? $options["password"] : '';

        $_res = $this->_login($account, $password);
        if ($_res["code"]) {
            $this->_log($_res["msg"]);
            return;
        }

        if (!$this->_saveQRcode()) {
            return;
        }

        // Verifying after a timeout or a rejection can only fail, so stop here.
        if (!$this->_waitForConfirmation()) {
            return;
        }

        $this->_log("开始验证");
        $_input = [
            "post"  => ["lang" => "zh_CN", "f" => "json", "ajax" => 1, "random" => $this->getWxRandomNum(), "token" => ""],
            "refer" => $this->_redirect_url,
        ];
        $_res = $this->fetch($this->_apis["bizlogin"], $_input);
        $this->_log(print_r($_res, true));

        if (!is_array($_res) || !isset($_res["base_resp"]["ret"])) {
            $this->_log("error = empty or invalid response");
            return;
        }
        if ($_res["base_resp"]["ret"] != 0) {
            $err = isset($_res["base_resp"]["err_msg"]) ? $_res["base_resp"]["err_msg"] : '';
            $this->_log("error = " . $err);
            return;
        }

        // The token is carried in the query string of the redirect URL.
        $redirect_url = isset($_res["redirect_url"]) ? $_res["redirect_url"] : '';
        if (!preg_match('/token=([\d]+)/i', $redirect_url, $match)) {
            $this->_log("error = no token in redirect_url");
            return;
        }

        $this->setToken($match[1]);
        $this->_log("验证成功,token: " . $this->getToken());
    }

    /**
     * Polls the login status every 2 seconds, at most MAX_POLLS times.
     *
     * Status values as handled here: 1 = confirmed, 4 = scanned, 2 = rejected by the
     * administrator, 3 = expired; anything else counts as still waiting. When the first
     * confirmation reports user_category 1, polling continues on the `loginauth` endpoint.
     *
     * @return bool true once the login is confirmed, false on rejection, expiry or timeout
     */
    private function _waitForConfirmation()
    {
        $askApi = $this->_apis["loginask"];
        $input = ["refer" => $this->_redirect_url];

        for ($attempt = 1; $attempt <= self::MAX_POLLS; $attempt++) {
            // Only the loginask URL already ends in "random="; the other endpoint needs
            // the parameter name added, otherwise the number gets glued onto "ajax=1".
            $suffix = ($askApi === $this->_apis["loginask"]) ? '' : '&random=';
            $res = $this->fetch($askApi . $suffix . $this->getWxRandomNum(), $input);

            $status = (is_array($res) && isset($res["status"])) ? (int) $res["status"] : 0;
            $category = (is_array($res) && isset($res["user_category"])) ? $res["user_category"] : null;

            if ($status === 1) {
                if ($category == 1) {
                    $askApi = $this->_apis["loginauth"];
                } else {
                    $this->_log("Login success");
                    return true;
                }
            } elseif ($status === 4) {
                $this->_log("已经扫码");
            } elseif ($status === 2) {
                $this->_log("管理员拒绝");
                return false;
            } elseif ($status === 3) {
                $this->_log("登录超时");
                return false;
            } elseif ($askApi === $this->_apis["loginask"]) {
                $this->_log("请打开" . $this->_getSavePath() . ",用微信扫码");
            } else {
                $this->_log("等待确认");
            }

            sleep(2);
        }

        $this->_log("超时");
        return false;
    }

    /**
     * Downloads the QR code and writes it to the path from _getSavePath().
     *
     * @return bool false when the download or the write failed (the reason is logged)
     */
    private function _saveQRcode()
    {
        $_input = ["refer" => $this->_redirect_url];
        $_res = $this->fetch($this->_apis["qrcode"], $_input, "text");

        if (!is_string($_res) || $_res === '') {
            $this->_log("error = QR code download failed");
            return false;
        }

        $this->_ensureDataDir();
        if (file_put_contents($this->_getSavePath(), $_res) === false) {
            $this->_log("error = QR code could not be written");
            return false;
        }

        return true;
    }

    /**
     * Submits the credentials and remembers the returned redirect URL, which is used as
     * the referer of the following requests.
     *
     * @return array `['code' => '0', 'msg' => 'ok']` on success, otherwise code '1' with
     *               the error message
     */
    private function _login($_username, $_password)
    {
        $_input = [
            "post" => [
                'username' => $_username,
                // NOTE(review): the endpoint presumably expects the MD5 of the password;
                // this is the request format, not a way of storing passwords - confirm.
                'pwd'      => md5((string) $_password),
                'f'        => 'json',
                'imgcode'  => "",
            ],
            "refer" => "https://mp.weixin.qq.com",
        ];

        $_res = $this->fetch($this->_apis["login"], $_input);

        if (!is_array($_res) || !isset($_res["base_resp"]["ret"])) {
            return ['code' => '1', 'msg' => 'empty or invalid response'];
        }
        if ($_res["base_resp"]["ret"] !== 0) {
            $err = isset($_res["base_resp"]["err_msg"]) ? $_res["base_resp"]["err_msg"] : '';
            return ['code' => '1', 'msg' => $err];
        }

        $path = isset($_res["redirect_url"]) ? $_res["redirect_url"] : '';
        $this->_redirect_url = "https://mp.weixin.qq.com" . $path;
        return ['code' => '0', 'msg' => 'ok'];
    }

    /**
     * Returns "0." followed by 16 random digits, used as the `random` request parameter.
     *
     * @return string
     */
    public function getWxRandomNum()
    {
        return "0." . mt_rand(1000000000000000, 9999999999999999);
    }

    /**
     * Searches accounts by name.
     *
     * @param int    $begin  offset of the first result
     * @param string $search search text; URL-encoded here, so pass it raw
     * @return mixed decoded JSON response, or null when the request failed
     */
    public function getfackid($begin, $search)
    {
        $url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?" . $this->_query([
            'action' => 'search_biz',
            'begin'  => $begin,
            'count'  => 5,
            'query'  => $search,
            'token'  => $this->getToken(),
            'lang'   => 'zh_CN',
            'f'      => 'json',
            'ajax'   => 1,
        ]);

        return $this->fetch($url);
    }

    /**
     * Lists the articles of the account identified by $this->fakeid.
     *
     * @param int    $begin  offset of the first article
     * @param string $search value of the `query` parameter; URL-encoded here, so pass it raw
     * @return mixed decoded JSON response, or null when the request failed
     */
    public function getwx($begin, $search)
    {
        $url = "https://mp.weixin.qq.com/cgi-bin/appmsg?" . $this->_query([
            'action' => 'list_ex',
            'begin'  => $begin,
            'count'  => 5,
            'fakeid' => $this->fakeid,
            'type'   => 9,
            'query'  => $search,
            'token'  => $this->getToken(),
            'lang'   => 'zh_CN',
            'f'      => 'json',
            'ajax'   => 1,
        ]);

        return $this->fetch($url);
    }

    /** Builds a URL-encoded query string, always joined with a plain "&". */
    private function _query(array $params)
    {
        return http_build_query($params, '', '&');
    }

    /**
     * Performs an HTTP request with curl, sharing one cookie jar between calls.
     *
     * @param string     $url
     * @param array|null $_input    optional settings:
     *                              `post` (fields; switches the request to POST), `refer`,
     *                              `useragent`, `cookiefile`, `timeout` (connect timeout in
     *                              seconds, default 5), `total_timeout` (whole request in
     *                              seconds, default 30), `verify_ssl` (default true; set to
     *                              false only when the host has no usable CA bundle)
     * @param string     $data_type 'json' decodes the body into an array; anything else
     *                              returns the raw body
     * @return mixed decoded array (null when the request failed or the body is not JSON)
     *               for 'json'; otherwise the raw body, or false when the request failed
     */
    public function fetch($url, $_input = null, $data_type = 'json')
    {
        // Callers may pass null; anything that is not an array means "no options".
        $_input = is_array($_input) ? $_input : [];

        $useragent = isset($_input['useragent'])
            ? $_input['useragent']
            : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2';
        $cookieFile = isset($_input['cookiefile']) ? $_input['cookiefile'] : $this->_getCookieFile();
        $verifySsl = !isset($_input['verify_ssl']) || $_input['verify_ssl'];

        $this->_ensureDataDir();

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // Credentials and the session token travel over this connection, so the
        // certificate is verified unless the caller explicitly opts out.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $verifySsl);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $verifySsl ? 2 : 0);

        if (isset($_input['post'])) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $_input['post']);
        }
        if (isset($_input['refer'])) {
            curl_setopt($ch, CURLOPT_REFERER, $_input['refer']);
        }

        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, isset($_input['timeout']) ? $_input['timeout'] : 5);
        curl_setopt($ch, CURLOPT_TIMEOUT, isset($_input['total_timeout']) ? $_input['total_timeout'] : 30);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);

        $result = curl_exec($ch);
        $error = ($result === false) ? curl_error($ch) : '';
        curl_close($ch);

        if ($result === false) {
            $this->_log("curl error = " . $error . " (" . $url . ")");
            return $data_type == 'json' ? null : false;
        }

        if ($data_type == 'json') {
            $result = json_decode($result, true);
        }

        return $result;
    }
}
注:fakeid 是要抓取的公众号的标识(可以调用本文的 getfackid 方法获取),也可以在公众平台的图文编辑页面里获取,步骤如下:
1:草稿箱内的写新图文
2:先打开浏览器的开发者工具(要从请求中查看),然后点击超链接,输入要查询的公众号,点击搜索
3:在开发者工具的 Network 一栏中,点击以 searchbiz 开头的那条请求,在其返回数据中查看 fakeid
补充:
因为我只获取单个公众号的文章,所以没有用到 getfackid 方法;需要做公众号搜索功能的,可以调用上面的 getfackid 方法获取公众号的 fakeid
思路:获取到公众号列表后展示给前端,前端点击选择后提交给后端,后端将对应的 fakeid 存入 session;获取文章列表的接口先判断 session 中是否存在 fakeid,存在则用它获取文章列表