路飞学城-Python爬虫实战密训-第2章
1,本节学习体会、心得 :
本章主要是以实战为主,核心知识点都已经在第一章里讲得差不多了。发现第一章只要认真听并理解了的话,这个web微信的小项目就变得比较容易;大部分都比较容易,只有一个小坑:有的微信账号需要请求wx2.qq.com地址,有的需要请求wx.qq.com地址。这也是在交了作业之后想给配评优炫耀下成果,结果他扫了之后登陆不上。尴尬!然后又各种打印找问题才发现了这个坑。我的解决方法是:扫码成功后会得到一个跳转的url,然后判断wx2.qq.com是否包含在这个url字符串中;如果为True,就把wx2.qq.com存入session;如果为False,就把wx.qq.com存入session。拼接请求url的时候把域名那部分用session里存的值替换就行了。挺简单,一次就成功了。
2,本节的知识点总结:
总结:
1. 通过谷歌浏览器查找要请求的url
2. 发送请求获取登陆二维码
3. 通过长轮询获取二维码是否被扫;如果被扫,获取登陆状态以及跳转的url,将登陆成功返回的xml转成字典存至session,作为后续请求参数
4. 通过url 初始化数据 获取最近联系人(这里要获取cookie) 另外这一步要把SyncKey和自己的信息 等内容存至session后边作为请求参数
5.所有联系人(获取所有联系人需要带上cookie)
6.通过每个用户的UserName发送数据:前端ajax提交数据,后端接收并处理后作为参数发送到send的url
7.通过前端ajax请求,长轮询获取新消息状态;如果有新消息,则请求消息url获取新信息,并把SyncKey更新,以便监听下次的新消息
到此就结束了
下面贴出我的实例代码 给大家参考下:
后端的:
# Small Flask backend that proxies the Web WeChat login, contact-list,
# send-message and new-message polling endpoints for a browser front end.
import json
import logging
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, redirect, render_template, request, session

logger = logging.getLogger(__name__)

app = Flask(__name__)
# NOTE(review): the session cookie carries the WeChat ticket and cookies, so
# the signing key should be a real secret. SECRET_KEY from the environment
# takes precedence; the original literal is kept only as a fallback.
app.secret_key = os.environ.get('SECRET_KEY', 'sadfds')


def sup_text(text):
    """Convert the XML login reply into a flat dict.

    Every direct child of the ``<error>`` element becomes one
    ``tag name -> text`` entry.

    :param text: XML document returned by the redirect URL.
    :return: dict of child tag names to their text; empty when the document
        has no ``<error>`` element.
    """
    soup = BeautifulSoup(text, 'html.parser')
    root = soup.find(name='error')
    if root is None:
        return {}
    return {item.name: item.text for item in root.find_all(recursive=False)}


@app.route('/login', methods=['GET', 'POST'])
def login():
    """Fetch a login uuid and render the QR-code page.

    Stores the uuid under ``session['wcd']`` and the response cookies under
    ``session['login_cookie']``.
    """
    if request.method != 'GET':
        # The view only implements GET; previously a POST fell through and
        # returned None, which Flask rejects with a server error.
        return 'Method Not Allowed', 405

    ctime = str(int(time.time() * 1000))
    # The timestamp is now actually interpolated into the query string.
    vurl = (
        'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb'
        '&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage'
        '&fun=new&lang=zh_CN&_={0}'
    ).format(ctime)
    ret = requests.get(vurl)
    logger.debug('jslogin response: %s', ret.text)
    wcd = re.findall('uuid = "(.*)";', ret.text)[0]
    session['wcd'] = wcd
    session['login_cookie'] = ret.cookies.get_dict()
    return render_template('login.html', wcd=wcd)


@app.route('/check_login', methods=['GET'])
def check_login():
    """Poll the login endpoint for the QR code stored in the session.

    Returns JSON with ``code`` 408 (default), 201 (scanned; ``img`` holds the
    avatar) or 200 (confirmed). On 200 the redirect URL is fetched and its
    parsed ticket and cookies are stored in the session, together with the
    host (``wx2.qq.com`` or ``wx.qq.com``) to use for later requests.
    """
    resp = {'code': 408}
    ctime = str(int(time.time()))
    wcd = session.get('wcd')
    uurl = "https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip=0&r=-1929552825&_={1}".format(wcd, ctime)
    ret = requests.get(uurl)
    logger.debug('login poll response: %s', ret.text)

    if "code=201" in ret.text:
        resp['code'] = 201
        resp['img'] = re.findall("userAvatar = '(.*)';", ret.text)[0]
    elif "code=200" in ret.text:
        resp['code'] = 200
        red_uri = re.findall('redirect_uri="(.*)";', ret.text)[0]
        # Accounts are served from one of two hosts; remember which one the
        # redirect points at so every later URL is built against it.
        if "wx2.qq.com" in red_uri:
            session['url'] = "wx2.qq.com"
        else:
            session['url'] = "wx.qq.com"
        red_uri = red_uri + "&fun=new&version=v2"
        cxml = requests.get(red_uri)
        txt = sup_text(cxml.text)
        session['ticket_dic'] = txt
        session['ticket_cookie'] = cxml.cookies.get_dict()
        logger.debug('login ticket keys: %s', sorted(txt))
    return jsonify(resp)


@app.route('/index')
def index():
    """Initialise the WeChat session and render recent and all contacts.

    Stores ``SyncKey`` under ``session['u_dic']``, the own-user record under
    ``session['u_user']`` and the init cookies under
    ``session['index_cookie']``. Redirects to ``/login`` when no login ticket
    is stored.
    """
    tic_dic = session.get('ticket_dic')
    if not tic_dic:
        return redirect('/login')
    host = session.get('url')

    turl = 'https://{1}/cgi-bin/mmwebwx-bin/webwxinit?r=-1946057529&lang=zh_CN&pass_ticket={0}'.format(tic_dic.get('pass_ticket'), host)
    ret = requests.post(
        turl,
        json={
            'BaseRequest': {
                'DeviceID': "e350337403480741",
                'Sid': tic_dic.get('wxsid'),
                'Skey': tic_dic.get('skey'),
                'Uin': tic_dic.get('wxuin'),
            }
        },
    )
    ret.encoding = 'utf-8'
    u_dic = json.loads(ret.text)
    session['u_dic'] = u_dic['SyncKey']
    session['u_user'] = u_dic['User']
    session['index_cookie'] = ret.cookies.get_dict()

    # Full contact list; this request carries the login cookies.
    ctime = str(time.time() * 1000)
    u_list = "https://{2}/cgi-bin/mmwebwx-bin/webwxgetcontact?r={0}&seq=0&skey={1}".format(ctime, tic_dic.get('skey'), host)
    usr_li = requests.get(u_list, cookies=session.get('ticket_cookie'))
    usr_li.encoding = 'utf-8'
    user_list = usr_li.json()
    return render_template('index.html', u_dic=u_dic, user_list=user_list)


@app.route('/headimg')
def headimg():
    """Proxy the logged-in user's avatar image.

    Returns the image bytes with the upstream Content-Type (``image/jpeg``
    when upstream does not send one), or 401 when no user is stored.
    """
    user_dic = session.get('u_user')
    if not user_dic:
        return 'Not logged in', 401
    host = session.get('url')
    img_url = 'https://' + host + user_dic['HeadImgUrl']
    img_ret = requests.get(
        img_url,
        cookies=session.get('ticket_cookie'),
        headers={'Host': host, 'Content-Type': 'image/jpeg'},
    )
    # Declare the payload as an image instead of Flask's default text/html.
    content_type = img_ret.headers.get('Content-Type', 'image/jpeg')
    return img_ret.content, 200, {'Content-Type': content_type}


@app.route('/send', methods=['GET', 'POST'])
def send():
    """Send a text message to the contact named in the submitted form.

    Reads ``tou`` (recipient UserName) and ``msg`` (content) from the form.
    Returns the plain text ``'ss'``, or 401 when no login state is stored.
    """
    tic_dic = session.get('ticket_dic')
    user = session.get('u_user')
    if not tic_dic or not user:
        return 'Not logged in', 401

    surl = "https://{1}/cgi-bin/mmwebwx-bin/webwxsendmsg?lang=zh_CN&pass_ticket={0}".format(tic_dic.get('pass_ticket'), session.get('url'))
    touser = request.form.get('tou')
    cont = request.form.get('msg')
    ctime = str(time.time() * 1000)
    data = {
        'BaseRequest': {
            'DeviceID': "e092131569398417",
            'Sid': tic_dic.get('wxsid'),
            'Skey': tic_dic.get('skey'),
            'Uin': tic_dic.get('wxuin'),
        },
        'Msg': {
            'ClientMsgId': ctime,
            'Content': cont,
            'FromUserName': user.get('UserName'),
            'LocalID': ctime,
            'ToUserName': touser,
            'Type': 1,
        },
        'Scene': 0,
    }
    # Serialise by hand with ensure_ascii=False so non-ASCII text is sent as
    # UTF-8 bytes rather than \uXXXX escapes.
    payload = json.dumps(data, ensure_ascii=False).encode('utf-8')
    ret = requests.post(surl, data=payload)
    logger.debug('send to=%s status=%s', touser, ret.status_code)
    return 'ss'


@app.route('/get_msg')
def get_msg():
    """Check for new messages and return the first one, if any.

    Returns JSON ``{'code': 201}`` when there is nothing to deliver, or
    ``{'code': 200, 'msg': <first AddMsgList entry>}`` when a message was
    fetched. The stored SyncKey is refreshed after every successful sync.
    Returns ``{'code': 401}`` with HTTP 401 when no login state is stored.
    """
    error = {'code': 201}
    tic_dic = session.get('ticket_dic')
    u_dic = session.get('u_dic')
    if not tic_dic or not u_dic:
        return jsonify({'code': 401}), 401
    host = session.get('url')

    sync_str = "|".join("%s_%s" % (ite['Key'], ite['Val']) for ite in u_dic['List'])
    ctime = str(time.time())
    gurl = "https://webpush.{0}/cgi-bin/mmwebwx-bin/synccheck".format(host)
    data = {
        'r': ctime,
        'skey': tic_dic.get('skey'),
        'sid': tic_dic.get('wxsid'),
        'uin': tic_dic.get('wxuin'),
        'deviceid': 'e543177474325387',
        'synckey': sync_str,
    }
    ret = requests.get(gurl, params=data, cookies=session.get('ticket_cookie'))
    logger.debug('synccheck response: %s', ret.text)

    if 'selector:"2"' in ret.text:
        wurl = "https://{3}/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}".format(tic_dic.get('wxsid'), tic_dic.get('skey'), tic_dic.get('pass_ticket'), host)
        data_to = {
            'BaseRequest': {
                'DeviceID': "e919572143178814",
                'Sid': tic_dic.get('wxsid'),
                'Skey': tic_dic.get('skey'),
                'Uin': tic_dic.get('wxuin'),
            },
            'SyncKey': u_dic,
            'rr': ctime,
        }
        ms = requests.post(wurl, json=data_to)
        ms.encoding = 'utf-8'
        ms_dic = json.loads(ms.text)

        # Report code 200 only when a message is really attached, so the
        # client never sees a 200 without 'msg'.
        # NOTE: only the first entry of AddMsgList is forwarded.
        messages = ms_dic.get('AddMsgList') or []
        if messages:
            error['code'] = 200
            error['msg'] = messages[0]

        # Keep the previous SyncKey when the reply does not carry a new one.
        if 'SyncKey' in ms_dic:
            session['u_dic'] = ms_dic['SyncKey']

    logger.debug('get_msg result code: %s', error['code'])
    return jsonify(error)


if __name__ == '__main__':
    app.run()
前端的:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <link type="text/css" rel="stylesheet" href="/static/css/index.css">
</head>
<body>
<!-- Header: own avatar and greeting -->
<div class="top">
    <ul>
        <li class="headimg"><img src="/headimg"></li>
        <li>欢迎【{{ u_dic.User.NickName }}】</li>
    </ul>
</div>

<!-- Contact panel: "recent" and "all" lists, toggled by the two headings -->
<div class="left">
    <div class="ti"><h3 class="n1">最近联系</h3><h3 class="n2">所有联系人</h3></div>
    <div style="clear: both"></div>
    <div class="list" style="overflow-y: auto;overflow-x: hidden; height: 80%;">
        <div style="clear: both"></div>
        {# NOTE(review): NickName is rendered with |safe, i.e. without HTML escaping.
           Nicknames are remote, user-controlled data; confirm the markup they carry
           is trusted or sanitise it before keeping |safe. #}
        <ul class="min">
            {% for usr in u_dic.ContactList %}
            <img src="/headimg"><li id="{{ usr.UserName }}" class="st{{ usr.UserName }}">{{ usr.NickName|safe }}</li>
            {% endfor %}
        </ul>
        <ul class="full">
            {% for ul in user_list.MemberList %}
            <img src="/headimg"><li id="{{ ul.UserName }}" class="st{{ ul.UserName }}">{{ ul.NickName|safe }}</li>
            {% endfor %}
        </ul>
    </div>
</div>

<!-- Incoming messages are appended here -->
<div class="right" style="margin-left: 220px;margin-top:120px;height: 700px; width: 800px;overflow: auto;">
    <p>温馨提醒:点击联系人的名字就可以给对方发送消息哟</p>
</div>

<!-- Compose dialog -->
<div class="mail">
    <form id="fm">
        <input type="hidden" name="tou">
        <div class="tousr"><span style="display: inline-block;width: 50px;height: 40px;line-height: 40px;float: left">收信人:</span><span class="cont"></span></div>
        <div class="close">X</div>
        <div class="text">
            <textarea name="msg" >请输入要发送的内容</textarea>
            <span class="sub">发送</span>
        </div>
    </form>
</div>

<script src="/static/jquery-3.3.1.min.js"></script>
<script>
    $(function () {
        get_msg();
        $('.full').hide();

        $('.n1').click(function () {
            $('.full').hide();
            $('.min').show();
        });
        $('.n2').click(function () {
            $('.min').hide();
            $('.full').show();
        });

        {# Click a contact name to open the compose dialog for that contact #}
        $('.list li').click(function () {
            $('.mail').show();
            var id = $(this).attr('id');
            var usr = $(this).text();
            $('.cont').text(usr);
            $('input[name="tou"]').val(id);
        });

        {# Click a received message header to reply to its sender #}
        $('.right').on('click', '.sla', function () {
            $('.mail').show();
            // The sender's UserName lives in the "ar" attribute; message
            // headers carry no id, so read the recipient from "ar" as well.
            var sender = $(this).attr('ar');
            $('.cont').text(sender);
            $('input[name="tou"]').val(sender);
        });

        {# Close the compose dialog #}
        $('.close').click(function () {
            $('.mail').hide();
        });

        {# Submit the compose form #}
        $('.sub').click(function () {
            var fm = $('#fm').serialize();
            $.ajax({
                url: '/send',
                type: 'post',
                data: fm,
                // The endpoint answers with plain text, not JSON.
                dataType: 'text',
                success: function (msg) {
                }
            });
        });
    });

    // Long-poll the backend for new messages and append each one to .right.
    function get_msg() {
        $.ajax({
            url: '/get_msg',
            type: 'get',
            dataType: 'JSON',
            success: function (msg) {
                // A 200 reply may arrive without a message body; skip rendering then.
                if (msg.code == 200 && msg.msg) {
                    var sender = msg.msg['FromUserName'];
                    // Build the nodes with .attr()/.text() so remote content is
                    // never parsed as HTML.
                    var header = $('<h3>').addClass('sla').attr('ar', sender)
                        .text('来自:-》' + sender + '的消息(点击回复)');
                    var body = $('<p>').text('消息内容:' + msg.msg['Content']);
                    $('.right').append(header, body);
                }
                get_msg();
            },
            error: function (xhr) {
                // Retry after a short pause so one failed request does not end
                // polling; stop when the server reports the session as unauthorized.
                if (xhr.status !== 401) {
                    setTimeout(get_msg, 3000);
                }
            }
        });
    }
</script>
</body>
</html>