自学Python五 爬虫基础练习之SmartQQ协议
BAT站在中国互联网的顶端,引导着中国互联网的发展走向。。。既受到了多数程序员的关注,也在被我们所惦记着。。。
关于SmartQQ的协议来自HexBlog,根据他的博客我自己也一步一步地去分析、去尝试。自己不了解、不知道的东西总是神秘的;如果你有这种好奇心,那么真相就只有一个。接下来我先把协议放出来,至于分析方法,以后有机会再谈谈……其实我也是个半吊子水平,谁知道下次改了协议还灵不灵呢!
登录之前,获取二维码:https://ssl.ptlogin2.qq.com/ptqrshow?appid=501004106&e=0&l=M&s=5&d=72&v=4&t=0.22925435146316886,refer为:https://ui.ptlogin2.qq.com/cgi-bin/login
循环获取二维码状态(是否失效,是否在手机上授权):https://ssl.ptlogin2.qq.com/ptqrlogin?webqq_type=10&remember_uin=1&login2qq=1&aid=501004106&u1=http%3A%2F%2Fw.qq.com%2Fproxy.html%3Flogin2qq%3D1%26webqq_type%3D10&ptredirect=0&ptlang=2052&daid=164&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=0-0-136435&mibao_css=m_webqq&t=undefined&g=1&js_type=0&js_ver=10139&login_sig=&pt_randsalt=0,refer为:https://ui.ptlogin2.qq.com/cgi-bin/login
获取cookie中ptwebqq:这次的url是手机扫描二维码之后得到的返回值。refer不变。
获取返回值vfwebqq:http://s.web2.qq.com/api/getvfwebqq?ptwebqq={ptwebqq}&clientid=53999199&psessionid=&t=1446710396202(其中{ptwebqq}替换为上一步从cookie中取得的ptwebqq值)。refer为http://s.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1。
成功登录,得到uin,psessionid:http://d.web2.qq.com/channel/login2,refer为http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2,数据为:"r=%7B%22ptwebqq%22%3A%22"+ptwebqq+"%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22%22%2C%22status%22%3A%22online%22%7D",host地址为:"d1.web2.qq.com"
根据得到的信息获取好友列表:http://s.web2.qq.com/api/get_user_friends2,数据为:data="r=%7B%22vfwebqq%22%3A%22"+vfwebqq+"%22%2C%22hash%22%3A%22"+__hash+"%22%7D",refer为:http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2
让我们来看看程序:
下面我们用python来尝试一下(分为两个文件,其中继续沿用我们的HttpClient类,以及WebQQ类,代码如下):
# HttpClient.py
# -*- coding: utf-8 -*-
"""Minimal cookie-aware HTTP helper used by the WebQQ example.

Originally written against Python 2 (``urllib2`` / ``cookielib``). The import
fallback below maps those names onto their Python 3 equivalents so the same
source loads on either interpreter.
"""
import socket
import urllib

try:  # Python 2 module names
    import cookielib
    import urllib2
    from urllib2 import HTTPError, URLError
except ImportError:  # Python 3 equivalents
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.error import HTTPError, URLError


class HttpClient:
    """Thin wrapper around ``urllib2`` that keeps cookies between requests.

    The cookie jar and the opener are class attributes, so every instance
    (and every subclass instance) shares the same cookies.
    """

    __cookie = cookielib.CookieJar()
    __req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie))
    __req.addheaders = [
        ('Accept', 'application/javascript, */*;q=0.8'),
        ('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
    ]
    # Installed globally so the plain urllib2.urlopen() calls below go
    # through the cookie-aware opener.
    urllib2.install_opener(__req)

    def __open(self, url, data=None, refer=None):
        """Build the request, attach an optional Referer and open it.

        Returns the response object; any exception from ``urlopen`` is left
        for the public wrappers to translate.
        """
        req = urllib2.Request(url, data)
        if refer is not None:
            req.add_header('Referer', refer)
        return urllib2.urlopen(req, timeout=120)

    def Get(self, url, refer=None):
        """GET *url* and return the response body.

        On an HTTP error status the error body is returned instead. On a
        timeout, socket error or connection failure an empty byte string is
        returned.
        """
        try:
            return self.__open(url, refer=refer).read()
        except HTTPError as e:
            # HTTPError must be handled before URLError (its base class).
            return e.read()
        except (URLError, socket.error):
            # socket.timeout is a subclass of socket.error.
            return b''

    def GetWithOutRead(self, url, refer=None):
        """GET *url* and return the unread response object.

        NOTE(review): the failure paths keep the original return values for
        compatibility -- the error *body* on an HTTP error status and an
        empty byte string on timeout/connection failure -- so callers must
        check what they received before calling ``.read()`` on it.
        """
        try:
            return self.__open(url, refer=refer)
        except HTTPError as e:
            return e.read()
        except (URLError, socket.error):
            return b''

    def Post(self, url, data, refer=None):
        """POST the already-encoded *data* to *url* and return the body.

        *data* is sent as given (it is not passed through ``urlencode``), so
        the caller supplies the final request body. Failure handling matches
        ``Get``.
        """
        try:
            # req = urllib2.Request(url, urllib.urlencode(data))
            return self.__open(url, data=data, refer=refer).read()
        except HTTPError as e:
            return e.read()
        except (URLError, socket.error):
            return b''

    def Download(self, url, file):
        """Fetch *url* and write the body to the path *file*.

        The body is read completely before the target is opened, so a failed
        download no longer leaves a truncated file or an open handle behind.
        Network errors propagate to the caller.
        """
        response = urllib2.urlopen(url)
        try:
            payload = response.read()
        finally:
            response.close()
        with open(file, 'wb') as output:
            output.write(payload)

    def getCookie(self, key):
        """Return the value of the first cookie named *key*, or ``''``."""
        for c in self.__cookie:
            if c.name == key:
                return c.value
        return ''

    def setCookie(self, key, val, domain):
        """Add a session cookie *key*=*val* for *domain* with path ``/``."""
        ck = cookielib.Cookie(
            version=0, name=key, value=val,
            port=None, port_specified=False,
            domain=domain, domain_specified=False, domain_initial_dot=False,
            path='/', path_specified=True,
            secure=False, expires=None, discard=True,
            comment=None, comment_url=None,
            rest={'HttpOnly': None}, rfc2109=False)
        self.__cookie.set_cookie(ck)
# WebQQ.py
# -*- coding: utf-8 -*-
"""SmartQQ (WebQQ) QR-code login example built on top of HttpClient.

Flow: download the login QR code, show it in a Tk window running in a child
process, poll until the code has been authorised, then fetch ptwebqq /
vfwebqq / psessionid and print the friend list's remark names.
"""
import io
import json
import multiprocessing
from multiprocessing import Process
from time import sleep

try:  # Python 2 module names
    from Tkinter import *
    from Queue import Empty
except ImportError:  # Python 3 equivalents
    from tkinter import *
    from queue import Empty

from PIL import Image, ImageTk

from HttpClient import HttpClient


class WebQQ(HttpClient):
    """Drives the SmartQQ login sequence and lists the account's friends."""

    def __init__(self):
        self.__cookie = ""
        self.__ptwebqq = ""
        self.__vfwebqq = ""
        self.__hash = ""
        self.__uin = ""
        self.__root = ""
        self.__psessionid = ""
        # Carries the 'stop' command from the main process to the QR window.
        self.queue = multiprocessing.Queue()

    @staticmethod
    def __parse_json(res):
        """Return *res* parsed as a JSON object, or None if that fails.

        Get/Post return an empty string on network failure, which
        ``json.loads`` would reject with ValueError.
        """
        if not res:
            return None
        try:
            parsed = json.loads(res)
        except ValueError:
            return None
        return parsed if isinstance(parsed, dict) else None

    # Fetch the QR code
    def __ptqrshow(self):
        """Download the QR code and display it in a child process.

        Returns True once the window process has been started, False when
        the image could not be downloaded or decoded.
        """
        img = self.Get(url="https://ssl.ptlogin2.qq.com/ptqrshow?appid=501004106&e=0&l=M&s=5&d=72&v=4&t=0.4139144900254905")
        if not img:
            print(u"二维码下载失败")
            return False
        try:
            imgfile = Image.open(io.BytesIO(img))
        except (IOError, OSError):
            # The response was not a decodable image.
            print(u"二维码下载失败")
            return False
        p = Process(target=self._run_proc, args=(imgfile,))
        p.start()
        print(u"二维码下载完毕,请尽快扫描...")
        return True

    # Poll the scan status / login progress
    def __ptqrlogin(self):
        """Poll once per second until the QR code has been authorised.

        Polling is a loop rather than recursion, so a slow scan cannot hit
        the interpreter's recursion limit. Returns the result of
        ``__check_sig`` on success, or False when the success response does
        not contain the expected redirect URL.
        """
        while True:
            res = self.Get("https://ssl.ptlogin2.qq.com/ptqrlogin?webqq_type=10&remember_uin=1" +
                           "&login2qq=1&aid=501004106&u1=http%3A%2F%2Fw.qq.com%2Fproxy.html%3Flogin2qq%3D1%26webqq_type%3D10" +
                           "&ptredirect=0&ptlang=2052&daid=164&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=0-0-136435" +
                           "&mibao_css=m_webqq&t=undefined&g=1&js_type=0&js_ver=10139&login_sig=&pt_randsalt=0",
                           "https://ui.ptlogin2.qq.com/cgi-bin/login")
            if res is None:
                return False
            # Work on text from here on; decode once at the boundary.
            text = res.decode("UTF-8", "replace") if isinstance(res, bytes) else res
            if u"登录成功" not in text:
                sleep(1)
                continue
            start = text.find(u"http")
            end = text.find(u"','0','登录成功!'")
            if start == -1 or end <= start:
                # A missing marker would otherwise slice out a junk URL.
                return False
            return self.__check_sig(str(text[start:end]))

    # Obtain the ptwebqq cookie
    def __check_sig(self, url):
        """Visit the redirect *url*, store the ptwebqq cookie, then log in."""
        res = self.Get(url=url, refer="https://ui.ptlogin2.qq.com/cgi-bin/login")
        if res is None:
            return False
        self.__ptwebqq = self.getCookie("ptwebqq")
        self.__getvfwebqq()
        return self.__login2()

    # Obtain vfwebqq
    def __getvfwebqq(self):
        """Request vfwebqq and store it; leaves it unchanged on failure."""
        res = self.Get(url="http://s.web2.qq.com/api/getvfwebqq?ptwebqq=" + self.__ptwebqq +
                           "&clientid=53999199&psessionid=&t=1446710396202",
                       refer="http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1")
        jsn = self.__parse_json(res)
        if jsn is None:
            return
        try:
            self.__vfwebqq = jsn["result"]["vfwebqq"]
        except (KeyError, TypeError):
            pass  # response without the expected field: keep the old value

    # Log in; obtain uin and psessionid
    def __login2(self):
        """Post the login request and, on success, fetch the friend list.

        Returns True when the server answers with retcode 0, else False.
        """
        data = "r=%7B%22ptwebqq%22%3A%22" + self.__ptwebqq + "%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22%22%2C%22status%22%3A%22online%22%7D"

        res = self.Post(url="http://d1.web2.qq.com/channel/login2",
                        data=data.encode(encoding="utf8"),
                        refer="http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2")
        jsn = self.__parse_json(res)
        if jsn is None or jsn.get("retcode") != 0:
            return False
        try:
            result = jsn["result"]
            uin = result["uin"]
            psessionid = result["psessionid"]
        except (KeyError, TypeError):
            return False
        self.__uin = uin
        self.__psessionid = psessionid
        self.__hash = self.__friendsHash(self.__uin, self.__ptwebqq)
        self.__get_user_friends()
        return True

    # Friend-list hash, after HexBlog
    def __friendsHash(self, uin, pt):
        """Compute the ``hash`` field required by get_user_friends2.

        The bytes of *pt* are XOR-folded into four bytes; the four low bytes
        of *uin* are XOR-ed with 69, 67, 79, 75 (ASCII "ECOK"); the two
        groups are interleaved and rendered as 16 upper-case hex digits.
        """
        raw = pt if isinstance(pt, bytes) else pt.encode("UTF8")
        folded = [0, 0, 0, 0]
        # bytearray yields integers on both Python 2 and Python 3.
        for index, byte in enumerate(bytearray(raw)):
            folded[index % 4] ^= byte
        x = int(uin)
        masked = [
            x >> 24 & 255 ^ 69,
            x >> 16 & 255 ^ 67,
            x >> 8 & 255 ^ 79,
            x & 255 ^ 75,
        ]
        # Even positions come from `folded`, odd ones from `masked`. The
        # original `cond and a or b` form picked the wrong byte whenever the
        # folded byte was 0.
        interleaved = []
        for fold_byte, mask_byte in zip(folded, masked):
            interleaved.append(fold_byte)
            interleaved.append(mask_byte)
        return "".join("%02X" % value for value in interleaved)

    # Fetch the friend list
    def __get_user_friends(self):
        """Request the friend list and print every remark name in it."""
        data = "r=%7B%22vfwebqq%22%3A%22" + self.__vfwebqq + "%22%2C%22hash%22%3A%22" + self.__hash + "%22%7D"
        res = self.Post(url="http://s.web2.qq.com/api/get_user_friends2",
                        data=data.encode(encoding="utf8"),
                        refer="http://s.web2.qq.com/proxy.html?v=20130916001&callback=1&id=1")
        jsn = self.__parse_json(res)
        if jsn is None or jsn.get("retcode") != 0:
            return
        # The list was returned correctly
        result = jsn.get("result")
        marknames = result.get("marknames", []) if isinstance(result, dict) else []
        for entry in marknames:
            print(entry["markname"])

    # Heartbeat that also receives messages; no longer works
    # def __poll(self):
    #     data="r=%7B%22ptwebqq%22%3A%22"+self.__ptwebqq+"%22%2C%22clientid%22%3A53999199%2C%22psessionid%22%3A%22"+self.__psessionid +"%22%2C%22key%22%3A%22%22%7D"
    #     res=self.Post(url="http://d1.web2.qq.com/channel/poll2",
    #                   data=data.encode(encoding="utf8"),
    #                   refer="http://d1.web2.qq.com/proxy.html?v=20151105001&callback=1&id=2")
    #     if(res!=None):
    #         print res

    def __check_queue(self):
        """Tk timer callback: close the window on 'stop', else re-arm."""
        try:
            command = self.queue.get_nowait()
        except Empty:
            # Nothing queued yet; only this case is expected.
            command = None
        if command == 'stop':
            self.__do_stop()
            return
        # Could check for other commands here, too
        self.__root.after(100, self.__check_queue)

    def __stop(self):
        """Ask the QR-code window process to close."""
        self.queue.put('stop')

    def __do_stop(self):
        """Destroy the Tk root window, ending its main loop."""
        self.__root.destroy()

    def _run_proc(self, imgfile):
        """Child-process entry point: show *imgfile* until told to stop."""
        self.__root = Tk()
        result = ImageTk.PhotoImage(imgfile)
        label = Label(self.__root, image=result)
        label.pack()
        self.__root.after(100, self.__check_queue)
        self.__root.mainloop()

    def run(self):
        """Show the QR code, wait for the login and close the window."""
        if not self.__ptqrshow():
            return
        try:
            if self.__ptqrlogin():
                print(u"登录成功")
        finally:
            # Close the QR window on failure too; otherwise the child
            # process keeps the program alive.
            self.__stop()


if __name__ == "__main__":
    qq = WebQQ()
    qq.run()