微博登录 Python 学习
学习链接:http://shrik3.com/2016/03/25/sina-login/
python3版本的代码
# -*- coding: utf-8 -*-
"""Log in to Sina Weibo through its SSO endpoints (Python 3 version)."""
import base64
import binascii
import http.cookiejar  # this module was called cookielib in Python 2
import json
import re
import urllib
import urllib.error
import urllib.parse
import urllib.request


class UserLogin:
    """Simulate the Weibo web login flow: prelogin, RSA-encrypted POST, redirects."""

    PRELOGIN_URL = (
        'http://login.sina.com.cn/sso/prelogin.php?entry=weibo'
        '&callback=sinaSSOController.preloginCallBack&su=%s'
        '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)'
    )
    LOGIN_URL = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)'

    # Raw strings: the backslash escapes belong to the regex, not to Python.
    _JSONP_RE = re.compile(r'\((.*)\)')
    _REDIRECT_RE = re.compile(r"location\.replace\('(.*?)'\)")
    _USERDOMAIN_RE = re.compile(r'"userdomain":"(.*?)"')

    def __init__(self, username, userpassword):
        """Store the account name and the plain-text password."""
        self.username = username
        self.password = userpassword

    def get_encrypted_name(self):
        """Return the user name URL-quoted and then base64-encoded, as text."""
        username_urllike = urllib.parse.quote(self.username)
        username_encrypted = base64.b64encode(username_urllike.encode('utf-8'))
        return username_encrypted.decode('utf-8')

    def _prelogin_url(self):
        """Build the prelogin URL with the encoded user name in the ``su`` parameter."""
        # Base64 output may contain '+', '/' and '=', so percent-encode it
        # before placing it in the query string.
        return self.PRELOGIN_URL % urllib.parse.quote(self.get_encrypted_name(), safe='')

    @staticmethod
    def _fetch(request, encoding):
        """Open ``request``, read the whole body, close it, and return it decoded."""
        with urllib.request.urlopen(request) as response:
            return response.read().decode(encoding)

    @staticmethod
    def _parse_prelogin(raw_data):
        """Extract the JSON object from a ``callback({...})`` response.

        Returns the decoded object, or None when the text has no
        parenthesised payload or the payload is not valid JSON.
        """
        match = UserLogin._JSONP_RE.search(raw_data)
        if match is None:
            return None
        try:
            return json.loads(match.group(1))
        except ValueError:
            return None

    def get_prelogin_args(self):
        """Simulate the prelogin request.

        Returns the decoded server answer (the later steps read its
        ``servertime``, ``nonce``, ``pubkey`` and ``rsakv`` entries), or
        None if the request fails or the answer cannot be parsed.
        """
        request = urllib.request.Request(self._prelogin_url())
        try:
            raw_data = self._fetch(request, 'utf-8')
        except urllib.error.HTTPError as e:
            print("%d" % e.code)
            return None
        except (OSError, ValueError) as e:
            # URLError (no HTTP status available) or an undecodable body.
            print(e)
            return None
        return self._parse_prelogin(raw_data)

    def get_encrypted_pw(self, data):
        """Return the RSA-encrypted password as hex-encoded bytes.

        ``data`` is the prelogin answer; its ``servertime``, ``nonce`` and
        ``pubkey`` entries are used. The stored plain-text password is
        cleared afterwards, so this can only be called meaningfully once.
        """
        # Third-party package (python-rsa); imported here because this is
        # the only step that needs it.
        import rsa

        rsa_e = 65537  # 0x10001
        pw_string = (str(data['servertime']) + '\t' + str(data['nonce'])
                     + '\n' + str(self.password))
        key = rsa.PublicKey(int(data['pubkey'], 16), rsa_e)
        pw_encrypted = rsa.encrypt(pw_string.encode('utf-8'), key)
        self.password = ''  # clear the plain-text password
        passwd = binascii.b2a_hex(pw_encrypted)
        print(passwd)
        return passwd

    def enableCookies(self):
        """Install a global urllib opener that keeps cookies between requests."""
        # Create a cookie container.
        cookie_container = http.cookiejar.CookieJar()
        # Bind the container to an HTTP cookie processor.
        cookie_support = urllib.request.HTTPCookieProcessor(cookie_container)
        # Create an opener with a handler for opening HTTP URLs.
        opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)
        # Install the opener; later urlopen() calls will use it.
        urllib.request.install_opener(opener)

    def build_post_data(self, raw):
        """Return the url-encoded login form (bytes) built from the prelogin answer ``raw``."""
        post_data = {
            "entry": "weibo",
            "gateway": "1",
            "from": "",
            "savestate": "7",
            "useticket": "1",
            "pagerefer": "http://passport.weibo.com/visitor/visitor?entry=miniblog&a=enter&url=http%3A%2F%2Fweibo.com%2F&domain=.weibo.com&ua=php-sso_sdk_client-0.6.14",
            "vsnf": "1",
            "su": self.get_encrypted_name(),
            "service": "miniblog",
            "servertime": raw['servertime'],
            "nonce": raw['nonce'],
            "pwencode": "rsa2",
            "rsakv": raw['rsakv'],
            "sp": self.get_encrypted_pw(raw),
            "sr": "1280*800",
            "encoding": "UTF-8",
            "prelt": "77",
            "url": "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
            "returntype": "META",
        }
        data = urllib.parse.urlencode(post_data).encode('utf-8')
        print(data)
        return data

    def login(self):
        """Run the whole login flow and print each intermediate page.

        Returns 0 after printing 'Login error!' when any step fails;
        returns None after printing 'Login success!'.
        """
        self.enableCookies()
        data = self.get_prelogin_args()
        if data is None:
            print('Login error!')
            return 0
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36"
        }
        try:
            post_data = self.build_post_data(data)
            request = urllib.request.Request(url=self.LOGIN_URL, data=post_data, headers=headers)
            html = self._fetch(request, 'GBK')
            print(html)

            # The login answer redirects via a location.replace('...') script.
            redirect = self._REDIRECT_RE.search(html)
            if redirect is None:
                raise ValueError('no redirect URL in the login response')
            login_url = redirect.group(1)
            print(login_url)
            page = self._fetch(urllib.request.Request(login_url), 'utf-8')
            print(page)

            userdomain = self._USERDOMAIN_RE.search(page)
            if userdomain is None:
                raise ValueError('no userdomain in the redirect response')
            login_url = 'http://weibo.com/' + userdomain.group(1)
            final = self._fetch(urllib.request.Request(login_url), 'utf-8')
            print(final)
        except urllib.error.HTTPError as e:
            print(e.code)
            print('Login error!')
            return 0
        except (OSError, ValueError, KeyError):
            # Network failure, undecodable/unexpected page, or a prelogin
            # answer that lacks the expected entries.
            print('Login error!')
            return 0
        print("Login success!")


if __name__ == '__main__':
    # NOTE(review): credentials are hard-coded in the source; move them to
    # an environment variable or a prompt before sharing this file.
    userlogin = UserLogin('13031325732', 'zn123456')
    userlogin.login()
出现的一些问题:
1. 登录名要先做 URL 的 quote 转换,再做 b64encode 编码;得出的结果一定要和浏览器 Network 面板中的请求参数比较,确认是否正确。
2. 请求路径的版本问题:学习链接中用的是 ssologin.js(v1.4.18),而我现在(2018-04-15)看到的却是 ssologin.js(v1.4.19)。
但现在请求 ssologin.js(v1.4.18) 依然可以,原因目前还只是一些猜测。
学习心得:
首先,登录一般都会有加密处理:微博在输入用户名后会发起请求,得到一些验证所需的东西,比如用于密码加密的公钥等等;
其次,查看点击登录按钮后的请求,确定请求的路径和参数(密码一般都会做加密处理),设置好 headers、data 和 url 后请求即可。最后是网页请求的重定向问题,用正则表达式截取跳转地址再请求即可。
说得好简单( ̄▽ ̄)" 我搞了两三天,基础太薄弱,还有一些问题,比如 cookie 的设置。