# Python: simulating a browser
import urllib.request as ul
import urllib.parse as uz
import http.cookiejar as cookielib
import json
from json import loads
import os
import random
import chardet
import stat
import zlib
# Cookie jar that holds the session cookies for every request made below.
c = cookielib.LWPCookieJar()
# Handler that plugs the cookie jar into urllib's opener machinery.
cookie = ul.HTTPCookieProcessor(c)
# The "simulated browser": an opener built around the cookie handler.
opener = ul.build_opener(cookie)
# Install the opener globally so every urllib request goes through the same
# cookie-aware opener (captcha, login, ... are not fetched separately).
ul.install_opener(opener)

# Output root: parent of the current working directory plus a literal
# backslash and "filedir" (Windows-style separator).
path = "%s\\filedir" % os.path.abspath('..')

# Default request headers shared by the helpers that do not build their own.
headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    ),
}

# Maps each index the user types to an "x,y" string that is sent as the
# captcha answer (presumably the click position of that picture; confirm).
codepos = {
    "1": "45,45",
    "2": "120,45",
    "3": "180,45",
    "4": "255,45",
    "5": "45,120",
    "6": "120,120",
    "7": "180,120",
    "8": "255,120",
}
def qpRun():
    """Run the interactive flow: captcha, login, query and order requests.

    Downloads the captcha image, asks the user for a comma-separated list
    of indices (keys of ``codepos``), converts them into the coordinate
    string used as the captcha answer, submits it with ``checkCode`` and
    then calls the remaining request helpers in their original order.

    Raises:
        KeyError: if an entered index is not a key of ``codepos``.
    """
    get_img64()
    code = input('输入验证码:')
    # Strip blanks and drop empty tokens so input such as "1, 3" or "1,3,"
    # maps cleanly instead of failing the codepos lookup.
    picks = [part.strip() for part in code.split(",") if part.strip()]
    # Join the mapped "x,y" strings directly; no trailing comma to trim.
    resultcode = ",".join(codepos[pick] for pick in picks)
    code = checkCode(resultcode)
    print(code)
    logins(resultcode)
    getCpData()
    checkuser()
    # NOTE(review): submitOrderRequest is not defined in the visible part of
    # this file; presumably it lives elsewhere. Confirm.
    submitOrderRequest()
    initDc()
# Fetch the captcha image
def get_img64():
    """Download the login captcha image and save it to disk.

    The image is requested through the shared ``opener`` and its bytes are
    written to ``img.jpg`` in the ``codeimg`` folder under ``path``.
    Nothing is returned.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    url = "https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&1542418445538&_=1542418010226"
    # Hand the headers to the constructor so urllib registers them through
    # its own header handling instead of overwriting the attribute.
    req = ul.Request(url, headers=headers)
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        picFile = resp.read()
    target = path + "\\codeimg\\img.jpg"
    # Create the output folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, 'wb') as f:
        f.write(picFile)
def checkCode(codes):
    """Submit the captcha answer and return the server's ``result_code``.

    Args:
        codes: Comma-separated coordinate string sent as the ``answer``
            query parameter.

    Returns:
        The ``result_code`` value of the JSON response, or ``5`` when the
        response has no such key.

    Raises:
        ValueError: if the response body is not valid JSON
            (raised by ``json.loads``).
    """
    # Quote the answer but keep commas literal, so the usual "x,y,x,y"
    # value produces exactly the same URL as before.
    answer = uz.quote(str(codes), safe=",")
    url = "https://kyfw.12306.cn/passport/captcha/captcha-check?answer=%s&rand=sjrand&login_site=E&_=1542418010227" % answer
    print(url)
    req = ul.Request(url, headers=headers)
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        content_encoding = resp.headers.get("Content-Encoding")
        resphtml = resp.read()
    # The request advertises gzip; undo it when the server applied it.
    if content_encoding is not None and content_encoding.strip().lower() == "gzip":
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        resphtml = zlib.decompress(resphtml, 16 + zlib.MAX_WBITS)
    resphtml = resphtml.decode("utf-8")
    print(resphtml)
    respjson = json.loads(resphtml)
    return respjson.get("result_code", 5)
def logins(codes, username='11111', password='11111'):
    """POST the login form and return the server's ``result_code``.

    Args:
        codes: Captcha answer string, sent as the ``answer`` form field.
        username: Account name; the default keeps the original
            hard-coded placeholder.
        password: Account password; the default keeps the original
            hard-coded placeholder.

    Returns:
        The ``result_code`` value of the JSON response, or ``1`` when the
        response has no such key.

    Raises:
        ValueError: if the response body is not valid JSON
            (raised by ``json.loads``).
    """
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    url = "https://kyfw.12306.cn/passport/web/login"
    data = {
        'username': username,
        'password': password,
        'appid': 'otn',
        'answer': codes,
    }
    data = uz.urlencode(data).encode("utf-8")
    # Build the request in one step; the constructor registers the headers
    # through urllib's own header handling.
    req = ul.Request(url, data=data, headers=headers, method="POST")
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        content_encoding = resp.headers.get("Content-Encoding")
        resphtml = resp.read()
    # The request advertises gzip; undo it when the server applied it.
    if content_encoding is not None and content_encoding.strip().lower() == "gzip":
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        resphtml = zlib.decompress(resphtml, 16 + zlib.MAX_WBITS)
    resphtml = resphtml.decode("utf-8")
    print(resphtml)
    respjson = json.loads(resphtml)
    return respjson.get("result_code", 1)
def getCpData(train_date="2018-11-18", from_station="SHH",
              to_station="HFH", purpose_codes="ADULT"):
    """Query the leftTicket endpoint and save the raw response to disk.

    With the default arguments the request URL is identical to the
    original hard-coded one.

    Args:
        train_date: Value for ``leftTicketDTO.train_date``.
        from_station: Value for ``leftTicketDTO.from_station``.
        to_station: Value for ``leftTicketDTO.to_station``.
        purpose_codes: Value for ``purpose_codes``.
    """
    # A list of pairs keeps the parameter order fixed and lets urlencode
    # escape caller-supplied values.
    query = uz.urlencode([
        ("leftTicketDTO.train_date", train_date),
        ("leftTicketDTO.from_station", from_station),
        ("leftTicketDTO.to_station", to_station),
        ("purpose_codes", purpose_codes),
    ])
    url = "https://kyfw.12306.cn/otn/leftTicket/query?" + query
    req = ul.Request(url, headers=headers)
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        resphtml = resp.read()
    # NOTE(review): the body is stored exactly as received. The shared
    # headers advertise gzip, so the file may hold compressed bytes;
    # confirm whether it should be decompressed the way checkCode does.
    target = path + "\\hcpdata\\getCpData.txt"
    # Create the output folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, 'wb') as f:
        f.write(resphtml)
# Check the user (the response comes back empty)
def checkuser():
    """POST an empty ``_json_att`` field to the checkUser endpoint.

    The request goes through the shared ``opener``; the response is not
    read and nothing is returned.
    """
    url = "https://kyfw.12306.cn/otn/login/checkUser"
    data = {
        "_json_att": ""
    }
    data = uz.urlencode(data).encode("utf-8")
    req = ul.Request(url, data=data, headers=headers, method="POST")
    # The body is not needed; the context manager only makes sure the
    # response is closed instead of being left open.
    with opener.open(req):
        pass
def initDc():
    """POST to the confirmPassenger/initDc endpoint and save the response.

    The raw response body is written to ``initDc.txt`` in the ``initDc``
    folder under ``path``. Nothing is returned.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    url = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"
    data = {
        "_json_att": ""
    }
    data = uz.urlencode(data).encode("utf-8")
    req = ul.Request(url, data=data, headers=headers, method="POST")
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        resulthtml = resp.read()
    # NOTE(review): the body is stored exactly as received. The request
    # advertises gzip, so the file may hold compressed bytes; confirm
    # whether it should be decompressed first.
    target = path + "\\initDc\\initDc.txt"
    # Create the output folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, 'wb') as f:
        f.write(resulthtml)
def getJs():
    """Request the HttpZF/GetJS URL through the shared ``opener``.

    The response is neither read nor returned.
    NOTE(review): presumably this call exists for its server-side or
    cookie side effects; confirm.
    """
    url = "https://kyfw.12306.cn/otn/HttpZF/GetJS"
    req = ul.Request(url)
    # The body is not used; the context manager only makes sure the
    # response is closed instead of being left open.
    with opener.open(req):
        pass
def getPassengerDTOs(repeat_submit_token='70230eed00b43c59b542c2caded6f8a7'):
    """POST to the getPassengerDTOs endpoint and save the response.

    The raw response body is written to ``getPassengerDTOs.txt`` in the
    ``getPassengerDTOs`` folder under ``path``. Nothing is returned.

    Args:
        repeat_submit_token: Value sent as ``REPEAT_SUBMIT_TOKEN``. The
            default keeps the original hard-coded value.
            NOTE(review): presumably this token is session-specific and
            must be supplied by the caller in real use; confirm.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    url = "https://kyfw.12306.cn/otn/confirmPassenger/getPassengerDTOs"
    data = {
        '_json_att': '',
        'REPEAT_SUBMIT_TOKEN': repeat_submit_token,
    }
    data = uz.urlencode(data).encode("utf-8")
    req = ul.Request(url, data=data, headers=headers, method="POST")
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        resulthtml = resp.read()
    # NOTE(review): the body is stored exactly as received. The request
    # advertises gzip, so the file may hold compressed bytes; confirm
    # whether it should be decompressed first.
    target = path + "\\getPassengerDTOs\\getPassengerDTOs.txt"
    # Create the output folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, 'wb') as f:
        f.write(resulthtml)
def getPassCodeNew():
    """Download the passenger-module captcha image and save it to disk.

    The image is requested through the shared ``opener`` and its bytes are
    written to ``img.jpg`` in the ``getPassCodeNew`` folder under ``path``.
    Nothing is returned.
    """
    headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=passenger&rand=randp&0.7446971311404074"
    # Hand the headers to the constructor so urllib registers them through
    # its own header handling instead of overwriting the attribute.
    req = ul.Request(url, headers=headers)
    # Close the HTTP response deterministically once the body is read.
    with opener.open(req) as resp:
        picFile = resp.read()
    target = path + "\\getPassCodeNew\\img.jpg"
    # Create the output folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, 'wb') as f:
        f.write(picFile)