# 三种urllib实现网页下载,含cookie模拟登陆

# coding=UTF-8

import re
import urllib.error
import urllib.request, http.cookiejar, urllib.parse

#
# print('---------------第一种方法----------------------')
# URL = 'https://baike.baidu.com/item/%E5%B7%B4%E6%B2%99%E5%B0%94%C2%B7%E9%98%BF%E8%90%A8%E5%BE%B7/2867946?fromtitle=%E9%98%BF%E8%90%A8%E5%BE%B7&fromid=9693472'
# response = urllib.request.urlopen(URL)
# if response.getcode() == 200:
#     conf = response.read()
#     print(conf)
# else:
#     print('Fail')
#
# print('---------------第二种方法----------------------')
# # 创建 request 对象
# request = urllib.request.Request(URL)
#
# # 封装 request 对象
# request.add_header('User-Agent', 'Mozilla/5.0')
#
# # 发送带头信息的请求
# response1 = urllib.request.urlopen(request)
# if response1.getcode() == 200:
#     conf = response1.read()
#     print(conf)
# else:
#     print('Fail')

print('---------------第三种方法----------------------')
# URL2 is the page fetched first; URL3 is the login endpoint, which carries
# URL2 as its ReturnUrl query parameter.
URL2 = 'http://lczl.cnki.net/jbdetail/index?query=1'
URL3 = 'http://r.cnki.net/Klogin/Login.aspx?ReturnUrl=http://lczl.cnki.net/jbdetail/index?query=1'
# Create a CookieJar to act as the cookie container.
cj = http.cookiejar.CookieJar()

# Build an opener whose HTTPCookieProcessor stores and resends cookies via cj.
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

# Install the opener globally so later urllib.request.urlopen() calls use it.
urllib.request.install_opener(opener)

# Fetch URL2 through the installed opener. Despite its name, request1 is the
# response object; the __VIEWSTATE token is parsed from its body further down.
request1 = urllib.request.urlopen(URL2)


def getVIEWSTATE(data):
    """Extract the value of the ``__VIEWSTATE`` form field from an HTML page.

    Args:
        data: HTML text of the page, as a ``str``.

    Returns:
        The contents of the ``value`` attribute of the first input whose
        ``name`` and ``id`` are both ``__VIEWSTATE``.

    Raises:
        IndexError: if no such field is present in ``data``.
    """
    # [^"]* stops at the quote that closes the value attribute. A greedy .*
    # would run on to the last quote on the line and swallow any attributes
    # that follow the value.
    match = re.search(r'name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]*)"', data)
    if match is None:
        # IndexError is kept so existing handlers still work; the message
        # now says what was actually missing.
        raise IndexError('__VIEWSTATE field not found in page')
    return match.group(1)


# Read the token from the first response, then close that response so its
# connection is released.
with request1:
    VIEWSTATE = getVIEWSTATE(request1.read().decode())
# print(VIEWSTATE)

# Login form fields. urlencode() accepts str values directly, so the token is
# passed as-is; the whole form is encoded to bytes once, below.
# NOTE(review): the username and password are hard-coded in source; consider
# loading them from the environment or a config file kept out of version control.
data = {'__VIEWSTATE': VIEWSTATE, 'userName': '345666561@qq.com', 'passWord': '215501',
        'iplogin': 0}
post_data = urllib.parse.urlencode(data).encode()

# Supplying a body makes this a POST request.
request2 = urllib.request.Request(URL3, post_data)
request2.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0')

try:
    # The context manager closes the response even if reading fails.
    with urllib.request.urlopen(request2) as response2:
        status = response2.getcode()
        conf = response2.read()
except urllib.error.URLError:
    # urlopen() raises for HTTP error statuses and connection problems rather
    # than returning a response (HTTPError is a subclass of URLError), so the
    # failure path has to be handled here.
    print('Fail')
else:
    if status == 200:
        print(conf.decode('utf8'))
        print(cj)
    else:
        print('Fail')
# posted @ 2017-08-11 10:13  岑忠满  阅读(351)  评论(0)  编辑  收藏  举报