爬虫04-cookie
1.cookie用法
# Example 1: fetch a login-protected Renren profile page by replaying a
# Cookie header copied from a logged-in browser session.
from urllib import request

dapeng_url = "http://www.renren.com/880151247/profile"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    # NOTE(review): this is a real-looking session cookie committed in source;
    # consider loading it from the environment or a local, untracked file.
    "Cookie": "anonymid=k7moestblfwn9n; depovince=GW; _r01_=1; JSESSIONID=abctZySjQA4fw9VKIuhdx; taihe_bi_sdk_uid=2953991007be972b7f10573a4b00556f; taihe_bi_sdk_session=7010938ea28567089ef82bd005ff5f88; ick_login=ee41edeb-6bf5-4a08-a2d3-1b4dd04be9ad; _de=DBED19DF5788B76E340E4CEC836F3769696BF75400CE19CC; p=37a48b8069a8222835e05b01a2b211182; first_login_flag=1; ln_uact=980885033@qq.com; ln_hurl=http://hdn.xnimg.cn/photos/hdn421/20140822/1530/main_EZp3_74b800010d6a195a.jpg; t=632d1c65a531d7f64c812aabb68854132; societyguester=632d1c65a531d7f64c812aabb68854132; id=382801152; xnsid=38a3ee67; ver=7.0; loginfrom=null; jebe_key=4cd0ae48-b241-48a7-a029-1b6d9f412d1f%7C79960deff086b3597e7cca906ae5feaa%7C1583892666540%7C1%7C1583892665558; jebe_key=4cd0ae48-b241-48a7-a029-1b6d9f412d1f%7C79960deff086b3597e7cca906ae5feaa%7C1583892666540%7C1%7C1583892665559; wp_fold=0; jebecookies=7dc27970-341a-46da-a15f-09d125d1b9ca|||||",
}

req = request.Request(url=dapeng_url, headers=headers)

# The response is opened first and the output file second, matching the
# original statement order; both are closed when the block exits, so the
# HTTP connection is no longer leaked.
with request.urlopen(req) as resp, open("renren.html", "w", encoding="utf-8") as fp:
    # A text-mode file's write() takes str, so the response bytes are decoded.
    fp.write(resp.read().decode("utf-8"))
2.利用cookieJar登录
from urllib import request from http.cookiejar import CookieJar from urllib import parse
# Example 2: log in through a cookie-aware opener, then reuse that opener
# to fetch a page that requires the login session.
dapeng_url = "http://www.renren.com/880151247/profile"
login_url = "http://www.renren.com/PLogin.do"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}
# NOTE(review): account credentials are hard-coded in source; consider
# reading them from the environment or prompting for them instead.
data = {
    "email": "980885033@qq.com",
    "password": "980885033...",
}


def get_opener():
    """Build and return an opener that keeps cookies between requests.

    A fresh ``CookieJar`` is wrapped in an ``HTTPCookieProcessor`` handler,
    and that handler is installed into a new opener via ``build_opener``.
    Each call returns an independent opener with its own cookie jar.
    """
    # 1.1 Create the cookie jar.
    cookiejar = CookieJar()
    # 1.2 Wrap the jar in an HTTPCookieProcessor handler.
    handler = request.HTTPCookieProcessor(cookiejar)
    # 1.3 Build an opener from that handler.
    return request.build_opener(handler)


def login_renren(opener):
    """Send the login form (module-level ``data``) to ``login_url``.

    The form is URL-encoded and sent as UTF-8 bytes in the request body.
    The response body is not used.

    Args:
        opener: Opener used to send the request, e.g. one from
            ``get_opener()``.
    """
    # 1.4 Send the login request through the opener.
    form_body = parse.urlencode(data).encode("utf-8")
    req = request.Request(login_url, data=form_body, headers=headers)
    # Close the response right away so the connection is not leaked; nothing
    # is read from it.
    opener.open(req).close()


def visit_profile(opener):
    """Fetch ``dapeng_url`` with ``opener`` and save it to ``renren2.html``.

    The same opener that performed the login must be passed in, rather than
    a newly built one, so the request goes out through the same handlers.

    Args:
        opener: Opener used to send the request.
    """
    # 2. Request the profile page, reusing the existing opener.
    req = request.Request(dapeng_url, headers=headers)
    # Both the response and the output file are closed on exit.
    with opener.open(req) as resp, open("renren2.html", "w", encoding="utf-8") as fp:
        fp.write(resp.read().decode("utf-8"))


if __name__ == '__main__':
    opener = get_opener()
    login_renren(opener)
    visit_profile(opener)
3.存取cookie信息
# Example 3: persist cookies to a Mozilla-format cookies.txt file and load
# them back into a cookie-aware opener.
from urllib import request
from http.cookiejar import MozillaCookieJar

# 1. Saving cookies to a file (kept as a reference example; run this once
#    to create other/cookie.txt before using the loading code below).
# cookiejar = MozillaCookieJar("other/cookie.txt")
# handler = request.HTTPCookieProcessor(cookiejar)
# opener = request.build_opener(handler)
#
# resp = opener.open("http://www.baidu.com")
# # ignore_discard=True also saves cookies flagged to be discarded (session
# # cookies); expired cookies would need ignore_expires=True.
# cookiejar.save(ignore_discard=True)

# 2. Loading cookies from the file.
cookiejar = MozillaCookieJar("other/cookie.txt")
# ignore_discard=True also loads cookies flagged to be discarded (session
# cookies); expired cookies would need ignore_expires=True.
cookiejar.load(ignore_discard=True)
handler = request.HTTPCookieProcessor(cookiejar)
opener = request.build_opener(handler)

resp = opener.open("http://www.baidu.com")
# The body is never read, so close the response immediately instead of
# leaking the connection.
resp.close()

for cookie in cookiejar:
    print(cookie)