【Python爬虫】教务处模拟登录
Python2模拟登录获取cookie
"""Log in to the academic-affairs site and save the session cookie (Python 2).

The script posts the login form, prints every cookie the server set,
writes the cookies to ``cookie.txt`` and then reuses the same opener to
fetch a page that requires the logged-in session.
"""
import urllib
import urllib2
import cookielib

filename = 'cookie.txt'
# MozillaCookieJar holds the cookies in memory and can later write them
# to `filename` via save().
cookie = cookielib.MozillaCookieJar(filename)
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

# Form fields sent with the login POST.
values = {
    '__VIEWSTATE': "******",
    'tbxUserID': "XXXXXXX",
    'InputPwd': "XXXXXX",
    'btnLogin.x': "55",
    'btnLogin.y': "23",
}
postdata = urllib.urlencode(values)

# URL of the login page.
loginUrl = 'http://yjsgl.***.edu.cn/login.aspx'

# Browser-like request headers attached to every request made by `opener`.
# NOTE(review): the Host/Referer values name a concrete host while the URLs
# in this script look masked -- confirm they match the real server.
# NOTE(review): urllib2 does not decompress responses by itself; if the
# server honours 'Accept-Encoding: gzip, deflate' the body printed at the
# end may be compressed bytes -- confirm or drop that header.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
opener.addheaders.extend([
    ('Host', 'yjsgl.fzu.edu.cn'),
    ('User-Agent', user_agent),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    ('Accept-Language', 'zh-CN,zh;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Connection', 'keep-alive'),
    ('Referer', 'http://yjsgl.fzu.edu.cn/login.aspx'),
])

# Perform the login; the cookie processor stores the cookies in `cookie`.
try:
    result = opener.open(loginUrl, postdata)
except urllib2.URLError as e:
    # URLError lives in the urllib2 module, not on the opener object;
    # looking it up on `opener` would raise AttributeError here instead
    # of reporting the network error.
    print(e.reason)

for item in cookie:
    print('Name = ' + item.name)
    print('Value = ' + item.value)

# Persist the cookies to cookie.txt, including session-only and expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)

# Reuse the cookies to request another page (the grade-query page).
gradeUrl = 'http://yjsgl.***.edu.cn/xsgl/xsxx_show.aspx'
result = opener.open(gradeUrl)
print(result.read())
直接从本地读取cookie模拟登录
# -*- coding: utf-8 -*-
"""
Fetch a protected page with cookies loaded from a local file (Python 2).

Created on Fri Apr 14 16:12:55 2017

@author: zeze
"""
import urllib2
import cookielib

# Start from an empty jar and fill it from cookie.txt, keeping
# session-only and expired entries as well.
cookie = cookielib.MozillaCookieJar()
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)

# Opener that attaches the loaded cookies to outgoing requests.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

# Request object for the page to fetch.
Url = urllib2.Request("http://yjsgl.***.edu.cn/xsgl/xsxx_show.aspx")

response = opener.open(Url)
print(response.read())
Python3版本
"""Log in to the academic-affairs site and save the session cookie (Python 3).

The script posts the login form, prints every cookie the server set,
writes the cookies to ``cookie.txt``, then reuses the same opener to fetch
a page that requires the logged-in session and stores that page on disk.
"""
import http.cookiejar
# Import the urllib submodules explicitly: a bare `import urllib` does not
# guarantee that urllib.request / urllib.parse / urllib.error are loaded.
import urllib.error
import urllib.parse
import urllib.request

filename = 'cookie.txt'
# MozillaCookieJar holds the cookies in memory and can later write them
# to `filename` via save().
cookie = http.cookiejar.MozillaCookieJar(filename)
# The cookie processor stores cookies from responses in `cookie` and
# attaches them to later requests made through the opener.
cookie_support = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)
urllib.request.install_opener(opener)

# Form fields sent with the login POST.
values = {
    '__VIEWSTATE': "/wEPDwUJODA3MTI1345b2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WAgUIYnRuTG9naW4FDEltYWdlQnV0dG9uMQ==",
    'tbxUserID': "1345",
    'InputPwd': "5345e",
    'btnLogin.x': "55",
    'btnLogin.y': "23",
}
# The POST body must be bytes in Python 3.
postdata = urllib.parse.urlencode(values).encode('utf-8')

# URL of the login page.
loginUrl = 'http://yjsgl.123.e34u.cn/login.aspx'

# Browser-like request headers attached to every request made by `opener`.
# NOTE(review): the Host/Referer values name different hosts than loginUrl
# (the URLs in this script look masked) -- confirm against the real server.
# NOTE(review): urllib.request does not decompress responses by itself; if
# the server honours 'Accept-Encoding: gzip, deflate' the body decoded
# below may be compressed bytes -- confirm or drop that header.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
opener.addheaders.extend([
    ('Host', 'yjsgl.fzu.edu.cn'),
    ('User-Agent', user_agent),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    ('Accept-Language', 'zh-CN,zh;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Connection', 'keep-alive'),
    ('Referer', 'http://yjsgl.f234u.cn/login.aspx'),
])

# Perform the login; the cookie processor stores the cookies in `cookie`.
try:
    result = opener.open(loginUrl, postdata)
except urllib.error.URLError as e:
    # URLError lives in urllib.error, not on the opener object; looking it
    # up on `opener` would raise AttributeError here instead of reporting
    # the network error.
    print(e.reason)

for item in cookie:
    print('Name = ' + item.name)
    print('Value = ' + item.value)

# Persist the cookies to cookie.txt, including session-only and expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)

# Reuse the cookies to request another page (the grade-query page).
gradeUrl = 'http://yjsgl.fzu.edu.cn/xsgl/xsxx_show.aspx'
result = opener.open(gradeUrl)

# Read the body exactly once: a second read() on the same response returns
# b'', which previously left the saved file empty.
text = result.read()
print("返回的结果")
# If the output looks garbled, inspect result.info() for the real charset.
print(text.decode('utf8'))

# The target file does not need to exist beforehand; 'wb' creates it.
save_path = "D:/snatch2.txt"
with open(save_path, 'wb') as f_obj:
    f_obj.write(text)
print("snatch successfully.")