Python网络爬虫-青年大学习
前提
由于每周都要检查是否所有人都完成了本周的大学习,逐个查看比较麻烦(收集截图是另一种办法),因此写了下面的爬虫程序。
整体思路
1、请求网页,获取Cookie(会在整个过程中使用),获取_jfinal_token
url = 'http://mp.vol.jxmfkj.com/pub/login?returnUrl=/'

# Fetch the login page. Its cookies and the token embedded in the HTML are
# needed by the following steps. A timeout keeps the script from hanging
# forever if the server does not answer.
response = requests.get(url, timeout=10)
cookies = response.cookies.get_dict()

# The token is the text between the first "value='" in the page and the
# next single quote.
marker = "value='"
start = response.text.find(marker)
if start == -1:
    # str.find() returns -1 when the marker is missing; slicing with that
    # offset would silently produce a wrong token.
    raise ValueError("_jfinal_token not found in login page")
start += len(marker)
end = response.text.find("'", start)
if end == -1:
    raise ValueError("unterminated _jfinal_token value in login page")

_jfinal_token = response.text[start:end]
session = cookies["JSESSIONID"]
2、模拟登录请求(初步判断其作用是使Cookie生效)
2.1、加密后的密码如何查看
1)首先打开登录页面
2)打开F12中的网络
3)登录
4)在F12中找到如下包
然后点击该请求,在右侧的“载荷”中就可以看到加密后的密码
2.2 代码:
def step1(_jfinal_token, session):
    """Submit the login form using the session cookie from the login page.

    Posts the credentials together with the anti-forgery token, then requests
    the site root with the same headers. Presumably this is what makes the
    session cookie valid for later requests (to be confirmed against the
    server's behaviour).

    Args:
        _jfinal_token: Token scraped from the login page.
        session: Bare ``JSESSIONID`` value (without the ``JSESSIONID=`` prefix).

    Returns:
        None.
    """
    timeout = 10  # seconds; without a timeout requests may block forever
    session = "JSESSIONID=" + session
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://mp.vol.jxmfkj.com',
        'Referer': 'http://mp.vol.jxmfkj.com/pub/login',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Cookie': session,
    }
    # Placeholders: fill in the back-office account and the already-encrypted
    # password as captured from the browser's network panel.
    body_value = {
        "_jfinal_token": _jfinal_token,
        "userCode": "后台账户",
        "password": "加密后的密码",
        "verifyCode": "",
    }
    requests.post(
        url="http://mp.vol.jxmfkj.com/pub/login/submit?returnUrl=/",
        data=body_value,
        headers=headers,
        timeout=timeout,
    )
    # The response is not used; the request is kept because the original flow
    # issues it right after the login POST.
    requests.get(url="http://mp.vol.jxmfkj.com/", headers=headers, timeout=timeout)
3、模拟请求,爬取已完成学习的人员名单
def step2(session, page_number=1, page_size=50):
    """Download one page of the class-record list as parsed JSON.

    Args:
        session: Bare ``JSESSIONID`` value (without the ``JSESSIONID=`` prefix).
        page_number: Value sent as the ``pageNumber`` query parameter.
        page_size: Value sent as the ``pageSize`` query parameter.

    Returns:
        The value stored under the ``'list'`` key of the JSON response.
    """
    session = "JSESSIONID=" + session
    # NOTE(review): this prints the session cookie, which is a credential.
    print(session)
    headers = {
        "Accept": "*/*",
        # NOTE(review): advertising "br" presumably requires a Brotli decoder
        # to be installed for the response to be decoded - confirm.
        "Accept-Encoding": "gzip,deflate,br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
        "Cookie": session,
        "Host": "mp.vol.jxmfkj.com",
        "Referer": "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/index",
    }
    # With the default arguments this is exactly the original hard-coded URL.
    url = (
        "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/list"
        "?iclassId=9&inid=N0013000510051004&pageNumber={}&pageSize={}"
    ).format(page_number, page_size)
    # A timeout keeps the call from blocking forever on a silent server.
    response = requests.get(url=url, headers=headers, timeout=10)
    payload = response.json()
    return payload['list']
4、从MySQL读入班级或组织名单(学号、姓名等);在此之前需要先处理出已完成的人员名单
def step3(list):
    """Return the ``username`` value of every record in *list*.

    Args:
        list: Iterable of mappings that each contain a ``'username'`` key.
            (The parameter name is kept for backward compatibility even
            though it shadows the builtin.)

    Returns:
        A new list with one username per input record, in input order.
    """
    return [row['username'] for row in list]


def step4():
    """Read every row of the ``info`` table from the local MySQL database.

    Returns:
        Whatever ``cursor.fetchall()`` yields for ``select * from info``.

    Raises:
        Exception: Any error raised while querying is printed and then
            re-raised, so the caller sees the real database error.
    """
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='tt',
        charset='utf8',
    )
    try:
        with conn.cursor() as cur:
            create_sql = "select * from info "
            cur.execute(create_sql)
            data = cur.fetchall()
    except Exception as e:
        print("exception", e)
        # Re-raise: continuing would hit an unbound ``data`` and hide the
        # actual cause.
        raise
    finally:
        # Close the connection on both the success and the failure path.
        conn.close()
    print('finish')
    return data
5、验证查看
# Build the lookup once; set membership is O(1) instead of scanning the list
# of finished usernames for every roster row.
finished = set(finish)

for rows in data:
    # A roster row counts as done when either of its first two columns
    # (presumably student ID and name - confirm against the table schema)
    # appears among the finished usernames. Print the first column otherwise.
    if rows[0] not in finished and rows[1] not in finished:
        print(rows[0])
!!!!!!!!!!!注意!!!!!!!!!!!!!!!!!
只为简便工作使用,切记不要恶意给服务器增加压力,切记不要恶意使用
完整代码
import pymysql
import requests

# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10


def step1(_jfinal_token, session):
    """Submit the login form using the session cookie from the login page.

    Posts the credentials together with the anti-forgery token, then requests
    the site root with the same headers. Presumably this is what makes the
    session cookie valid for later requests (to be confirmed against the
    server's behaviour).

    Args:
        _jfinal_token: Token scraped from the login page.
        session: Bare ``JSESSIONID`` value (without the ``JSESSIONID=`` prefix).

    Returns:
        None.
    """
    session = "JSESSIONID=" + session
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://mp.vol.jxmfkj.com',
        'Referer': 'http://mp.vol.jxmfkj.com/pub/login',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Cookie': session,
    }
    # Placeholders: fill in the back-office account and the already-encrypted
    # password as captured from the browser's network panel.
    body_value = {
        "_jfinal_token": _jfinal_token,
        "userCode": "后台账户",
        "password": "加密后的密码",
        "verifyCode": "",
    }
    requests.post(
        url="http://mp.vol.jxmfkj.com/pub/login/submit?returnUrl=/",
        data=body_value,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # The response is not used; the request is kept because the original flow
    # issues it right after the login POST.
    requests.get(
        url="http://mp.vol.jxmfkj.com/",
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )


def step2(session, page_number=1, page_size=50):
    """Download one page of the class-record list as parsed JSON.

    Args:
        session: Bare ``JSESSIONID`` value (without the ``JSESSIONID=`` prefix).
        page_number: Value sent as the ``pageNumber`` query parameter.
        page_size: Value sent as the ``pageSize`` query parameter.

    Returns:
        The value stored under the ``'list'`` key of the JSON response.
    """
    session = "JSESSIONID=" + session
    # NOTE(review): this prints the session cookie, which is a credential.
    print(session)
    headers = {
        "Accept": "*/*",
        # NOTE(review): advertising "br" presumably requires a Brotli decoder
        # to be installed for the response to be decoded - confirm.
        "Accept-Encoding": "gzip,deflate,br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
        "Cookie": session,
        "Host": "mp.vol.jxmfkj.com",
        "Referer": "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/index",
    }
    # With the default arguments this is exactly the original hard-coded URL.
    url = (
        "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/list"
        "?iclassId=9&inid=N0013000510051004&pageNumber={}&pageSize={}"
    ).format(page_number, page_size)
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    payload = response.json()
    return payload['list']


def step3(list):
    """Return the ``username`` value of every record in *list*.

    Args:
        list: Iterable of mappings that each contain a ``'username'`` key.
            (The parameter name is kept for backward compatibility even
            though it shadows the builtin.)

    Returns:
        A new list with one username per input record, in input order.
    """
    return [row['username'] for row in list]


def step4():
    """Read every row of the ``info`` table from the local MySQL database.

    Returns:
        Whatever ``cursor.fetchall()`` yields for ``select * from info``.

    Raises:
        Exception: Any error raised while querying is printed and then
            re-raised, so the caller sees the real database error.
    """
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='tt',
        charset='utf8',
    )
    try:
        with conn.cursor() as cur:
            create_sql = "select * from info "
            cur.execute(create_sql)
            data = cur.fetchall()
    except Exception as e:
        print("exception", e)
        # Re-raise: continuing would hit an unbound ``data`` and hide the
        # actual cause.
        raise
    finally:
        # Close the connection on both the success and the failure path.
        conn.close()
    print('finish')
    return data


def _fetch_login_token(url):
    """Fetch the login page and return ``(_jfinal_token, JSESSIONID)``."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    cookies = response.cookies.get_dict()

    # The token is the text between the first "value='" in the page and the
    # next single quote.
    marker = "value='"
    start = response.text.find(marker)
    if start == -1:
        # str.find() returns -1 when the marker is missing; slicing with that
        # offset would silently produce a wrong token.
        raise ValueError("_jfinal_token not found in login page")
    start += len(marker)
    end = response.text.find("'", start)
    if end == -1:
        raise ValueError("unterminated _jfinal_token value in login page")

    return response.text[start:end], cookies["JSESSIONID"]


def main():
    """Log in, fetch the finished list, and print roster rows not on it."""
    _jfinal_token, session = _fetch_login_token(
        'http://mp.vol.jxmfkj.com/pub/login?returnUrl=/'
    )
    step1(_jfinal_token, session)
    records = step2(session)
    # Set membership is O(1) instead of scanning the list for every row.
    finish = set(step3(records))
    data = step4()
    for rows in data:
        # A roster row counts as done when either of its first two columns
        # (presumably student ID and name - confirm against the table schema)
        # appears among the finished usernames.
        if rows[0] not in finish and rows[1] not in finish:
            print(rows[0])


if __name__ == '__main__':
    main()