# 抽屉网点赞爬虫 (Chouti upvote crawler)
import time

import requests
from bs4 import BeautifulSoup

# Root of the site; every endpoint used below hangs off it.
BASE_URL = 'https://dig.chouti.com'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Seconds to wait on the server. requests applies no timeout by default, so
# without this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Cookies shared by login() and thumbsup(); replaced by a dict once login()
# has fetched them.
# Note from the original author: the "gpsd" entry in Chouti's cookies is the
# most important piece of verification data.
user_cookie = ""


def login(phone_id, phone_pwd):
    """Log in to Chouti and return the cookies used for the login.

    Per the original author's note, Chouti's cookie handling is unusual: the
    cookies issued to an anonymous visitor are the ones that get validated by
    a successful login, after which those same cookies count as logged in.
    The cookies are therefore fetched from the home page first and then sent
    along with the login request.

    Args:
        phone_id: Mobile number without country code (str of digits or int);
            the "86" prefix is added here.
        phone_pwd: Account password.

    Returns:
        The cookie dict, which is also stored in the module-level
        ``user_cookie`` for thumbsup() to use.

    Raises:
        ValueError: If ``phone_id`` is not numeric.
        requests.RequestException: On network failure, timeout, or an HTTP
            error status from either request.
    """
    global user_cookie

    # Validate the phone number before doing any network I/O.
    phone = "86%d" % int(phone_id)

    home = requests.get(
        url=BASE_URL + '/',
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    home.raise_for_status()
    user_cookie = home.cookies.get_dict()

    auth = requests.post(
        url=BASE_URL + '/login',
        headers=HEADERS,
        data={
            "phone": phone,
            "password": phone_pwd,
            "oneMonth": 1,
        },
        cookies=user_cookie,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly instead of silently voting with unauthenticated cookies.
    auth.raise_for_status()

    # The cookies that were presented to the login endpoint.
    return user_cookie


def _extract_link_ids(html):
    """Return the integer link ids of all entries found in a listing page.

    Each entry is a ``<div class="item">`` whose ``<img>`` carries the link
    id in its ``lang`` attribute. Entries without an ``<img>``, without a
    ``lang`` attribute, or with a non-numeric ``lang`` are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    link_ids = []
    for item in soup.find_all(name='div', attrs={'class': 'item'}):
        img = item.find(name='img')
        if img is None:
            continue
        raw_id = img.attrs.get('lang')
        if raw_id is None:
            continue
        try:
            link_ids.append(int(raw_id))
        except (TypeError, ValueError):
            continue
    return link_ids


def thumbsup(max_pages=120, delay=0.5):
    """Upvote every entry on pages 1..max_pages of the hot list.

    Uses the cookies stored in the module-level ``user_cookie``, so login()
    should be called first.

    Args:
        max_pages: Number of listing pages to walk. The default of 120 comes
            from the original author's note that the hot list only has 120
            pages.
        delay: Seconds to sleep after each vote request.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    for page in range(1, max_pages + 1):
        listing = requests.get(
            url='%s/all/hot/recent/%d' % (BASE_URL, page),
            headers=HEADERS,
            cookies=user_cookie,
            timeout=REQUEST_TIMEOUT,
        )
        print("正在给第%d页点赞" % page)

        for link_id in _extract_link_ids(listing.text):
            requests.post(
                url='%s/link/vote?linksId=%d' % (BASE_URL, link_id),
                headers=HEADERS,
                cookies=user_cookie,
                timeout=REQUEST_TIMEOUT,
            )
            # Throttle between votes.
            time.sleep(delay)

        print("准备执行下一页...")


def main():
    """Log in and upvote the whole hot list."""
    # Placeholders ("phone number", "password"): replace with real
    # credentials before running; login() rejects a non-numeric phone number.
    login("手机号", "密码")
    thumbsup()


if __name__ == "__main__":
    main()