Python 爬虫——表情包批量采集
代码:
import os
import re
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq  # HTML parser with jQuery-style CSS selectors

# Listing page that is scraped for emoji thumbnails.
PAGE_URL = "http://www.bbsnet.com/biaoqingbao"
# Directory the downloaded images are written to (relative to the CWD).
SAVE_DIR = "./emoji"
# Seconds to wait on each HTTP request before giving up, so a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Request headers sent with the listing-page request.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Cookie": "wp_xh_session_16c522c7d534bf6487d9468a3bd29107=656874b5170c21bdda03793c065cae8a%7C%7C1640268459%7C%7C1640264859%7C%7Cc31c2b17bba3ec3ff355ee795c78e8f3",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}


def _safe_filename(name, fallback):
    """Return *name* with path-hostile characters replaced, or *fallback*.

    Characters that are illegal in file names on common platforms
    (``\\ / : * ? " < > |`` and control whitespace) are collapsed to ``_``.
    *fallback* is returned when *name* is ``None`` or nothing usable remains.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]+', "_", name or "").strip(" .")
    return cleaned or fallback


def main():
    """Download every thumbnail image on the listing page into SAVE_DIR.

    Each image is saved as ``<alt text>.gif``. An image whose download fails
    is reported and skipped so that one bad link does not abort the batch.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched (network error, timeout, or non-2xx status).
    """
    response = requests.get(PAGE_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Parse the page and select every thumbnail <img> element.
    data = pq(response.text)
    ret = data(".thumbnail a img").items()

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(SAVE_DIR, exist_ok=True)

    for index, img in enumerate(ret, start=1):
        # Image address; skip elements that carry no src at all.
        addr = img.attr("src")
        if not addr:
            continue
        # Resolve relative or protocol-relative addresses against the page.
        addr = urljoin(PAGE_URL, addr)

        # Image name comes from the alt text; fall back to a numbered name so
        # images without alt text do not all overwrite "None.gif".
        name = _safe_filename(img.attr("alt"), "emoji_%d" % index)

        # Fetch the image bytes; never save an HTTP error page as a .gif.
        try:
            img_response = requests.get(addr, timeout=REQUEST_TIMEOUT)
            img_response.raise_for_status()
        except requests.RequestException as err:
            print("表情包:%s下载失败:%s" % (name, err))
            continue

        with open(os.path.join(SAVE_DIR, "%s.gif" % name), "wb") as file:
            file.write(img_response.content)
        print("表情包:%s保存成功!" % name)


if __name__ == "__main__":
    main()