4k图片爬取+中文乱码
4k图片爬取+中文乱码
此案例有三种乱码解决方法,推荐第一种
4k图片爬取其实和普通图片爬取的过程是没有本质区别的
import requests
import os
from lxml import etree
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
}
def work():
if not os.path.exists("img"):
os.mkdir("img")
url="http://pic.netbian.com/4kqiche/"
res=requests.get(url=url,headers=headers)
#最推荐方法
res.encoding=res.apparent_encoding
#res.encoding="utf8"
tree=etree.HTML(res.text)
imgs=tree.xpath("//ul[@class='clearfix']//a")
for i in imgs:
img_u=i.xpath("./img/@src")[0]
img_url="http://pic.netbian.com"+img_u
img_name=i.xpath("./b/text()")[0]
# .encode("ISO-8859-1").decode("utf8")
img_content=requests.get(url=img_url,headers=headers).content
with open(f"img/{img_name}.jpg","wb")as fw:
fw.write(img_content)
print(img_name+"下载成功")
if __name__ == '__main__':
work()