校花网
校花网
思路
正常思路
遇到问题
图片链接有两种形式:一种是完整的绝对地址,另一种是残缺的相对路径(如以 /d 开头),后者需要拼接站点域名后才能下载
import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
# Request headers passed to every requests.get call in this script; the
# User-Agent presents the client as desktop Chrome.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.100 Safari/537.36"
    ),
}
def work(url="http://www.xiaohuar.com/2014.html", out_dir="pic", timeout=10):
    """Download every gallery image linked from a listing page.

    Fetches ``url``, collects the ``src`` of each ``<img>`` matched under the
    ``div`` with id ``images``, and writes each image into ``out_dir`` using
    the last path segment of its URL as the file name. One line is printed
    per image reporting success or failure.

    Args:
        url: Listing page to scrape. It is also the base against which
            relative image links are resolved.
        out_dir: Directory that receives the files; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures on individual images are reported and skipped
            so one bad link does not abort the whole run.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)

    page = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page for images.
    page.raise_for_status()

    tree = etree.HTML(page.text)
    if tree is None:
        # Defensive: the parser can hand back no root for an empty document,
        # in which case there is nothing to download.
        return

    for src in tree.xpath("//div[@id='images']/div/div/a/img/@src"):
        # The page mixes absolute URLs with site-relative paths; urljoin
        # leaves the former untouched and resolves the latter against `url`.
        img_url = urljoin(url, str(src))

        # Name the file from the URL path only, so a query string or
        # fragment never ends up in the file name.
        file_name = os.path.basename(urlsplit(img_url).path)
        if not file_name:
            # A link ending in "/" has no usable file name; skip it.
            continue
        img_name = os.path.join(out_dir, file_name)

        # Fetch the image bytes; a non-2xx answer counts as a failure so an
        # HTML error page is never saved under an image name.
        try:
            img = requests.get(url=img_url, headers=headers, timeout=timeout)
            img.raise_for_status()
        except requests.RequestException as err:
            print(img_url + "下载失败:" + str(err))
            continue

        with open(img_name, "wb") as fw:
            fw.write(img.content)
        print(img_name + "下载成功!")
# Script entry point: start the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    work()