wutao1935

导航

Python爬取美女源码

 可以直接使用
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Download the listing images from http://pic.netbian.com/4kmeinv/.

For every listing page ``index_<n>.html`` the script collects the ``<img>``
tags under ``div.slist``, downloads each image and stores it as
``picLibs/<n><alt text>.jpg``.
"""
import os
import re
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

# Listing page template; %d is replaced by the page number.
PAGE_URL = 'https://pic.netbian.com/4kmeinv/index_%d.html'
# Image ``src`` attributes on the listing pages are site-relative paths.
IMAGE_BASE = 'http://pic.netbian.com'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
}
OUTPUT_DIR = './picLibs'
# The pages are GBK encoded (the alt text only reads correctly as GBK).
PAGE_ENCODING = 'gbk'
# Seconds to wait for a connection / for data before giving up on a request.
REQUEST_TIMEOUT = 10
# Characters that are path separators or not allowed in file names on
# common file systems, plus ASCII control characters.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced.

    Path separators must never reach ``open()``: they would either make the
    write fail or place the file outside the output directory.
    """
    return UNSAFE_FILENAME_CHARS.sub('_', name).strip()


def fetch(session, url):
    """GET *url* and return the response.

    Raises ``requests.RequestException`` on connection problems, timeouts
    and non-2xx status codes, so an error page is never mistaken for data.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def download_page(session, n):
    """Download every image listed on listing page number *n*.

    A page or image that cannot be fetched is reported on stderr and
    skipped; the crawl continues with the next item.
    """
    page_url = PAGE_URL % n
    print(page_url)
    try:
        response = fetch(session, page_url)
    except requests.RequestException as exc:
        print(f'skipping page {page_url}: {exc}', file=sys.stderr)
        return

    # Decode with the right charset up front instead of re-decoding each
    # string afterwards via encode('iso-8859-1').decode('gbk'), which only
    # works when requests happened to guess ISO-8859-1.
    response.encoding = PAGE_ENCODING
    tree = etree.HTML(response.text)
    if tree is None:
        # Nothing parsable came back for this page.
        return

    # Data extraction: the src and alt attributes of each listed image.
    for li in tree.xpath('//div[@class="slist"]/ul/li'):
        srcs = li.xpath('./a/img/@src')
        alts = li.xpath('./a/img/@alt')
        if not srcs or not alts:
            # List entry without an image (or without alt text): skip it
            # rather than crash with IndexError.
            continue

        # urljoin yields IMAGE_BASE + src for site-relative paths and
        # leaves an already absolute src untouched.
        img_src = urljoin(IMAGE_BASE, srcs[0])
        img_name = safe_filename(alts[0]) + '.jpg'

        # Request the image and persist it to disk.
        try:
            img_data = fetch(session, img_src).content
        except requests.RequestException as exc:
            print(f'skipping image {img_src}: {exc}', file=sys.stderr)
            continue

        img_path = os.path.join(OUTPUT_DIR, f'{n}{img_name}')
        with open(img_path, 'wb') as fp:
            fp.write(img_data)
        print(img_name, '下载成功!!!')


if __name__ == "__main__":
    # Create the output folder once, before any download starts.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # One session reuses the connection and carries the headers everywhere.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        # NOTE(review): the crawl starts at index_1.html as before; if the
        # site serves its first page under a different name that page is
        # reported and skipped — confirm against the live site.
        for n in range(1, 150):
            download_page(session, n)

 

posted on 2021-11-18 16:23  逐风浪子  阅读(174)  评论(1)  编辑  收藏  举报