Python 爬取图片
一、给定url,直接下载到本地
import os
import re

import requests
from bs4 import BeautifulSoup

# Folder the images are written to by default (the original author's desktop
# folder). Pass ``save_dir`` to ``get_gif`` to write somewhere else.
DEFAULT_SAVE_DIR = "C:\\Users\\acm\\Desktop\\新建文件夹"


def get_gif(url, a, save_dir=DEFAULT_SAVE_DIR, timeout=10):
    """Download the image at ``url`` and save it as ``<a>.gif`` in ``save_dir``.

    Args:
        url: Direct address of the GIF to download.
        a: Integer used as the file name (``1`` -> ``1.gif``).
        save_dir: Destination folder; created if it does not exist yet.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .gif name.
    response.raise_for_status()

    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, "%d.gif" % a), 'wb') as file:
        file.write(response.content)


if __name__ == '__main__':
    # Address of the animated image to fetch.
    url = 'http://game.gtimg.cn/images/nz/cp/a20201117decbeta/m1-prop1.gif'
    get_gif(url, 1)
二、给定网页链接url,按照规律爬取网页上的所有图片
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Captures the ``src`` value of ``<img src="..." alt="...">`` tags. Adjust the
# pattern to match the markup of the page being crawled.
IMG_PATTERN = re.compile(r'<img src="(.*?)" alt=".*?">')

# Folder the images are written to by default (the original author's desktop
# folder). Pass ``save_dir`` to ``get_photo`` to write somewhere else.
DEFAULT_SAVE_DIR = "C:\\Users\\acm\\Desktop\\新建文件夹"


def get_url(url, timeout=10):
    """Return the image addresses found on the page at ``url``.

    Args:
        url: Address of the web page to scan.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of image URLs in page order. Relative ``src`` values are
        resolved against ``url`` so every entry can be fetched directly.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    response.encoding = 'utf-8'
    # urljoin leaves absolute addresses untouched and only rewrites relative
    # ones, which requests.get would otherwise reject.
    return [urljoin(url, src) for src in IMG_PATTERN.findall(response.text)]


def get_photo(url, a, save_dir=DEFAULT_SAVE_DIR, timeout=10):
    """Download the image at ``url`` and save it as ``<a>.jpg`` in ``save_dir``.

    Args:
        url: Direct address of the image to download.
        a: Integer used as the file name (``1`` -> ``1.jpg``).
        save_dir: Destination folder; created if it does not exist yet.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .jpg name.
    response.raise_for_status()

    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, "%d.jpg" % a), 'wb') as file:
        file.write(response.content)


if __name__ == '__main__':
    # Page whose images should be downloaded.
    url = 'http://www.netbian.com/'
    url_list = get_url(url)
    for a, img_url in enumerate(url_list, start=1):
        try:
            get_photo(img_url, a)
        except requests.RequestException as err:
            # One broken image should not abort the rest of the crawl.
            print('skipped %s: %s' % (img_url, err))
三、加上了网页的headers和文件操作
import re
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

if __name__ == '__main__':
    url = 'http://www.netbian.com/weimei/'
    # ``headers`` must be passed by keyword: the second positional argument of
    # requests.get is ``params``, which would append the User-Agent to the
    # query string instead of sending it as an HTTP header.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'

    # Collect the ``src`` of every ``img src="..." alt="..."`` occurrence and
    # resolve relative addresses against the page URL.
    urls = [
        urljoin(url, src)
        for src in re.findall(r'img src="(.*?)" alt=".*?"', response.text)
    ]
    print(urls)

    # Create the output folder once, before the download loop.
    dir_name = 'photos'
    os.makedirs(dir_name, exist_ok=True)

    for a, img_url in enumerate(urls, start=1):
        try:
            # Send the same headers for the image requests as for the page.
            response = requests.get(img_url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as err:
            # One broken image should not abort the rest of the crawl.
            print('skipped %s: %s' % (img_url, err))
            continue

        file_name = str(a) + '.jpg'
        with open(os.path.join(dir_name, file_name), 'wb') as file:
            file.write(response.content)