Python 爬取图片

 

一、给定url,直接下载到本地

import re
import requests
from bs4 import BeautifulSoup


def get_gif(url, a, timeout=10):
    """Download the image at *url* and save it locally as ``<a>.gif``.

    Args:
        url: Direct address of the GIF to fetch.
        a: Integer used as the file name (formatted with ``%d``).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .gif name.
    response.raise_for_status()
    # Hard-coded folder on the author's machine; adjust before running elsewhere.
    with open("C:\\Users\\acm\\Desktop\\新建文件夹\\%d.gif" % a, 'wb') as file:
        file.write(response.content)


if __name__ == '__main__':
    # Address of the animated image to download; it is stored as 1.gif.
    gif_address = 'http://game.gtimg.cn/images/nz/cp/a20201117decbeta/m1-prop1.gif'
    get_gif(url=gif_address, a=1)
View Code

 二、给定网页链接url,按照规律爬取网页上的所有图片

import re
import requests
from bs4 import BeautifulSoup


def get_url(url, timeout=10):
    """Fetch the page at *url* and return the image addresses found in it.

    Args:
        url: Address of the web page to scan.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``src`` value of every ``<img src="..." alt="...">``
        tag matched in the page text, in document order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # An error page would otherwise be scanned and silently yield no links.
    response.raise_for_status()
    response.encoding = 'utf-8'
    # The capture group is the image address. The pattern mirrors the markup
    # of the target page and must be adapted when scraping a different site.
    url_addr = r'<img src="(.*?)" alt=".*?">'
    return re.findall(url_addr, response.text)


def get_photo(url, a, timeout=10):
    """Download the image at *url* and save it locally as ``<a>.jpg``.

    Args:
        url: Direct address of the image to fetch.
        a: Integer used as the file name (formatted with ``%d``).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page under a .jpg name.
    response.raise_for_status()
    # Hard-coded folder on the author's machine; adjust before running elsewhere.
    with open("C:\\Users\\acm\\Desktop\\新建文件夹\\%d.jpg" % a, 'wb') as file:
        file.write(response.content)


if __name__ == '__main__':
    # Page whose images are to be downloaded.
    page_url = 'http://www.netbian.com/'
    # Save the images as 1.jpg, 2.jpg, ... in the order their links appear.
    for index, image_url in enumerate(get_url(page_url), start=1):
        get_photo(image_url, index)
View Code

 三、加上了网页的headers和文件操作

import re
import os
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

if __name__ == '__main__':
    url = 'http://www.netbian.com/weimei/'
    # headers must be passed by keyword: the second positional argument of
    # requests.get() is `params`, which would append the dict to the query
    # string instead of sending it as HTTP headers.
    response = requests.get(url, headers=headers, timeout=10)
    # Stop here on a 4xx/5xx answer rather than scanning an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    # The capture group is the image address; the pattern follows the markup
    # of the target page.
    urls = re.findall('img src="(.*?)" alt=".*?"', response.text)
    print(urls)
    # Create the output directory once, before the download loop.
    dir_name = 'photos'
    os.makedirs(dir_name, exist_ok=True)
    # Files are numbered 1.jpg, 2.jpg, ... in the order the links appear.
    for a, image_url in enumerate(urls, start=1):
        # Send the same headers for the image downloads as for the page.
        response = requests.get(image_url, headers=headers, timeout=10)
        file_name = str(a) + '.jpg'
        with open(os.path.join(dir_name, file_name), 'wb') as file:
            file.write(response.content)
View Code

 

posted @ 2020-12-02 15:08  cherish__lin  阅读(184)  评论(0)  编辑  收藏  举报