Python crawler diary: scraping Huya images

A handy little wallpaper-scraping script: drop it into the folder where your desktop wallpapers live and run it (depends on re, requests, and BeautifulSoup from bs4).

import requests
import re
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}

url = 'https://www.huya.com/g/2168'
response = requests.get(url, headers=headers).text

soup = BeautifulSoup(response, 'lxml')
# class is a Python keyword (it declares a class), so BeautifulSoup uses class_ with a trailing underscore
girls = soup.find_all('img', class_='pic')

for girl in girls:
    girl_url = girl['data-original'].split('?')[0]
    girl_title = girl['title']
    # print(girl_url, girl_title)
    img = requests.get(girl_url)
    with open('./%s.jpg' % girl_title, 'wb') as jpg:
        jpg.write(img.content)
        print("<%s> downloaded successfully" % girl_title)

The next version depends on requests and lxml. Running it scrapes the images on https://www.huya.com/g/2168, names each file after the image's @alt attribute, and saves everything into the aaa folder under the script's directory; if that folder does not exist, the script fails (see the sketch after the code for a one-line fix).

import requests
from lxml import etree
url = 'https://www.huya.com/g/2168'
r = requests.get(url)
data = etree.HTML(r.text)

girls = data.xpath('//img[@class="pic"]')  # match every <img class="pic"> node
for girl in girls:
    img_url = girl.xpath('./@data-original')[0]
    name = girl.xpath('./@alt')[0]
    img = requests.get(img_url)
    with open('./aaa/%s.jpg' % name, 'wb') as jpg:
        jpg.write(img.content)
        print("<%s> downloaded successfully" % name)

Scraping a single image, https://anchorpost.msstatic.com/cdnimage/anchorpost/1053/af/5bc9192add9117924ab8cde0b86049_2168_1660647126.jpg, which gets saved as 1.jpg in the script's directory.

# 1. Import the library
import requests

# 2. Specify the image URL
img_url = "https://anchorpost.msstatic.com/cdnimage/anchorpost/1053/af/5bc9192add9117924ab8cde0b86049_2168_1660647126.jpg"
# 3. Send a GET request; requests.get returns a Response object
response = requests.get(img_url)
# 4. response.content holds the raw bytes of the image (the print below dumps them to the console)
print(response.content)

# 5. Save the data to disk
with open('./1.jpg', 'wb') as f:
    f.write(response.content)
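
For one small image this is fine, but for larger files you can stream the response instead of holding it all in memory at once. A sketch of the same download in streaming mode (same URL, purely illustrative):

import requests

img_url = "https://anchorpost.msstatic.com/cdnimage/anchorpost/1053/af/5bc9192add9117924ab8cde0b86049_2168_1660647126.jpg"

# stream=True defers downloading the body until it is iterated over
with requests.get(img_url, stream=True) as response:
    response.raise_for_status()  # fail early on a 4xx/5xx status
    with open('./1.jpg', 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)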

Scraping the images returned by a Baidu image search. Depends on tqdm and requests; the files are saved in the current directory with names like imgmaintenanceWorker_<num>.jpg (see the note after the code about that path prefix).

# -*- coding: UTF-8 -*-
import requests
import tqdm


def configs(search, page, number):
    """

    :param search:
    :param page:
    :param number:
    :return:
    """
    url = 'https://image.baidu.com/search/acjson'
    params = {
        "tn": "resultjson_com",
        "logid": "11555092689241190059",
        "ipn": "rj",
        "ct": "201326592",
        "is": "",
        "fp": "result",
        "queryWord": search,
        "cl": "2",
        "lm": "-1",
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": "-1",
        "z": "",
        "ic": "0",
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": search,
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": "0",
        "istype": "2",
        "qc": "",
        "nc": "1",
        "fr": "",
        "expermode": "",
        "force": "",
        "pn": str(60 * page),
        "rn": number,
        "gsm": "1e",
        "1617626956685": ""
    }
    return url, params


def loadpic(number, page):
    """

    :param number:
    :param page:
    :return:
    """
    while True:
        if number == 0:
            break
        url, params = configs(search, page, number)
        result = requests.get(url, headers=header, params=params).json()
        url_list = []
        for data in result['data'][:-1]:
            url_list.append(data['thumbURL'])
        for i in range(len(url_list)):
            getImg(url_list[i], 60 * page + i, path)
            bar.update(1)
            number -= 1
            if number == 0:
                break
        page += 1
    print("\nfinish!")


def getImg(url, idx, path):
    """

    :param url:
    :param idx:
    :param path:
    :return:
    """
    img = requests.get(url, headers=header)
    # the file name uses the prefix maintenanceWorker_
    file = open(path + 'maintenanceWorker_' + str(idx + 1) + '.jpg', 'wb')
    file.write(img.content)
    file.close()


if __name__ == '__main__':
    search = input("Enter the search keyword: ")
    number = int(input("Enter how many images to download: "))
    path = './img'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0'}

    bar = tqdm.tqdm(total=number)
    page = 0
    loadpic(number, page)
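
One thing to keep in mind: path = './img' is used as a plain string prefix inside getImg, so the files land in the current directory with img glued onto the front of each name, which is exactly the imgmaintenanceWorker_<num>.jpg naming described above. If you would rather save into a real img/ folder, a small hypothetical helper illustrates the idea (a sketch only; get_img_path is not part of the original script):

import os

def get_img_path(path, idx):
    # create the folder on first use; no error if it already exists
    os.makedirs(path, exist_ok=True)
    # join folder and file name instead of concatenating strings
    return os.path.join(path, 'maintenanceWorker_%d.jpg' % (idx + 1))

print(get_img_path('./img', 0))  # e.g. ./img/maintenanceWorker_1.jpg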

 

posted @ 2022-09-04 18:57  张小张#