Python3 --- 下载图片代码:
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Scrape the shuaia.net "meinv" listing pages and download one image per entry."""

import os
import time
from urllib.parse import urljoin
from urllib.request import urlretrieve

import requests
from bs4 import BeautifulSoup


class Download(object):
    """Collect detail-page links from the listing pages and save their images."""

    # Directory used when download_img() is called without an explicit save_dir.
    DEFAULT_SAVE_DIR = 'D:\\PycharmProjects1\\images\\'

    # Seconds to wait for any single HTTP response before giving up.
    REQUEST_TIMEOUT = 30

    # Characters that are path separators or reserved in Windows file names;
    # each is replaced by '_' before a scraped title is used as a file name.
    _UNSAFE_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', '_'))

    def __init__(self):
        # Sent with every requests.get() call made by this class.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
        }

    def list_url(self, num):
        """Collect the detail-page links found on the listing pages.

        Pages 1 .. num-1 are fetched (``num`` is an exclusive upper bound).
        Page 1 is the section index; page ``i > 1`` is ``index_<i>.html``.

        Returns:
            A list of strings of the form ``"<img alt text>=<href>"``, one per
            element with class ``item-img`` that has both an ``alt`` text on
            its image and an ``href``.
        """
        list_url = []
        for i in range(1, num):
            if i == 1:
                url = 'http://www.shuaia.net/meinv/'
            else:
                # Use the loop index: formatting with ``num`` requested the
                # same page on every iteration.
                url = 'http://www.shuaia.net/meinv/index_%d.html' % i
            req = requests.get(url=url, headers=self.headers,
                               timeout=self.REQUEST_TIMEOUT)
            req.encoding = 'utf-8'
            bf = BeautifulSoup(req.text, 'lxml')
            for each in bf.find_all(class_="item-img"):
                img = each.img
                title = img.get('alt') if img is not None else None
                href = each.get('href')
                # Skip entries that lack the pieces needed to build a record
                # instead of failing on None during concatenation.
                if title is None or href is None:
                    continue
                list_url.append(title + '=' + href)
            # Pause between listing pages (presumably to throttle requests).
            time.sleep(3)
        print("链接地址采集完成")
        return list_url

    @staticmethod
    def _find_image_src(html):
        """Return the src of the first image inside a ``wr-single-content`` div, or None."""
        soup = BeautifulSoup(html, 'lxml')
        for container in soup.find_all('div', class_='wr-single-content'):
            img = container.find('img')
            if img is not None and img.get('src'):
                return img.get('src')
        return None

    def download_img(self, list_url, save_dir=None):
        """Download the image referenced by each ``"<title>=<url>"`` entry.

        Args:
            list_url: Entries as produced by :meth:`list_url`.
            save_dir: Directory to write the ``.jpg`` files into; defaults to
                ``DEFAULT_SAVE_DIR``. It is created if it does not exist.

        Entries that cannot be parsed, or whose page contains no matching
        image, are reported and skipped.
        """
        if save_dir is None:
            save_dir = self.DEFAULT_SAVE_DIR
        os.makedirs(save_dir, exist_ok=True)

        for each_img in list_url:
            # Split on the first '=' only so a URL with a query string
            # (which contains '=') is kept intact.
            title, _, targets_url = each_img.partition('=')
            if not targets_url:
                print('Skipping malformed entry: ' + each_img)
                continue
            # The title comes from scraped HTML; neutralise separators and
            # reserved characters so it cannot escape save_dir.
            filename = title.translate(self._UNSAFE_FILENAME_CHARS) + '.jpg'
            print('下载:' + filename)

            img_req = requests.get(url=targets_url, headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT)
            img_req.encoding = 'utf-8'
            img_src = self._find_image_src(img_req.text)
            if img_src is None:
                print('No image found on ' + targets_url + ', skipping')
            else:
                # urljoin leaves an absolute src untouched and resolves a
                # relative one against the detail page URL.
                urlretrieve(url=urljoin(targets_url, img_src),
                            filename=os.path.join(save_dir, filename))
            # Pause after each detail page (presumably to throttle requests).
            time.sleep(10)


if __name__ == '__main__':
    dow = Download()
    list_url = dow.list_url(2)
    dow.download_img(list_url)