# -*- coding: utf-8 -*-
"""Scrape wallpaper albums from desk.zol.com.cn, one listing page at a time.

For every album linked on a listing page the script creates a directory
under ``storage_path_of_albums`` and saves each full-size image into it.
The user is prompted (Y/N) before an existing path is deleted and before
the scraper moves on to the next listing page.

Ported to Python 3: ``raw_input`` -> ``input``, print statements ->
``print()``, and the ``reload(sys); sys.setdefaultencoding('utf8')`` hack
removed (it only papered over Python 2 str/unicode bugs).
"""
import os
import re
import shutil

import requests
from bs4 import BeautifulSoup


def get_soup(url):
    """Fetch *url* over HTTP and return it parsed as a BeautifulSoup tree."""
    text = requests.get(url).text
    return BeautifulSoup(text, 'lxml')


def mkdir(path):
    """Create *path* as a fresh, empty directory.

    If *path* already exists the user is asked for confirmation: on 'Y'
    the existing file or directory tree is removed first; on 'N' the
    whole script exits. Any other answer re-asks the question.
    """
    if os.path.exists(path):
        while True:
            key = input('%s已存在,继续操作将删除它,是否继续?(Y/N)' % path)
            if key == 'Y':
                break
            elif key == 'N':
                exit()
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
    os.mkdir(path)


def download_all_albums_of_current_page(url):
    """Download every album reachable from the listing page at *url*.

    Each album becomes a sub-directory of the module-level
    ``storage_path_of_albums`` (set in ``__main__``); pictures are saved
    as 1.jpg, 2.jpg, ... keeping the site's file extension. When the page
    is done, offers to recurse into the site's "next page" link.
    """
    prefix = 'http://desk.zol.com.cn'
    soup = get_soup(url)
    # Resolve the "next page" link up front, before `soup`-derived state
    # is left behind; on the last page there is no such anchor.
    next_anchor = soup.find('a', id='pageNext')
    url_of_next_page = prefix + next_anchor.get('href') if next_anchor else None

    for album_anchor in soup.find('ul', 'pic-list2 clearfix').find_all('a'):
        album_url = prefix + album_anchor.get('href')
        album_soup = get_soup(album_url)
        albums_name = album_soup.find('a', id='titleName').get_text()
        print('正在下载《%s》相册……' % albums_name)
        storage_path_of_picture = storage_path_of_albums + '/' + albums_name
        mkdir(storage_path_of_picture)
        for count, picture_anchor in enumerate(
                album_soup.find('ul', id='showImg').find_all('a'), 1):
            picture_page_url = prefix + picture_anchor.get('href')
            picture_soup = get_soup(picture_page_url)
            image_url = picture_soup.find('img', id='bigImg').get('src')
            content = requests.get(image_url).content
            # Keep the original extension; fall back to .jpg when the URL
            # has none (the original crashed on a None match here).
            match = re.search(r'.*(\..*)', image_url)
            suffix = match.group(1) if match else '.jpg'
            filename = storage_path_of_picture + '/' + str(count) + suffix
            print('正在下载第%d张照片……' % count)
            with open(filename, 'wb') as f:
                f.write(content)

    while True:
        key = input('当前页面已经下载完了,是否继续下载下一页?(Y/N)')
        if key == 'Y':
            if url_of_next_page is not None:
                download_all_albums_of_current_page(url_of_next_page)
            # BUG FIX: without this break, the loop re-prompted after the
            # recursion returned and a second 'Y' re-downloaded the same
            # next page all over again.
            break
        elif key == 'N':
            break


if __name__ == '__main__':
    storage_path_of_albums = './picture'
    mkdir(storage_path_of_albums)
    url = 'http://desk.zol.com.cn/meinv/'
    download_all_albums_of_current_page(url)
# 如果您觉得我的文章对您有帮助并想鼓励我继续原创,请扫描下方二维码进行打赏!
# 谢谢!