# Python 单线程图片下载 (single-threaded image downloader)
"""Single-threaded gallery image downloader.

Asks for a range of listing pages, reads each listing page, extracts the
gallery links it contains, then walks every gallery page by page and saves
the main image of each page under ``./images/<listing page>/<gallery title>/``.
"""
import urllib.request
import urllib.parse
import urllib.error
import re
import os
import ssl

# NOTE(security): this turns off TLS certificate verification for every HTTPS
# request urllib makes in this process.  It is kept because it is existing
# module-level behavior, but it exposes the downloads to man-in-the-middle
# tampering and should be removed once the target host's certificate chain
# validates.
ssl._create_default_https_context = ssl._create_unverified_context

# Root directory that receives one sub-folder per listing page.
path = "./images"

# Headers sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/75.0.3770.100 Safari/537.36",
    "referer": "https://www.mzitu.com/xinggan/"
}

# Upper bound on the number of pages requested for a single gallery.
MAX_PAGES_PER_CATEGORY = 1000

# The patterns are compiled WITHOUT re.S on purpose: the greedy ``.*`` inside
# them relies on ``.`` stopping at line ends.  Each capture group keeps the
# surrounding quote characters of the attribute value.
CATEGORY_NAME_PATTERN = re.compile(r'<li>.*?<img .* alt=(.*?) .*? />.*?')
CATEGORY_HREF_PATTERN = re.compile(r'<li><a href=(.*?) .*?>.*?')
MAIN_IMAGE_PATTERN = re.compile(
    r'<div class="main-image"><p>.*?<img src=(.*?) .*? />.*?')


def _fetch(request):
    """Open *request* and return the raw response body, closing the response."""
    with urllib.request.urlopen(request) as response:
        return response.read()


def handler_request(url, pageIndex):
    """Build the request object for listing page *pageIndex*.

    Args:
        url: Listing URL prefix; the page number is appended to it verbatim.
        pageIndex: Page number to append.

    Returns:
        A ``urllib.request.Request`` carrying the module-level ``headers``.
    """
    url = url + str(pageIndex)
    # Build the request object.
    request = urllib.request.Request(url=url, headers=headers)
    return request


def extract_category_links(content):
    """Map gallery titles to their (still quoted) link tokens.

    Args:
        content: HTML text of a listing page.

    Returns:
        A dict whose keys are the captured ``alt`` values with their first and
        last character (the quotes) removed, and whose values are the captured
        ``href`` tokens exactly as matched, quotes included.  Titles and links
        are paired by position; surplus entries on either side are ignored.
    """
    # findall() on a compiled pattern takes (string, pos, endpos); passing a
    # flag there would silently skip the start of the document, so the whole
    # text is searched from position 0.
    alts = CATEGORY_NAME_PATTERN.findall(content)
    hrefs = CATEGORY_HREF_PATTERN.findall(content)
    return {alt[1:-1]: href for alt, href in zip(alts, hrefs)}


def extract_main_image_url(content):
    """Return the quoted ``src`` token of the main image, or ``None``.

    Args:
        content: HTML text of a single gallery page.

    Returns:
        The first captured ``src`` value, quotes included, or ``None`` when
        the page contains no match.
    """
    found = MAIN_IMAGE_PATTERN.search(content)
    return found.group(1) if found else None


def get_images_url(content, basePath):
    """Download every gallery linked from one listing page.

    Args:
        content: HTML text of the listing page.
        basePath: Folder that receives one sub-folder per gallery.
    """
    for item in extract_category_links(content).items():
        image_category_response(item, basePath)


def image_category_response(item, basePath):
    """Download the main image of each page of one gallery.

    Pages ``0 .. MAX_PAGES_PER_CATEGORY - 1`` are requested in order.  The
    walk stops at the first error; the error is printed rather than raised so
    that one failing gallery does not abort the rest of the run.

    Args:
        item: ``(title, quoted_href)`` pair; the first and last character of
            the href token are dropped to obtain the gallery URL.
        basePath: Parent folder; the images go to ``basePath/title``.
    """
    alt = item[0]
    save_folder = os.path.join(basePath, alt)
    os.makedirs(save_folder, exist_ok=True)

    baseurl = item[1][1:-1]
    for pageIndex in range(MAX_PAGES_PER_CATEGORY):
        page_url = baseurl + "/" + str(pageIndex)
        try:
            # Build the request object, send it and decode the page.
            request = urllib.request.Request(url=page_url, headers=headers)
            content = _fetch(request).decode()
            img_url = extract_main_image_url(content)
            if img_url is None:
                print("no main image found on {0}".format(page_url))
                return
            download_images(img_url, save_folder)
        except urllib.error.URLError:
            # A failed request is treated as the end of the gallery.
            print("最大页面数{0}".format(pageIndex - 1))
            return
        except Exception as exc:
            # Best effort: report the problem and give up on this gallery.
            print(exc)
            return


def download_images(url, save_path):
    """Download one image into *save_path*.

    Args:
        url: Image URL token still wrapped in its quote characters; the first
            and last character are dropped before use.
        save_path: Existing folder; the file is named after the last path
            segment of the URL.
    """
    url = url[1:-1]
    print(url)
    # Build the request object and read the whole body before the file is
    # opened, so a failed request leaves no empty file behind.
    request = urllib.request.Request(url=url, headers=headers)
    data = _fetch(request)
    filename = url.split('/')[-1]
    with open(os.path.join(save_path, filename), 'wb') as fb:
        fb.write(data)


def parse_pages(content):
    """Print *content* (debugging aid)."""
    print(content)


def main():
    """Prompt for a page range and download every listing page in it."""
    url = 'https://www.mzitu.com/xinggan/page/'
    start_page = int(input("请输入起始页码:"))
    end_page = int(input("请输入结束页码:"))
    # Create the root folder.
    os.makedirs(path, exist_ok=True)
    for pageIndex in range(start_page, end_page + 1):
        print("...........开始下载第{0}页".format(pageIndex))
        # Create the folder for this listing page.
        save_path = create_folder(pageIndex)
        # Build the request.
        request = handler_request(url, pageIndex)
        # Send the request and read the response body.
        content = _fetch(request).decode()
        # Parse the page, extract the images and download them.
        get_images_url(content, save_path)
        print("...........结束下载第{0}页".format(pageIndex))


def create_folder(pageIndex):
    """Create (if needed) and return the folder for listing page *pageIndex*.

    Returns:
        The folder path with backslashes replaced by ``/`` and a trailing
        ``/`` appended.
    """
    save_path = os.path.join(path, str(pageIndex))
    os.makedirs(save_path, exist_ok=True)
    return save_path.replace("\\", "/") + "/"


if __name__ == "__main__":
    main()