爬天极网进程池.py

import os
import requests
# 导入进程:
from multiprocessing import Pool
from bs4 import BeautifulSoup


# Download a single image and save it inside a directory.
def download_img(url, dirname=""):
    """Fetch ``url`` and write the response body to a file inside ``dirname``.

    The file name is the last ``/``-separated segment of ``url``.

    Args:
        url: Address of the image to download.
        dirname: Directory the file is written into. The default ``""``
            means the current working directory.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never stored under an image file name.
        OSError: If the target file cannot be opened for writing.
    """
    res = requests.request("get", url)
    # Fail loudly instead of saving an error page as if it were the image.
    res.raise_for_status()
    filename = url.split("/")[-1]
    # os.path.join ignores an empty dirname; the former
    # `dirname + "/" + filename` resolved to "/<filename>" (filesystem root)
    # whenever the default dirname="" was used.
    with open(os.path.join(dirname, filename), "wb") as f:
        f.write(res.content)
    print(f"{dirname}{filename}下载成功!")


# Locate the full-size image on a picture page.
def find_big_img(url):
    """Return the ``src`` attribute of the image shown on the page at ``url``.

    The page is downloaded and parsed, and the first ``<img>`` inside the
    first ``<div class="l_effect_img_mid">`` is looked up.

    Args:
        url: Address of the page to inspect.

    Returns:
        The value of the image's ``src`` attribute (``None`` if the tag has
        no such attribute).

    Raises:
        AttributeError: If the expected ``<div>`` or ``<img>`` is not found.
    """
    page = requests.request("get", url)
    soup = BeautifulSoup(page.content, "html.parser")
    container = soup.find(name="div", attrs={"class": "l_effect_img_mid"})
    return container.find("img").get("src")


def get_page_count(url):
    """Fetch ``url`` and run a tag query inside its ``<div class="flym">``.

    NOTE(review): the query result is discarded and there is no ``return``
    statement, so this function returns ``None`` whenever it completes.
    It looks unfinished; nothing visible in this file calls it.

    Args:
        url: Address of the page to inspect.

    Raises:
        AttributeError: If the page has no ``<div class="flym">``.
    """
    response = requests.request("get", url)
    soup = BeautifulSoup(response.content, "html.parser")
    pager = soup.find(name="div", attrs={"class": "flym"})
    pager.find_all(name="")


# URL prefix of the listing pages; run() appends "_<num>.shtml" to it.
baseurl = "http://pic.yesky.com/c/6_18332"


def run(url, num):
    """Download the images of every album linked from one listing page.

    Fetches ``{url}_{num}.shtml`` and walks each ``<dd>`` inside
    ``<div class="lb_box">``. For every entry it creates a directory named
    after the ``title`` of the entry's first link, saves the image found on
    the linked page, then follows every other link inside that page's
    ``<div class="overview">`` and saves the image found on each of them.

    Args:
        url: Listing URL prefix, without the ``_<num>.shtml`` suffix.
        num: Number of the listing page to process.

    Raises:
        AttributeError: If an expected element is missing from a page.
    """
    res = requests.request("get", f"{url}_{num}.shtml")
    bs = BeautifulSoup(res.text, "html.parser")
    entries = bs.find(name="div", attrs={"class": "lb_box"}).find_all("dd")
    print(entries)

    for entry in entries:
        anchor = entry.find("a")
        dirname = anchor.get("title")
        # exist_ok=True replaces the isdir()/mkdir() pair: several pool
        # workers run this function at once, and a directory created by
        # another worker between the check and the mkdir raised
        # FileExistsError.
        os.makedirs(dirname, exist_ok=True)
        link = anchor.get("href")

        album_res = requests.request("get", link)
        album = BeautifulSoup(album_res.content, "html.parser")
        div_obj = album.find(name="div", attrs={"class": "l_effect_img_mid"})
        current_img_url = div_obj.find("img").get("src")
        download_img(current_img_url, dirname)

        div_overview = album.find(name="div", attrs={"class": "overview"})
        # Distinct names here: the original reused `i` for this loop and
        # overwrote the `url` parameter with each thumbnail's href.
        for thumb in div_overview.find_all("a"):
            page_url = thumb.get("href")
            # The page behind `link` was already handled above.
            if page_url == link:
                continue
            download_img(find_big_img(page_url), dirname)


if __name__ == '__main__':
    # Five worker processes. The context manager tears the pool down even if
    # submitting a task raises.
    with Pool(5) as pool:
        # Listing pages 1 through 7.
        for page in range(1, 8):
            pool.apply_async(
                run,
                (baseurl, page),
                # The AsyncResult is never read, so without this callback an
                # exception raised inside a worker would vanish silently.
                # `page=page` binds the current value for the message.
                error_callback=lambda exc, page=page: print(
                    f"page {page} failed: {exc!r}"
                ),
            )
        # No more tasks will be submitted; wait for the queued ones to finish.
        pool.close()
        pool.join()
效果如下:

 

 



posted @ 2020-01-18 18:28  干it的小张  阅读(143)  评论(0)  编辑  收藏  举报