```python
import os
import threading
import urllib.request

import requests
from bs4 import BeautifulSoup

lock1 = threading.RLock()  # the shared resources PAGE_URL and FACE_URL must be protected by locks
lock2 = threading.RLock()
PAGE_URL = []
FACE_URL = []
for i in range(1, 1476):
    PAGE_URL.append('http://www.doutula.com/photo/list/?page=' + str(i))  # URL pattern observed on the site

def get_faceurl():
    while True:
        lock1.acquire()
        if len(PAGE_URL) == 0:  # termination condition for the worker thread
            lock1.release()
            break
        url = PAGE_URL.pop()  # release the lock right after modifying the shared resource
        lock1.release()
        respon = requests.get(url)
        soup = BeautifulSoup(respon.text, 'html.parser')
        lock2.acquire()  # acquire the lock before modifying the shared resource
        for tag in soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'}):
            FACE_URL.append(tag['data-original'])
        lock2.release()  # release the lock

def download_face():
    while True:
        lock2.acquire()
        if len(FACE_URL) == 0:
            lock2.release()
            break
        url = FACE_URL.pop()
        lock2.release()
        # os.path.join handles the different path separators on Linux and Windows
        path_file = os.path.join('download', url.split('/')[-1])
        urllib.request.urlretrieve(url, path_file)

def main():
    os.makedirs('download', exist_ok=True)  # make sure the target directory exists
    producers = [threading.Thread(target=get_faceurl) for _ in range(2)]
    for td in producers:
        td.start()
    for td in producers:  # join every producer thread, not just the last one started
        td.join()
    consumers = [threading.Thread(target=download_face) for _ in range(4)]
    for td in consumers:
        td.start()
    for td in consumers:
        td.join()

main()
```
Two threads fetch the image URLs on each page and store them in a global list with the list's append method; four threads then take URLs off that shared list with the pop method and download them.
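For comparison, the same producer/consumer split can be written without manual locking by using the standard library's thread-safe `queue.Queue`. The following is a minimal sketch under the same assumptions as the code above (same page range, same CSS class, `download` directory created up front); it is an alternative formulation, not the original implementation:

```python
import os
import queue
import threading
import urllib.request

import requests
from bs4 import BeautifulSoup

page_queue = queue.Queue()  # pages still to scrape
face_queue = queue.Queue()  # image URLs still to download

for i in range(1, 1476):
    page_queue.put('http://www.doutula.com/photo/list/?page=' + str(i))

def produce():
    # Drain the page queue; each page yields zero or more image URLs.
    while True:
        try:
            url = page_queue.get_nowait()
        except queue.Empty:
            break
        soup = BeautifulSoup(requests.get(url).text, 'html.parser')
        for tag in soup.find_all('img', attrs={'class': 'img-responsive lazy image_dta'}):
            face_queue.put(tag['data-original'])

def consume():
    # Drain the image queue and save each file under download/.
    while True:
        try:
            url = face_queue.get_nowait()
        except queue.Empty:
            break
        urllib.request.urlretrieve(url, os.path.join('download', url.split('/')[-1]))

os.makedirs('download', exist_ok=True)

producers = [threading.Thread(target=produce) for _ in range(2)]
for t in producers:
    t.start()
for t in producers:
    t.join()  # collect every image URL before downloading starts

consumers = [threading.Thread(target=consume) for _ in range(4)]
for t in consumers:
    t.start()
for t in consumers:
    t.join()
```

The queue does the locking internally, so the acquire/release bookkeeping around PAGE_URL and FACE_URL disappears, and joining all producers before starting the consumers preserves the original guarantee that no download thread sees an empty list and exits early.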