day10_单线程和多线程下载文件
单线程下载文件:
import requests
import time
from hashlib import md5
def down_load_pic(url, timeout=30):
    """Download the image at ``url`` into the current working directory.

    The file is named ``<md5 hex digest of the URL>.png``, so the same URL
    always maps to the same file name.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout an unresponsive server would block the caller forever.
    req = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as a ".png".
    req.raise_for_status()
    m = md5(url.encode())
    # The md5 hex digest of the URL is used as the file-name stem.
    with open(m.hexdigest() + '.png', 'wb') as fw:
        fw.write(req.content)
# Images fetched by the sequential demo.
url_list = [
    'http://www.nnzhp.cn/wp-content/uploads/2019/10/f410afea8b23fa401505a1449a41a133.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/481b5135e75c764b32b224c5650a8df5.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/b23755cdea210cfec903333c5cce6895.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/542824dde1dbd29ec61ad5ea867ef245.png',
]

# Time the whole run: each download starts only after the previous one
# has returned.
start_time = time.time()
for url in url_list:
    down_load_pic(url)
end_time = time.time()

# Elapsed wall-clock seconds for the sequential run.
print(end_time - start_time)
多线程下载文件:
import requests
import time
import threading
from hashlib import md5
def down_load_pic(url, timeout=30):
    """Download the image at ``url`` into the current working directory.

    The file is named ``<md5 hex digest of the URL>.png``, so the same URL
    always maps to the same file name.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout an unresponsive server would block this thread forever.
    req = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as a ".png".
    req.raise_for_status()
    m = md5(url.encode())
    with open(m.hexdigest() + '.png', 'wb') as fw:
        fw.write(req.content)
# Images fetched by the multi-threaded demo.
url_list = [
    'http://www.nnzhp.cn/wp-content/uploads/2019/10/f410afea8b23fa401505a1449a41a133.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/481b5135e75c764b32b224c5650a8df5.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/b23755cdea210cfec903333c5cce6895.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/542824dde1dbd29ec61ad5ea867ef245.png',
]

start_time = time.time()

# Start one thread per URL and keep a handle on each, so the main thread can
# wait for exactly these workers.
threads = []
for url in url_list:
    t = threading.Thread(target=down_load_pic, args=(url,))
    t.start()
    threads.append(t)

# join() blocks without spinning. The previous busy-wait on the active thread
# count burned CPU, competed with the workers for the GIL, and would never
# finish if any unrelated thread was alive in the process.
for t in threads:
    t.join()

end_time = time.time()
# Elapsed wall-clock seconds for the concurrent run.
print(end_time - start_time)
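GIL(Global Interpreter Lock,全局解释器锁)使同一时刻只有一个线程在执行Python字节码,因此python多线程不能利用多核cpu,只能用到一个cpu。对cpu密集型任务来说,python多线程比较鸡肋:看着像并行工作,实际上是cpu在多个线程之间做上下文切换。但下载文件属于IO密集型任务:cpu负责调度,线程发出请求之后,剩下的等待网络数据、把图片存到磁盘这些工作基本跟cpu没有关系,等待期间GIL会被释放,cpu就可以再去调度其他的线程,所以上面的多线程下载比单线程快。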
如果url比较多,比如list里有1000个url,就不能同时启动1000个线程,这时要用到线程池:先执行 pip install threadpool 安装,再在代码里 import threadpool。
import threadpool
import requests
from hashlib import md5
def down_load_pic(url, timeout=30):
    """Download the image at ``url`` into the current working directory.

    The file is named ``<md5 hex digest of the URL>.png``, so the same URL
    always maps to the same file name.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout an unresponsive server would tie up a pool worker forever.
    req = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as a ".png".
    req.raise_for_status()
    m = md5(url.encode())
    with open(m.hexdigest() + '.png', 'wb') as fw:
        fw.write(req.content)
# Images fetched by the thread-pool demo.
url_list = [
    'http://www.nnzhp.cn/wp-content/uploads/2019/10/f410afea8b23fa401505a1449a41a133.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/481b5135e75c764b32b224c5650a8df5.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/b23755cdea210cfec903333c5cce6895.png',
    'http://www.nnzhp.cn/wp-content/uploads/2019/11/542824dde1dbd29ec61ad5ea867ef245.png',
]

# Create a pool with 20 worker threads; pick the size to suit the machine.
pool = threadpool.ThreadPool(20)

# Build the work requests that will call down_load_pic for the URLs.
reqs = threadpool.makeRequests(down_load_pic, url_list)

# Queue every request on the pool.
for req in reqs:
    pool.putRequest(req)

# Wait for the worker threads to finish before reporting completion.
pool.wait()
print('end')