day10_多线程把六个网站写到文件里
把六个网站写到文件里(串行,单线程,用作对比)
import requests,time,threading
def write_html(url, name, timeout=10):
    """Download ``url`` and save the response text to the file ``name``.

    Args:
        url: Full URL to fetch, including the scheme (e.g. ``http://...``).
        name: Path of the output file; it is created or overwritten.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on an
            unresponsive host.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # The HTTP status code is not checked, so an error page is saved to the
    # file just like a successful response.
    r = requests.get(url, timeout=timeout)
    with open(name, 'w', encoding='utf8') as f:
        f.write(r.text)
# Serial baseline: fetch the six sites one after another and time the run.
urls = [
    'www.nnzhp.cn',
    'www.besttest.cn',
    'www.imdsx.cn',
    'sb.nnzhp.cn',
    'bbs.besttest.cn',
    'video.besttest.cn',
]
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
for url in urls:  # visit every host in turn
    new_url = 'http://' + url  # full URL of the site
    file_name = url + '.html'  # output file for this site
    write_html(new_url, file_name)  # blocks until this page is saved (serial)
end_time = time.perf_counter()
print('程序总共运行了', end_time - start_time)
多线程把六个网站写到文件里(并行)
import requests
import time
import threading
def write_html(url, name, timeout=10):
    """Download ``url`` and save the response text to the file ``name``.

    Args:
        url: Full URL to fetch, including the scheme (e.g. ``http://...``).
        name: Path of the output file; it is created or overwritten.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on an
            unresponsive host, which would also keep any thread running
            this function (and whoever joins it) waiting forever.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # The HTTP status code is not checked, so an error page is saved to the
    # file just like a successful response.
    r = requests.get(url, timeout=timeout)
    with open(name, 'w', encoding='utf8') as f:
        f.write(r.text)
# Threaded version: fetch the six sites concurrently and time the run.
urls = [
    'www.nnzhp.cn',
    'www.besttest.cn',
    'www.imdsx.cn',
    'sb.nnzhp.cn',
    'bbs.besttest.cn',
    'video.besttest.cn',
]
lis = []  # holds every started thread so the main thread can join them later
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
for url in urls:  # one worker thread per host
    new_url = 'http://' + url
    file_name = url + '.html'
    t = threading.Thread(target=write_html, args=(new_url, file_name))  # create a thread
    lis.append(t)
    t.start()  # start the thread right away
# All six threads are now running. join() makes the main thread wait for each
# child thread to finish, i.e. until every page has been fetched, before it
# continues with any follow-up work (for example sending an email).
for obj in lis:
    obj.join()  # must happen outside the start loop, otherwise the downloads run one by one
end_time = time.perf_counter()
print('程序总共运行了', end_time - start_time)