性能
。。。
"""
Concurrency via a thread pool: requests run in parallel, but each worker
thread sits idle between sending its request and receiving the response.
"""

# Approach 1: handle the response directly inside the worker
from concurrent.futures import ThreadPoolExecutor

import requests


def task(url):
    """Fetch ``url`` and print the URL together with the response object.

    Request failures are printed instead of being stored on a future that
    nobody inspects, which would hide them completely.
    """
    try:
        # A timeout keeps a stalled server from blocking pool shutdown forever.
        response = requests.get(url=url, timeout=10)
    except requests.RequestException as error:
        print(url, error)
        return
    print(url, response)


url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://www.huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

# Leaving the ``with`` block calls shutdown(wait=True), even if submit() raises.
with ThreadPoolExecutor(5) as pool:
    for url in url_list:
        pool.submit(task, url)

# ---------------------------------------------------

# Approach 2: handle the response in a done-callback
from concurrent.futures import ThreadPoolExecutor

import requests


def task(url):
    """Fetch ``url`` and return the response object to the future."""
    # A timeout keeps a stalled server from blocking pool shutdown forever.
    response = requests.get(url=url, timeout=10)
    return response


def done(future, *args, **kwargs):
    """Print the URL and response of a finished future.

    ``future.result()`` re-raises whatever ``task`` raised, so request
    failures are caught here and printed.
    """
    try:
        response = future.result()
    except requests.RequestException as error:
        print(error)
        return
    print(response.url, response)


url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://www.huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

# Leaving the ``with`` block calls shutdown(wait=True), even if submit() raises.
with ThreadPoolExecutor(5) as pool:
    for url in url_list:
        v = pool.submit(task, url)
        v.add_done_callback(done)
协程 异步IO
    - asyncio
        内部实现了异步IO操作,但没有实现全部请求,没有实现Http请求
        - 示例1: asyncio.sleep(5)
        - 示例2: 自己封装Http数据包
        - 示例3: asyncio + aiohttp
            aiohttp模块: 封装Http数据包 pip3 install aiohttp
        - 示例4: asyncio+requests
            requests模块: 封装Http数据包 pip3 install requests
    - gevent,greenlet+异步IO
        pip3 install greenlet
        pip3 install gevent
        - 示例1:gevent+requests
        - 示例2:gevent(协程池,最多发多少个请求)+requests
        - 示例3:gevent+requests => grequests
            pip3 install grequests
    - Twisted
        pip3 install twisted
    - Tornado
        pip3 install tornado

=====> gevent > Twisted > Tornado > asyncio
协程 异步IO
    - asyncio
        内部实现了异步IO操作,但没有实现全部请求,没有实现Http请求
        - 示例1: asyncio.sleep(5)
        - 示例2: 自己封装Http数据包
        - 示例3: asyncio + aiohttp
            aiohttp模块: 封装Http数据包 pip3 install aiohttp
        - 示例4: asyncio+requests
            requests模块: 封装Http数据包 pip3 install requests
    - gevent,greenlet+异步IO
        pip3 install greenlet
        pip3 install gevent
        - 示例1:gevent+requests
        - 示例2:gevent(协程池,最多发多少个请求)+requests
        - 示例3:gevent+requests => grequests
            pip3 install grequests
    - Twisted
        pip3 install twisted
    - Tornado
        pip3 install tornado

=====> gevent > Twisted > Tornado > asyncio
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from twisted.internet import defer
from twisted.web.client import getPage
from twisted.internet import reactor


def one_done(arg):
    """Callback for a single request: print whatever it was fired with."""
    print(arg)


def all_done(arg):
    """Callback for the whole batch: print 'done' and stop the reactor."""
    print('done')
    reactor.stop()


@defer.inlineCallbacks
def task(url):
    """Request ``url`` with getPage and yield on the resulting deferred."""
    page = getPage(url.encode('utf8'))  # send the HTTP request
    page.addCallback(one_done)
    yield page


url_list = [
    'http://www.cnblogs.com',
    'http://www.cnblogs.com',
    'http://www.cnblogs.com',
    'http://www.cnblogs.com',
]

# One deferred per url; each request has already been sent by this point.
defer_list = [task(url) for url in url_list]

batch = defer.DeferredList(defer_list)
batch.addBoth(all_done)

reactor.run()  # event loop; all_done() calls reactor.stop()
# IO多路复用: 监听多个socket对象
# 异步IO: 非阻塞的socket + IO多路复用
import socket
import select

############### The essence of an HTTP request: blocking
# sk = socket.socket()
# # connect
# sk.connect(('www.baidu.com',80))  # blocks on IO
# print('连接成功')
# # send the request
# sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')
# # wait for the server's response
# data = sk.recv(8096)  # blocks on IO
# print(data)
# # close the connection
# sk.close()

############### The essence of an HTTP request: non-blocking
# sk = socket.socket()
# sk.setblocking(False)
#
# try:
#     # connect
#     sk.connect(('www.baidu.com',80))  # would block in blocking mode
#     print('连接成功')
# except BlockingIOError as e:
#     print(e)
#
# # send the request
# sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')
#
# # wait for the server's response
# data = sk.recv(8096)  # would block in blocking mode
# print(data)
#
# # close the connection
# sk.close()

#########################


class HttpRequest:
    """Bundle a non-blocking socket with its host and completion callback.

    Instances expose ``fileno()`` so they can be passed straight to
    ``select.select``.
    """

    def __init__(self, sk, host, callback):
        self.socket = sk
        self.host = host
        self.callback = callback
        # Response bytes collected so far; a response may arrive over
        # several select() rounds.
        self.chunks = []

    def fileno(self):
        """Return the descriptor of the wrapped socket."""
        return self.socket.fileno()


class AsyncRequest:
    """Run several HTTP/1.0 GET requests over one select() loop."""

    def __init__(self):
        self.conn = []  # requests whose response is still outstanding
        self.connection = []  # requests whose GET has not been sent yet

    def add_request(self, host, callback):
        """Start a non-blocking connect to ``host``:80 and queue the request.

        ``callback`` is later called with the raw response bytes. Errors
        other than the expected ``BlockingIOError`` propagate to the caller
        after the socket has been closed.
        """
        sk = socket.socket()
        sk.setblocking(False)
        try:
            sk.connect((host, 80))
        except BlockingIOError:
            # Expected: the connect is in progress and finishes in run().
            pass
        except OSError:
            sk.close()  # do not leak the descriptor on a real failure
            raise
        request = HttpRequest(sk, host, callback)
        self.conn.append(request)
        self.connection.append(request)

    def run(self):
        """Drive all queued requests until every one has finished."""
        while self.conn:
            rlist, wlist, _ = select.select(
                self.conn, self.connection, self.conn, 0.05)
            for w in wlist:
                self._send_request(w)
            for r in rlist:
                # A request dropped by a failed send above must be skipped.
                if r in self.conn:
                    self._receive(r)

    def _send_request(self, request):
        """Send the GET line once the socket is writable."""
        tpl = "GET / HTTP/1.0\r\nHost:%s\r\n\r\n" % (request.host,)
        try:
            request.socket.send(bytes(tpl, encoding='utf-8'))
        except OSError as error:
            # A failed connect also reports writable; give up on this
            # request only, and keep serving the others.
            print(request.host, '连接失败:', error)
            self._drop(request)
            return
        print(request.host, '连接成功...')
        self.connection.remove(request)

    def _receive(self, request):
        """Read what is available; finish the request on end of stream."""
        while True:
            try:
                chunk = request.socket.recv(8096)
            except BlockingIOError:
                # Nothing more right now; wait for the next select() round
                # instead of delivering a truncated response.
                return
            except OSError:
                # E.g. connection reset: deliver what has been received.
                break
            if not chunk:
                # An empty read means the peer closed the connection.
                break
            request.chunks.append(chunk)
        try:
            request.callback(b"".join(request.chunks))
        finally:
            self._drop(request)

    def _drop(self, request):
        """Close the request's socket and remove it from both lists."""
        request.socket.close()
        for pending in (self.conn, self.connection):
            if request in pending:
                pending.remove(request)


def f1(data):
    """Demo callback: print the response with a 'save to file' label."""
    print('保存到文件', data)


def f2(data):
    """Demo callback: print the response with a 'save to database' label."""
    print('保存到数据库', data)


url_list = [
    {'host': 'www.baidu.com', 'callback': f1},
    {'host': 'www.cnblogs.com', 'callback': f2},
    {'host': 'cn.bing.com', 'callback': f2},
]

if __name__ == "__main__":
    req = AsyncRequest()
    for item in url_list:
        req.add_request(item["host"], item["callback"])

    req.run()