异步IO
一、异步IO -客户端
1、使用 selectors 模块下的 DefaultSelector:
selector = DefaultSelector() # 自动选择,win下选select ,linux下选epoll
1.1、适用于linux epoll:
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
import socket

selector = DefaultSelector()  # picks the best mechanism automatically: select on Windows, epoll on Linux


class Fetcher(object):
    """Browser-like HTTP client: the classic select + callback + event-loop pattern.

    Each instance drives one non-blocking socket through connect -> send
    request -> read response, with the selector dispatching the callbacks.
    """

    def readable(self, key):
        """Read callback: collect response bytes; on EOF print the body.

        No ``while True`` needed — the event loop re-invokes this callback
        as long as the socket stays readable.
        """
        d = self.client.recv(1024)
        if d:
            self.data += d
        else:
            # Empty read == server closed the connection: response complete.
            selector.unregister(key.fd)
            data = self.data.decode("utf8")
            html_data = data.split("\r\n\r\n")[1]  # payload after the header block
            print(html_data)
            self.client.close()

    def connected(self, key):
        """Write callback: the connect finished, so send the request.

        :param key: SelectorKey for the writable socket.
        """
        # The EVENT_WRITE registration is one-shot: unregister after each successful connect.
        selector.unregister(key.fd)
        self.client.send("GET {} HTTP/1.1\r\nHost:{}\r\n\r\n".format(self.path, self.host).encode("utf8"))
        # Readable next => the server has accepted our request and is answering.
        selector.register(self.client.fileno(), events=EVENT_READ, data=self.readable)

    def get_url(self, url):
        """Start a non-blocking fetch of *url* (port 80, plain HTTP)."""
        url = urlparse(url)
        self.host = url.netloc
        self.path = url.path
        self.data = b""
        if self.path == "":
            self.path = '/'
        # Open the connection without blocking the caller.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((self.host, 80))
        except BlockingIOError:
            pass  # expected on a non-blocking socket: connect completes later
        # Writable => connection established, ready to send.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)


def loop():
    """Event loop: poll socket states and invoke the registered callbacks.

    Notes:
      1. select itself has no register/callback notion;
      2. wiring state changes to callbacks is the programmer's job — it
         happens here, not at registration time.

    FIX: the original performed a single ``selector.select()`` pass, so only
    the first batch of ready events was dispatched and most responses were
    never read.  Loop until no file descriptors remain registered.
    """
    while selector.get_map():
        ready = selector.select()
        for key, mask in ready:
            call_back = key.data  # key.data holds the callback stored at register time
            call_back(key)


if __name__ == '__main__':
    import time
    start_time = time.time()
    for _ in range(6):
        url = "http://www.baidu.com"
        fetcher = Fetcher()
        fetcher.get_url(url)
    loop()  # select-style IO multiplexing: keep polling socket states
    print(time.time() - start_time)
1.2、适用于win select:
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
import socket

selector = DefaultSelector()  # DefaultSelector: select on Windows, epoll on Linux

urls = []      # outstanding URLs; drained as each fetch completes
stop = False   # flipped to True once every URL has been fetched


class Fetcher(object):
    """Browser-like HTTP client built on select + callbacks + an event loop."""

    def readable(self, key):
        """Read callback: drain whatever the server has sent so far.

        The event loop re-invokes this while data remains, so no inner loop
        is required here.
        """
        chunk = self.client.recv(1024)
        if chunk:
            self.data += chunk
            return
        # Empty read: the server closed the connection — response complete.
        selector.unregister(key.fd)
        raw = self.data.decode("utf8")
        print(raw.split("\r\n\r\n")[1])  # body portion of the response
        self.client.close()
        urls.remove(self.spider_url)
        if not urls:
            # Every fetch finished: signal the event loop to shut down.
            global stop
            stop = True

    def connected(self, key):
        """Write callback: connection is up; send the request, await the reply."""
        # The write registration is one-shot — drop it after each successful connect.
        selector.unregister(key.fd)
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(self.path, self.host)
        self.client.send(request.encode("utf8"))
        # Readable next means the server accepted our request and is responding.
        selector.register(self.client.fileno(), events=EVENT_READ, data=self.readable)

    def get_url(self, url):
        """Kick off a non-blocking HTTP fetch of *url* (port 80)."""
        self.spider_url = url
        parsed = urlparse(url)
        self.host = parsed.netloc
        self.path = parsed.path or '/'
        self.data = b""
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((self.host, 80))
        except BlockingIOError:
            pass  # expected: a non-blocking connect completes asynchronously
        # Writable => connected; we can then send the request.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)


def loop():
    """Event loop: keep polling socket states and dispatch their callbacks.

    select itself has no register/callback model — wiring readiness to the
    callbacks is done by us, right here (not at registration time).
    """
    while not stop:
        for key, mask in selector.select():
            key.data(key)  # key.data is the callback stored on the SelectorKey

if __name__ == '__main__':
    import time
    start_time = time.time()
    for _ in range(6):
        url = "http://www.baidu.com"
        urls.append(url)  # bookkeeping so we know when every fetch is done
        Fetcher().get_url(url)
    loop()  # select-style IO multiplexing: poll socket states until done
    print(time.time() - start_time)
1.3、使用aiohttp实现异步IO-客户端http请求:
import asyncio
import aiohttp
import time


async def fetch_async(url):
    """Perform one asynchronous HTTP GET and print the decoded body.

    At each ``await`` this coroutine is suspended and control passes to the
    next task; once the awaited I/O completes, execution resumes below the
    ``await``.

    :param url: absolute URL to fetch.
    """
    print(url)
    async with aiohttp.request("GET", url) as r:  # aiohttp helper that issues the request
        # Use `await r.read()` instead when the payload is binary (e.g. images)
        # and should not be decoded.
        response = await r.text(encoding="utf-8")  # FIX: was misspelled `reponse`
        print(response)
    # Falling off the end of the coroutine yields control back to the loop.


tasks = [
    fetch_async('http://www.baidu.com/'),
    fetch_async('http://www.chouti.com/'),
    fetch_async('http://www.chouti.com/'),
    fetch_async('http://www.chouti.com/'),
    fetch_async('http://www.chouti.com/'),
]

start_time = time.time()
# NOTE: get_event_loop()/run_until_complete is the legacy entry point
# (deprecated in favor of asyncio.run() since Python 3.10); kept for
# compatibility with the rest of these examples.
event_loop = asyncio.get_event_loop()
results = event_loop.run_until_complete(asyncio.gather(*tasks))
print("time:{}".format((time.time() - start_time)))
event_loop.close()
二、异步IO -服务端
win下使用select 实现异步io -服务端:
import socket
import select
import time
import re  # FIX: hoisted out of the request loop — importing per request was wasteful


class HttpRequest(object):
    """Parse a raw HTTP request (header block + body) received from a client."""

    def __init__(self, content):
        """
        :param content: raw request bytes as sent by the client (headers + body).
        """
        self.content = content
        self.header_bytes = bytes()
        self.body_bytes = bytes()
        self.header_dict = {}
        self.method = ""
        self.url = ""
        self.protocol = ""
        self.initialize()
        self.initialize_headers()

    def initialize(self):
        # Split header block from body at the first blank line (CRLF CRLF).
        temp = self.content.split(b'\r\n\r\n', 1)
        if len(temp) == 1:
            self.header_bytes += temp[0]
        else:
            h, b = temp
            self.header_bytes += h
            self.body_bytes += b

    @property
    def header_str(self):
        # Header block decoded for line-oriented parsing.
        return str(self.header_bytes, encoding='utf-8')

    def initialize_headers(self):
        """Fill method/url/protocol and the header_dict from the header block."""
        headers = self.header_str.split('\r\n')
        first_line = headers[0].split(' ')
        if len(first_line) == 3:
            self.method, self.url, self.protocol = first_line
        for line in headers[1:]:  # skip the request line itself
            # FIX: split on the FIRST ':' only, so values containing ':'
            # (e.g. "Host: 127.0.0.1:9999") are not silently dropped.
            kv = line.split(':', 1)
            if len(kv) == 2:
                k, v = kv
                self.header_dict[k] = v.strip()


class Future(object):
    """Placeholder for a result produced later; completes when `result` is set
    or after `timeout` seconds have elapsed since creation."""

    def __init__(self, timeout=0):
        self.result = None            # filled in when the async work completes
        self.timeout = timeout        # seconds before the request is forced to finish
        self.start = time.time()      # creation time, used for timeout checks


def main(request):
    """View that suspends the request: it is answered 5 seconds later."""
    f = Future(5)
    return f


def index(request):
    """Plain synchronous view: answered immediately."""
    return "indexasdfasdfasdf"


routers = [
    ('/main/', main),
    ('/index/', index),
]


def run():
    """Single-threaded, select-based HTTP server with support for views that
    suspend (return a Future) and are completed later by the event loop."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("127.0.0.1", 9999,))
    sock.setblocking(False)
    sock.listen(128)

    inputs = []
    inputs.append(sock)
    async_request_dict = {
        # 'socket': future  — connections whose view returned a Future
    }

    while True:
        rlist, wlist, elist = select.select(inputs, [], [], 0.05)
        for r in rlist:
            if r == sock:
                # New incoming connection.
                conn, addr = sock.accept()
                conn.setblocking(False)
                inputs.append(conn)
            else:
                # Client sent data: drain the non-blocking socket completely.
                data = b""
                while True:
                    try:
                        chunk = r.recv(1024)
                        data = data + chunk
                    except OSError:
                        # FIX: was a blanket `except Exception` — only socket
                        # errors (EWOULDBLOCK, resets) mean "stop reading".
                        chunk = None
                    if not chunk:
                        break
                # 1. parse the request, 2. match a route, 3. run the view,
                # 4. either answer now or suspend on the returned Future.
                request = HttpRequest(data)
                func = None
                for route in routers:
                    if re.match(route[0], request.url):
                        func = route[1]
                        break
                if func is not None:
                    result = func(request)
                    if isinstance(result, Future):
                        # Suspended: answer later, once the Future completes.
                        async_request_dict[r] = result
                    else:
                        r.sendall(b'HTTP/1.1 200 OK\r\n\r\n<html><body>hello,baby!</body></html>')
                        inputs.remove(r)
                        r.close()
                else:
                    r.sendall(b"HTTP/1.1 200 OK\r\n\r\n<html><body>404</body></html>")
                    inputs.remove(r)
                    r.close()

        # Complete any suspended request whose timeout has expired.
        # Iterate over a snapshot since we delete entries while walking.
        for conn in list(async_request_dict.keys()):
            future = async_request_dict[conn]
            start = future.start
            timeout = future.timeout
            ctime = time.time()
            if (start + timeout) <= ctime:
                future.result = b"timeout"
            if future.result:
                conn.sendall(b'HTTP/1.1 200 OK\r\n\r\n<html><body>hello web</body></html>')
                conn.close()
                del async_request_dict[conn]
                inputs.remove(conn)


if __name__ == '__main__':
    run()
未来的你,会感谢现在努力的你!