基于socket实现http请求
异步非阻塞模块原理
# HTTP requests implemented directly on top of sockets
import select  # I/O multiplexing
import socket
from urllib.parse import urlsplit

url_list = [
    "http://www.baidu.com",
    # ... append further URLs here
]


def main():
    """Fetch "/" from every entry of ``url_list`` with non-blocking sockets.

    One socket is opened per URL and all of them are driven from a single
    ``select``-based event loop.  Each socket gets exactly one GET request
    once it becomes writable; every chunk of response data is printed as it
    arrives.  A socket is closed when the peer closes the connection or an
    error occurs, and the loop ends when no socket is left.
    """
    hosts = {}        # socket -> host name used for the Host header
    socket_list = []  # sockets that may still deliver response data
    unsent = []       # sockets whose request has not been written yet

    def discard(sk):
        """Close ``sk`` and remove it from every watch list."""
        sk.close()
        hosts.pop(sk, None)
        for watched in (socket_list, unsent):
            if sk in watched:
                watched.remove(sk)

    for url in url_list:
        # connect() expects a bare host name, not a URL with a scheme, so the
        # URL is split first; entries without a scheme are treated as hosts.
        parts = urlsplit(url if "://" in url else "//" + url)
        if not parts.hostname:
            print("skipping invalid URL: %r" % (url,))
            continue
        # Create one socket per URL and switch it to non-blocking mode.
        sk = socket.socket()
        sk.setblocking(False)
        try:
            sk.connect((parts.hostname, parts.port or 80))
        except BlockingIOError as e:
            # Expected in non-blocking mode: the connect is still in progress.
            print(e)
        except OSError as e:
            # Real failure (e.g. name resolution): release the socket.
            print(e)
            sk.close()
            continue
        hosts[sk] = parts.hostname
        socket_list.append(sk)
        unsent.append(sk)

    # Event loop.
    # The event loops of tornado/twisted are built on the same principle.
    while socket_list:
        # readable: sockets with response data (or EOF) waiting
        # writable: sockets whose connection attempt has finished
        readable, writable, _ = select.select(socket_list, unsent, [], 0.05)

        for sk in writable:
            # Send the request once only, then stop watching for writability.
            unsent.remove(sk)
            request = (
                "GET / HTTP/1.1\r\nhost:%s\r\nConnection: close\r\n\r\n"
                % hosts[sk]
            )
            try:
                sk.send(request.encode("utf-8"))
            except OSError as e:
                # The connection attempt failed.
                print(e)
                discard(sk)

        for sk in readable:
            if sk not in hosts:
                # Already closed while handling the writable sockets.
                continue
            try:
                response = sk.recv(4096)
            except BlockingIOError:
                # Nothing to read after all; wait for the next round.
                continue
            except OSError as e:
                print(e)
                discard(sk)
                continue
            if response:
                print(response)
            else:
                # Empty read: the peer closed the connection.
                discard(sk)


if __name__ == "__main__":
    main()
自定制一个基于socket实现的异步非阻塞模块
import select
import socket
import uuid


class Request(object):
    """A socket bundled with the description of the request it serves.

    ``select.select`` accepts any object that has a ``fileno()`` method, so
    instances can be passed to it directly in place of the raw socket.

    Attributes set by ``__init__``:
        sock:   the underlying socket object.
        info:   dict describing the request; this module reads the keys
                "host", "port", "path" and "callback".
        chunks: list of the response pieces received so far.
    """

    def __init__(self, sock, info):
        self.sock = sock
        self.info = info
        self.chunks = []

    def fileno(self):
        """Return the file descriptor of the wrapped socket."""
        return self.sock.fileno()


class Lou(object):
    """Minimal select-based event loop that performs HTTP GET requests.

    ``sock_list`` holds the requests still waiting for response data and
    ``conn_list`` holds the requests whose GET has not been sent yet.
    """

    def __init__(self):
        self.sock_list = []
        self.conn_list = []

    def add_request(self, req_info):
        """Start a non-blocking connection for ``req_info`` and register it.

        ``req_info`` must provide "host" and "port" here; ``run`` later reads
        "path" and "callback" as well.

        Raises:
            OSError: if the connection attempt fails immediately.  The socket
                is closed before the error is re-raised.
        """
        sk = socket.socket()
        sk.setblocking(False)
        try:
            sk.connect((req_info['host'], req_info['port']))
        except BlockingIOError:
            # Expected in non-blocking mode: the connect is still in progress.
            pass
        except OSError:
            sk.close()
            raise
        obj = Request(sk, req_info)
        self.conn_list.append(obj)
        self.sock_list.append(obj)

    def run(self):
        """Drive all registered requests until each is finished or failed.

        A request's callback is called once, with the complete response
        bytes, after the peer closes the connection.  Requests that hit a
        socket error are reported with ``print`` and dropped without calling
        the callback.
        """
        while self.sock_list:
            # Anything with a fileno() method can be handed to select().
            readable, writable, _ = select.select(
                self.sock_list, self.conn_list, [], 0.05)
            for obj in writable:
                self._send_request(obj)
            for obj in readable:
                # Skip requests dropped while handling the writable ones.
                if obj in self.sock_list:
                    self._receive(obj)

    def _send_request(self, obj):
        """Write the GET request for ``obj`` once its socket is writable."""
        # "Connection: close" asks the server to close the connection after
        # the response, which is how _receive detects the end of the response.
        data = "GET %s HTTP/1.1\r\nhost:%s\r\nConnection: close\r\n\r\n" % (
            obj.info["path"], obj.info["host"])
        self.conn_list.remove(obj)
        try:
            obj.sock.send(data.encode("utf-8"))
        except OSError as e:
            # The connection attempt failed.
            print(obj.info["host"], e)
            self._close(obj)

    def _receive(self, obj):
        """Read one chunk for ``obj``; finish the request on end of stream."""
        try:
            chunk = obj.sock.recv(8192)
        except BlockingIOError:
            # Nothing to read after all; wait for the next round.
            return
        except OSError as e:
            print(obj.info["host"], e)
            self._close(obj)
            return
        if chunk:
            obj.chunks.append(chunk)
            return
        # Empty read: the peer closed the connection, the response is complete.
        response = b"".join(obj.chunks)
        print(obj.info["host"], response)
        self._close(obj)
        obj.info["callback"](response)

    def _close(self, obj):
        """Close the socket of ``obj`` and remove it from both watch lists."""
        obj.sock.close()
        for pending in (self.sock_list, self.conn_list):
            if obj in pending:
                pending.remove(obj)


def done(response):
    """Write ``response`` (the bytes handed over by ``Lou.run``) to a new
    file named ``<random uuid4>.html`` in the current working directory."""
    filename = uuid.uuid4()
    with open("%s.html" % filename, "wb") as f:
        f.write(response)


url_list = [
    {"host": "www.baidu.com", "port": 80, "path": "/", "callback": done},
    {"host": "www.cnblogs.com", "port": 80, "path": "/", "callback": done},
    {"host": "www.bing.com", "port": 80, "path": "/", "callback": done},
]


if __name__ == "__main__":
    l = Lou()
    for req_info in url_list:
        l.add_request(req_info)
    l.run()