python 实现简单 http 代理
http 代理有什么用处?
1. 可以用来插入 JS 广告:某些 ISP 就是这么干的,强行插入广告。不过现在 HTTPS 网站越来越多,这种做法已经行不通了。
2. 用来调试 app 或其他程序,可以看到详细的 HTTP 请求和响应。Fiddler 这个软件也是做这个的,但它不开源。
用浏览器打开测试的 HTTP 请求 http://localhost/logo.gif 带端口号的 http://localhost:8000/logo.gif
使用 wireshark 抓包:
GET http://localhost/logo.gif HTTP/1.1
Host: localhost
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Accept-Encoding: gzip, deflate
DNT: 1
Connection: keep-alive
Upgrade-Insecure-Requests: 1

# 或第 2 种
GET http://localhost:8000/logo.gif HTTP/1.1
Host: localhost:8000
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Accept-Encoding: gzip, deflate
DNT: 1
Connection: keep-alive
Upgrade-Insecure-Requests: 1
原理:
HTTP/1.1 是建立在 TCP 连接之上的文本协议,报文由 header 和 body 组成;更多复杂的细节(session、cookie、缓存等)这里不做介绍。
下面使用 python 实现:监听 8000 端口,从原始请求头中解析出目标 HOST 和端口,与目标服务器建立连接,取回数据后转发给浏览器。
设置浏览器,使用 localhost 8000 http 代理方式 。
下图以 firefox 设置为例:
源码 py2.7:
#!/usr/bin/env python
# coding: utf-8
"""Minimal threaded HTTP forward proxy.

Listens on ``host:port``. For every client connection it reads the first
chunk of the request, connects to the server named in the ``Host`` header,
forwards that chunk and streams the response back until the server closes
the connection.

Known limitations: only the first HEADER_SIZE bytes of a request are
forwarded (larger request bodies are truncated), each client connection is
used for a single request, and HTTPS (CONNECT) is not supported.

The code runs unchanged on Python 2.7 and Python 3.
"""
import socket
import sys
import threading

HEADER_SIZE = 4096       # maximum number of request bytes read from the client
RESPONSE_CHUNK = 512     # bytes relayed per recv() from the upstream server
DEFAULT_HTTP_PORT = 80   # used when the Host header carries no port

host = '0.0.0.0'
port = 8000


def parse_host_header(request_header):
    """Extract the upstream ``(hostname, port)`` from a raw HTTP request.

    The Host header is located by name (case-insensitively) rather than by
    position, because clients are free to send headers in any order.

    Args:
        request_header: Raw request bytes as received from the client.

    Returns:
        A ``(hostname, port)`` tuple; ``port`` falls back to 80 when the
        header does not specify one.

    Raises:
        ValueError: If the request contains no usable Host header.
    """
    # Only look at the header section; a request body may contain anything.
    head = request_header.split(b"\r\n\r\n", 1)[0]
    # Skip the request line (first line) and scan the header lines.
    for line in head.split(b"\r\n")[1:]:
        name, sep, value = line.partition(b":")
        if not sep or name.strip().lower() != b"host":
            continue
        value = value.strip()
        hostname, sep, port_text = value.rpartition(b":")
        if sep and port_text.isdigit():
            target_port = int(port_text)
        else:
            # No explicit port: the whole value is the host name.
            hostname, target_port = value, DEFAULT_HTTP_PORT
        if not hostname:
            break
        return hostname.decode("latin-1"), target_port
    raise ValueError("request has no usable Host header")


def http_socket(conn, addr):
    """Serve one client connection: forward its request, relay the response.

    Args:
        conn: Connected client socket returned by ``accept()``.
        addr: ``(ip, port)`` tuple of the client, used for logging only.

    Both the client socket and the upstream socket are always closed before
    the function returns, including on errors.
    """
    print("client connent:{0}:{1}".format(addr[0], addr[1]))
    upstream = None
    try:
        # Read the HTTP request head.
        request_header = conn.recv(HEADER_SIZE)
        if not request_header:
            # Client connected and closed without sending a request.
            return
        target = parse_host_header(request_header)
        # Open the TCP connection to the origin server.
        upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        upstream.connect(target)
        # Forward the original request bytes untouched.
        upstream.sendall(request_header)
        # Relay the response until the origin server closes the connection.
        while True:
            resp = upstream.recv(RESPONSE_CHUNK)
            if not resp:
                break
            conn.sendall(resp)
    except Exception as e:
        # Thread boundary: report the failure instead of killing the thread
        # with a traceback; cleanup happens in ``finally``.
        print("http socket error")
        print(e)
    finally:
        if upstream is not None:
            upstream.close()
        conn.close()


def main():
    """Bind the listening socket and hand each client to a worker thread."""
    http_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        http_server.bind((host, port))
    except socket.error:
        http_server.close()
        sys.exit("python proxy bind error ")

    print("python proxy start")

    http_server.listen(1024)

    try:
        while True:
            conn, addr = http_server.accept()
            http_thread = threading.Thread(target=http_socket, args=(conn, addr))
            # Daemon threads do not keep the process alive after Ctrl-C.
            http_thread.daemon = True
            http_thread.start()
    except KeyboardInterrupt:
        pass
    finally:
        # Close the listening socket on shutdown.
        http_server.close()
        print("python proxy close")


if __name__ == "__main__":
    main()
测试下载大点的文件:
正常。
缺点是性能不好:没有实现 Connection: keep-alive,每个连接只处理一次请求,处理完就关闭了。
在 Win10(Python 2.7)和 Ubuntu 16.04 上应该都可以用。
select 版:改进了 keep-alive 的处理,不再每次请求后都关闭连接。
#!/usr/bin/env python
# coding: utf-8
"""Minimal HTTP forward proxy using ``select`` to keep connections alive.

Listens on ``host:port``. Each client connection gets a worker thread that
opens one upstream connection (to the server named in the first request's
``Host`` header) and then shuttles bytes in both directions until either
side closes.

Known limitations: every request arriving on one client connection is sent
to the upstream chosen by the first request, and HTTPS (CONNECT) is not
supported.

The code runs unchanged on Python 2.7 and Python 3.
"""
import select
import socket
import sys
import threading

HEADER_SIZE = 4096       # maximum number of bytes read from the client per recv()
RESPONSE_CHUNK = 512     # bytes relayed per recv() from the upstream server
DEFAULT_HTTP_PORT = 80   # used when the Host header carries no port

host = '0.0.0.0'
port = 8000


def parse_host_header(request_header):
    """Extract the upstream ``(hostname, port)`` from a raw HTTP request.

    The Host header is located by name (case-insensitively) rather than by
    position, because clients are free to send headers in any order.

    Args:
        request_header: Raw request bytes as received from the client.

    Returns:
        A ``(hostname, port)`` tuple; ``port`` falls back to 80 when the
        header does not specify one.

    Raises:
        ValueError: If the request contains no usable Host header.
    """
    # Only look at the header section; a request body may contain anything.
    head = request_header.split(b"\r\n\r\n", 1)[0]
    # Skip the request line (first line) and scan the header lines.
    for line in head.split(b"\r\n")[1:]:
        name, sep, value = line.partition(b":")
        if not sep or name.strip().lower() != b"host":
            continue
        value = value.strip()
        hostname, sep, port_text = value.rpartition(b":")
        if sep and port_text.isdigit():
            target_port = int(port_text)
        else:
            # No explicit port: the whole value is the host name.
            hostname, target_port = value, DEFAULT_HTTP_PORT
        if not hostname:
            break
        return hostname.decode("latin-1"), target_port
    raise ValueError("request has no usable Host header")


def http_socket(client, addr):
    """Relay traffic between one client and its upstream server.

    Args:
        client: Connected client socket returned by ``accept()``.
        addr: ``(ip, port)`` tuple of the client, used for logging only.

    The loop ends when either peer closes its side, when ``select`` reports
    an exceptional condition, or when any socket operation fails. Both
    sockets are always closed before the function returns.
    """
    # Sockets watched by select(); the upstream socket is added once opened.
    inputs = [client]
    remote_socket = None
    print("client connent:{0}:{1}".format(addr[0], addr[1]))
    try:
        while True:
            readable, _, exceptional = select.select(inputs, [], inputs)
            if exceptional:
                return
            for s in readable:
                if s is client:
                    # Data from the browser: a request head or more body.
                    request_header = s.recv(HEADER_SIZE)
                    if not request_header:
                        # Browser closed the connection.
                        return
                    if remote_socket is None:
                        # First request: pick the upstream from its Host header.
                        target = parse_host_header(request_header)
                        remote_socket = socket.socket(
                            socket.AF_INET, socket.SOCK_STREAM)
                        remote_socket.connect(target)
                        inputs.append(remote_socket)
                    # Forward the original request bytes untouched.
                    remote_socket.sendall(request_header)
                else:
                    # Data from the upstream: read only what is ready so the
                    # loop returns to select() instead of blocking here.
                    resp = s.recv(RESPONSE_CHUNK)
                    if not resp:
                        # Upstream closed the connection.
                        return
                    client.sendall(resp)
    except Exception as e:
        # Thread boundary: report the failure and fall through to cleanup
        # rather than retrying forever on a broken socket.
        print("http socket error {0}".format(e))
    finally:
        if remote_socket is not None:
            remote_socket.close()
        client.close()


def main():
    """Bind the listening socket and hand each client to a worker thread."""
    http_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        http_server.bind((host, port))
    except socket.error:
        http_server.close()
        sys.exit("python proxy bind error ")

    print("python proxy start")

    http_server.listen(1024)

    try:
        while True:
            client, addr = http_server.accept()
            http_thread = threading.Thread(target=http_socket, args=(client, addr))
            # Daemon threads do not keep the process alive after Ctrl-C.
            http_thread.daemon = True
            http_thread.start()
    except KeyboardInterrupt:
        pass
    finally:
        # Close the listening socket on shutdown.
        http_server.close()
        print("python proxy close")


if __name__ == "__main__":
    main()
https 的实现稍复杂一点,下面讲。