kube-proxy ipvs
IPVS用法
IPVS可以通过ipvsadm 命令进行配置,如-L列举,-A添加,-D删除。
如下命令创建一个service实例172.17.0.1:32016,-t指定监听的为TCP端口,-s指定算法为轮询算法rr(Round Robin),ipvs支持简单轮询(rr)、加权轮询(wrr)、最少连接(lc)、源地址或者目标地址散列(sh、dh)等10种调度算法。
ipvsadm -A -t 172.17.0.1:32016 -s rr
在添加后端服务器(real server)的时候还需要用-r指定server地址,-w指定权值,-m指定转发模式,-m设置masquerading表示NAT模式(-g为gatewaying,即直连路由模式),如下所示:
ipvsadm -a -t 172.17.0.1:32016 -r 10.244.1.2:8080 -m -w 1
ipvsadm -a -t 172.17.0.1:32016 -r 10.244.1.3:8080 -m -w 1
ipvsadm -a -t 172.17.0.1:32016 -r 10.244.3.2:8080 -m -w 1
root@cloud:~# ipvsadm -S -n | grep 30091 -A -t 10.10.16.47:30091 -s rr -a -t 10.10.16.47:30091 -r 10.244.41.7:80 -m -w 1 -a -t 10.10.16.47:30091 -r 10.244.129.131:80 -m -w 1 -A -t 10.244.2.1:30091 -s rr -a -t 10.244.2.1:30091 -r 10.244.41.7:80 -m -w 1 -a -t 10.244.2.1:30091 -r 10.244.129.131:80 -m -w 1 -A -t 127.0.0.1:30091 -s rr -a -t 127.0.0.1:30091 -r 10.244.41.7:80 -m -w 1 -a -t 127.0.0.1:30091 -r 10.244.129.131:80 -m -w 1 -A -t 172.17.0.1:30091 -s rr -a -t 172.17.0.1:30091 -r 10.244.41.7:80 -m -w 1 -a -t 172.17.0.1:30091 -r 10.244.129.131:80 -m -w 1
以 ipvs 模式 运行kube-proxy
前提条件
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack_ipv4
grep -e ipvs -e nf_conntrack_ipv4 /lib/modules/$(uname -r)/modules.builtin
确保IPVS需要的内核模块已经加载
root@cloud:~# grep -e ipvs -e nf_conntrack_ipv4 /lib/modules/$(uname -r)/modules.builtin root@cloud:~# modprobe -- ip_vs root@cloud:~# modprobe -- ip_vs_rr root@cloud:~# modprobe -- ip_vs_wrr root@cloud:~# modprobe -- ip_vs_sh root@cloud:~# modprobe -- nf_conntrack_ipv4 modprobe: FATAL: Module nf_conntrack_ipv4 not found in directory /lib/modules/5.5.19-050519-generic root@cloud:~#
root@cloud:~# modprobe -- nf_conntrack root@cloud:~# grep -e nf_conntrack /lib/modules/$(uname -r)/modules.builtin root@cloud:~#
linux kernel 4.19版本已经将nf_conntrack_ipv4 更新为 nf_conntrack, 而 kube-proxy 1.13 以下版本,强依赖 nf_conntrack_ipv4。
modprobe br_netfilter
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack
root@ubuntu:~# mkdir -p /etc/sysconfig/modules/ root@ubuntu:~# cat > /etc/sysconfig/modules/ipvs.modules <<EOF > #!/bin/bash > modprobe -- ip_vs > modprobe -- ip_vs_rr > modprobe -- ip_vs_wrr > modprobe -- ip_vs_sh > modprobe -- nf_conntrack > EOF root@ubuntu:~# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack ip_vs_sh 16384 0 ip_vs_wrr 16384 0 ip_vs_rr 16384 0 ip_vs 167936 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr nf_conntrack_netlink 53248 0 nfnetlink 20480 3 nf_conntrack_netlink,ip_set nf_conntrack 155648 8 xt_conntrack,nf_nat,nf_nat_ipv6,ipt_MASQUERADE,nf_nat_ipv4,xt_nat,nf_conntrack_netlink,ip_vs nf_defrag_ipv6 24576 2 nf_conntrack,ip_vs nf_defrag_ipv4 16384 1 nf_conntrack libcrc32c 16384 5 nf_conntrack,nf_nat,btrfs,raid456,ip_vs root@ubuntu:~#
安装ipvs相关软件包
yum -y install ipvsadm ipset
kubectl edit configmap kube-proxy -n kube-system
mode: "ipvs"
root@ubuntu:~# kubectl get pods -n kube-system -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES calico-kube-controllers-5978c5f6b5-tk6pg 1/1 Running 0 6d16h 10.244.243.194 ubuntu <none> <none> calico-node-2fp7r 1/1 Running 0 42h 10.10.16.251 centos7 <none> <none> calico-node-j4g4h 1/1 Running 0 24h 10.10.16.82 ubuntu <none> <none> calico-node-knqxw 1/1 Running 0 42h 10.10.16.81 bogon <none> <none> calico-node-sl4sz 1/1 Running 0 42h 10.10.16.47 cloud <none> <none> coredns-66bff467f8-hlbzk 1/1 Running 0 3d21h 10.244.29.1 bogon <none> <none> coredns-66bff467f8-zx85v 1/1 Running 0 3d21h 10.244.41.1 cloud <none> <none> etcd-ubuntu 1/1 Running 4 6d16h 10.10.16.82 ubuntu <none> <none> kube-apiserver-ubuntu 1/1 Running 7 6d16h 10.10.16.82 ubuntu <none> <none> kube-controller-manager-ubuntu 1/1 Running 5 6d16h 10.10.16.82 ubuntu <none> <none> kube-proxy-798sq 1/1 Running 0 6d16h 10.10.16.47 cloud <none> <none> kube-proxy-8hh62 1/1 Running 0 6d16h 10.10.16.82 ubuntu <none> <none> kube-proxy-kwcdg 1/1 Running 0 44h 10.10.16.251 centos7 <none> <none> kube-proxy-l268b 1/1 Running 0 6d16h 10.10.16.81 bogon <none> <none> kube-scheduler-ubuntu 1/1 Running 7 6d16h 10.10.16.82 ubuntu <none> <none> root@ubuntu:~# kubectl get pod -n kube-system|grep kube-proxy|awk '{print "kubectl delete po "$1" -n kube-system"}'|sh pod "kube-proxy-798sq" deleted pod "kube-proxy-8hh62" deleted pod "kube-proxy-kwcdg" deleted pod "kube-proxy-l268b" deleted root@ubuntu:~# kubectl logs kube-proxy -n kube-system Error from server (NotFound): pods "kube-proxy" not found root@ubuntu:~# kubectl get pods -n kube-system -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES calico-kube-controllers-5978c5f6b5-tk6pg 1/1 Running 0 6d16h 10.244.243.194 ubuntu <none> <none> calico-node-2fp7r 1/1 Running 0 42h 10.10.16.251 centos7 <none> <none> calico-node-j4g4h 1/1 Running 0 24h 10.10.16.82 ubuntu <none> <none> calico-node-knqxw 1/1 Running 0 42h 10.10.16.81 bogon 
<none> <none> calico-node-sl4sz 1/1 Running 0 42h 10.10.16.47 cloud <none> <none> coredns-66bff467f8-hlbzk 1/1 Running 0 3d21h 10.244.29.1 bogon <none> <none> coredns-66bff467f8-zx85v 1/1 Running 0 3d21h 10.244.41.1 cloud <none> <none> etcd-ubuntu 1/1 Running 4 6d16h 10.10.16.82 ubuntu <none> <none> kube-apiserver-ubuntu 1/1 Running 7 6d16h 10.10.16.82 ubuntu <none> <none> kube-controller-manager-ubuntu 1/1 Running 5 6d16h 10.10.16.82 ubuntu <none> <none> kube-proxy-5w89t 1/1 Running 0 99s 10.10.16.47 cloud <none> <none> kube-proxy-96qlg 1/1 Running 0 97s 10.10.16.82 ubuntu <none> <none> kube-proxy-cqn7b 1/1 Running 0 87s 10.10.16.81 bogon <none> <none> kube-proxy-xrqsb 1/1 Running 0 94s 10.10.16.251 centos7 <none> <none> kube-scheduler-ubuntu 1/1 Running 7 6d16h 10.10.16.82 ubuntu <none> <none> root@ubuntu:~# kubectl logs kube-proxy-5w89t -n kube-system I0625 03:47:54.614160 1 node.go:136] Successfully retrieved node IP: 10.10.16.47 I0625 03:47:54.614248 1 server_others.go:259] Using ipvs Proxier. W0625 03:47:54.614794 1 proxier.go:429] IPVS scheduler not specified, use rr by default I0625 03:47:54.615248 1 server.go:583] Version: v1.18.1 I0625 03:47:54.616438 1 conntrack.go:52] Setting nf_conntrack_max to 2097152 I0625 03:47:54.616952 1 config.go:315] Starting service config controller I0625 03:47:54.616987 1 config.go:133] Starting endpoints config controller I0625 03:47:54.617008 1 shared_informer.go:223] Waiting for caches to sync for service config I0625 03:47:54.617041 1 shared_informer.go:223] Waiting for caches to sync for endpoints config I0625 03:47:54.717171 1 shared_informer.go:230] Caches are synced for service config I0625 03:47:54.717256 1 shared_informer.go:230] Caches are synced for endpoints config
创建pod
root@ubuntu:~# cat web-anti-affinity-two.yaml apiVersion: apps/v1 kind: Deployment metadata: name: web-ipvs spec: selector: matchLabels: app: web-ipvs replicas: 2 template: metadata: labels: app: web-ipvs spec: affinity: #pod 反亲和性, 打散 web-ipvs 各个副本 podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: - key: app operator: In values: - web-ipvs topologyKey: "kubernetes.io/hostname" containers: - image: nginx imagePullPolicy: IfNotPresent name: web2-worker ports: - containerPort: 80 protocol: TCP nodeSelector: rr-group: rr2
root@ubuntu:~# kubectl get svc NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE ipvs-nodeport-svc NodePort 10.111.249.68 <none> 30001:30091/TCP 6m53s kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 6d19h nodeport-svc NodePort 10.102.82.74 <none> 3000:30090/TCP 3d3h root@ubuntu:~# kubectl get pods -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES web-ipvs-777f69dbf8-qc27c 1/1 Running 0 15m 10.244.129.131 centos7 <none> <none> web-ipvs-777f69dbf8-xsscw 1/1 Running 0 15m 10.244.41.7 cloud <none> <none>
创建svc
root@ubuntu:~# cat web-ipvs-svc.yml apiVersion: v1 kind: Service metadata: name: ipvs-nodeport-svc spec: type: NodePort selector: app: web-ipvs ports: - protocol: TCP port: 30001 targetPort: 80 nodePort: 30091
root@ubuntu:~# ipvsadm -ln
root@cloud:~# ipvsadm -ln --stats IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Conns InPkts OutPkts InBytes OutBytes -> RemoteAddress:Port TCP 172.17.0.1:30090 0 0 0 0 0 -> 10.244.29.6:80 0 0 0 0 0 -> 10.244.41.5:80 0 0 0 0 0 -> 10.244.129.129:80 0 0 0 0 0 -> 10.244.243.199:80 0 0 0 0 0 TCP 10.10.16.47:30090 0 0 0 0 0 -> 10.244.29.6:80 0 0 0 0 0 -> 10.244.41.5:80 0 0 0 0 0 -> 10.244.129.129:80 0 0 0 0 0 -> 10.244.243.199:80 0 0 0 0 0 TCP 10.10.16.47:30091 2 12 8 1456 1992 -> 10.244.41.7:80 1 4 2 216 112 -> 10.244.129.131:80 1 8 6 1240 1880
dnat不经过iptables
root@cloud:~# iptables -nvL -t nat | grep 30091 root@cloud:~#
snat conntrack
root@cloud:~# conntrack -L -o ktimestamp | grep 10.244.41.7 conntrack v1.4.4 (conntrack-tools): 101 flow entries have been shown. tcp 6 110 TIME_WAIT src=192.168.117.51 dst=10.10.16.47 sport=57852 dport=30091 src=10.244.41.7 dst=10.10.16.47 sport=80 dport=49282 [ASSURED] mark=0 use=1
[root@centos7 ~]# conntrack -L -o ktimestamp | grep 10.244.129.131 conntrack v1.4.4 (conntrack-tools): 53 flow entries have been shown. tcp 6 85 TIME_WAIT src=10.10.16.47 dst=10.244.129.131 sport=57982 dport=80 src=10.244.129.131 dst=10.10.16.47 sport=80 dport=57982 [ASSURED] mark=0 secctx=system_u:object_r:unlabeled_t:s0 use=1 [root@centos7 ~]#
cloud pod tcpdump
root@cloud:~# nsenter -n --target 989208 root@cloud:~# tcpdump -i eth0 tcp and port 80 -eennvv tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes 14:42:28.436925 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 56, id 32767, offset 0, flags [DF], proto TCP (6), length 60) 10.10.16.47.49282 > 10.244.41.7.80: Flags [S], cksum 0x9ad6 (correct), seq 1352807746, win 64240, options [mss 1460,nop,wscale 8,sackOK,TS val 279186601 ecr 0], length 0 14:42:28.437001 ee:14:08:88:a5:c1 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60) 10.244.41.7.80 > 10.10.16.47.49282: Flags [S.], cksum 0x4e62 (incorrect -> 0xcb71), seq 394415451, ack 1352807747, win 65236, options [mss 1400,sackOK,TS val 1332352869 ecr 279186601,nop,wscale 7], length 0 14:42:28.438410 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32769, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.49282 > 10.244.41.7.80: Flags [.], cksum 0xf4d4 (correct), seq 1, ack 1, win 1024, options [nop,nop,TS val 279186603 ecr 1332352869], length 0 14:42:31.552422 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32777, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.49282 > 10.244.41.7.80: Flags [F.], cksum 0xe8a9 (correct), seq 1, ack 1, win 1024, options [nop,nop,TS val 279189717 ecr 1332352869], length 0 14:42:31.552579 ee:14:08:88:a5:c1 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 37122, offset 0, flags [DF], proto TCP (6), length 52) 10.244.41.7.80 > 10.10.16.47.49282: Flags [F.], cksum 0x4e5a (incorrect -> 0xde7e), seq 1, ack 2, win 510, options [nop,nop,TS val 1332355985 ecr 279189717], length 0 14:42:31.555556 ee:ee:ee:ee:ee:ee > ee:14:08:88:a5:c1, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 56, id 32780, offset 0, flags 
[DF], proto TCP (6), length 52) 10.10.16.47.49282 > 10.244.41.7.80: Flags [.], cksum 0xdc79 (correct), seq 2, ack 2, win 1024, options [nop,nop,TS val 279189720 ecr 1332355985], length 0
root@cloud:~# route -n Kernel IP routing table Destination Gateway Genmask Flags Metric Ref Use Iface 0.0.0.0 10.10.16.254 0.0.0.0 UG 0 0 0 enahisic2i0 9.251.0.0 172.17.0.1 255.255.0.0 UG 0 0 0 docker0 10.10.16.0 0.0.0.0 255.255.255.0 U 0 0 0 enahisic2i0 10.99.1.231 10.10.16.82 255.255.255.255 UGH 0 0 0 enahisic2i0 10.110.79.116 10.10.16.82 255.255.255.255 UGH 0 0 0 enahisic2i0 10.110.171.213 10.10.16.82 255.255.255.255 UGH 0 0 0 enahisic2i0 10.244.2.0 0.0.0.0 255.255.255.0 U 0 0 0 cni0 10.244.41.0 0.0.0.0 255.255.255.192 U 0 0 0 * 10.244.41.1 0.0.0.0 255.255.255.255 UH 0 0 0 cali027a65c4a41 10.244.41.5 0.0.0.0 255.255.255.255 UH 0 0 0 cali4cba7a26a1f 10.244.41.7 0.0.0.0 255.255.255.255 UH 0 0 0 calid88772f1084 10.244.129.128 10.10.16.251 255.255.255.192 UG 0 0 0 enahisic2i0 31.31.31.31 10.10.16.254 255.255.255.255 UGH 0 0 0 enahisic2i0 172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0 root@cloud:~# ip a | grep 10.244.41 ---------没有设备有10.244.41.xx段的ip
root@cloud:~#
centos7 pod tcpdump
设备有10.244.129.xx段的ip
[root@centos7 ~]# ip a | grep 10.244.41
[root@centos7 ~]# ip a | grep 10.244.129
[root@centos7 ~]#
源ip是10.10.16.47
[root@centos7 ~]# nsenter -n --target 120293 [root@centos7 ~]# ip a 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN group default qlen 1000 link/ipip 0.0.0.0 brd 0.0.0.0 4: eth0@if24: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1480 qdisc noqueue state UP group default link/ether ae:ef:a2:57:fc:f2 brd ff:ff:ff:ff:ff:ff link-netnsid 0 inet 10.244.129.131/32 brd 10.244.129.131 scope global eth0 valid_lft forever preferred_lft forever inet6 fe80::acef:a2ff:fe57:fcf2/64 scope link valid_lft forever preferred_lft forever [root@centos7 ~]# tcpdump -i eth0 tcp and port 80 -eennvv tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes 02:42:28.443016 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 55, id 32768, offset 0, flags [DF], proto TCP (6), length 60) 10.10.16.47.57982 > 10.244.129.131.80: Flags [S], cksum 0x8413 (correct), seq 3100666206, win 64240, options [mss 1460,nop,wscale 8,sackOK,TS val 279186601 ecr 0], length 0 02:42:28.443050 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 74: (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60) 10.244.129.131.80 > 10.10.16.47.57982: Flags [S.], cksum 0xa6de (incorrect -> 0x0b87), seq 1401284533, ack 3100666207, win 28560, options [mss 1440,sackOK,TS val 1717461591 ecr 279186601,nop,wscale 7], length 0 02:42:28.444316 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32771, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0xa5cb (correct), seq 1, ack 1, win 1026, options [nop,nop,TS val 279186603 ecr 1717461591], length 0 02:42:28.454514 
ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 501: (tos 0x0, ttl 55, id 32772, offset 0, flags [DF], proto TCP (6), length 487) 10.10.16.47.57982 > 10.244.129.131.80: Flags [P.], cksum 0xc92a (correct), seq 1:436, ack 1, win 1026, options [nop,nop,TS val 279186612 ecr 1717461591], length 435: HTTP, length: 435 GET / HTTP/1.1 Host: 10.10.16.47:30091 Connection: keep-alive Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.9 02:42:28.454527 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 398, offset 0, flags [DF], proto TCP (6), length 52) 10.244.129.131.80 > 10.10.16.47.57982: Flags [.], cksum 0xa6d6 (incorrect -> 0xa71e), seq 1, ack 436, win 232, options [nop,nop,TS val 1717461602 ecr 279186612], length 0 02:42:28.455151 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 304: (tos 0x0, ttl 64, id 399, offset 0, flags [DF], proto TCP (6), length 290) 10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa7c4 (incorrect -> 0xdb6c), seq 1:239, ack 436, win 232, options [nop,nop,TS val 1717461603 ecr 279186612], length 238: HTTP, length: 238 HTTP/1.1 200 OK Server: nginx/1.21.0 Date: Fri, 25 Jun 2021 06:42:28 GMT Content-Type: text/html Content-Length: 612 Last-Modified: Tue, 25 May 2021 12:28:56 GMT Connection: keep-alive ETag: "60aced88-264" Accept-Ranges: bytes 02:42:28.455420 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 678: (tos 0x0, ttl 64, id 400, offset 0, flags [DF], proto TCP (6), length 664) 10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa93a (incorrect -> 0xe7f7), seq 239:851, ack 436, win 232, options [nop,nop,TS val 
1717461603 ecr 279186612], length 612: HTTP 02:42:28.458165 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32773, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0xa0af (correct), seq 436, ack 851, win 1023, options [nop,nop,TS val 279186617 ecr 1717461603], length 0 02:42:28.558161 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 447: (tos 0x0, ttl 55, id 32775, offset 0, flags [DF], proto TCP (6), length 433) 10.10.16.47.57982 > 10.244.129.131.80: Flags [P.], cksum 0xa76c (correct), seq 436:817, ack 851, win 1023, options [nop,nop,TS val 279186716 ecr 1717461603], length 381: HTTP, length: 381 GET /favicon.ico HTTP/1.1 Host: 10.10.16.47:30091 Connection: keep-alive User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36 Accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8 Referer: http://10.10.16.47:30091/ Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.9 02:42:28.558313 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 776: (tos 0x0, ttl 64, id 401, offset 0, flags [DF], proto TCP (6), length 762) 10.244.129.131.80 > 10.10.16.47.57982: Flags [P.], cksum 0xa99c (incorrect -> 0x7286), seq 851:1561, ack 817, win 240, options [nop,nop,TS val 1717461706 ecr 279186716], length 710: HTTP, length: 710 HTTP/1.1 404 Not Found Server: nginx/1.21.0 Date: Fri, 25 Jun 2021 06:42:28 GMT Content-Type: text/html Content-Length: 555 Connection: keep-alive <html> <head><title>404 Not Found</title></head> <body> <center><h1>404 Not Found</h1></center> <hr><center>nginx/1.21.0</center> </body> </html> <!-- a padding to disable MSIE and Chrome friendly error page --> <!-- a padding to disable MSIE and Chrome friendly error page --> <!-- a padding to disable MSIE and Chrome friendly error page --> <!-- a padding to disable MSIE and 
Chrome friendly error page --> <!-- a padding to disable MSIE and Chrome friendly error page --> <!-- a padding to disable MSIE and Chrome friendly error page --> 02:42:28.605575 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32776, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0x9b6f (correct), seq 817, ack 1561, win 1026, options [nop,nop,TS val 279186764 ecr 1717461706], length 0 02:42:31.558228 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32778, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.57982 > 10.244.129.131.80: Flags [F.], cksum 0x8fe5 (correct), seq 817, ack 1561, win 1026, options [nop,nop,TS val 279189717 ecr 1717461706], length 0 02:42:31.558342 ae:ef:a2:57:fc:f2 > ee:ee:ee:ee:ee:ee, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 64, id 402, offset 0, flags [DF], proto TCP (6), length 52) 10.244.129.131.80 > 10.10.16.47.57982: Flags [F.], cksum 0xa6d6 (incorrect -> 0x873e), seq 1561, ack 818, win 240, options [nop,nop,TS val 1717464706 ecr 279189717], length 0 02:42:31.561333 ee:ee:ee:ee:ee:ee > ae:ef:a2:57:fc:f2, ethertype IPv4 (0x0800), length 66: (tos 0x0, ttl 55, id 32779, offset 0, flags [DF], proto TCP (6), length 52) 10.10.16.47.57982 > 10.244.129.131.80: Flags [.], cksum 0x8429 (correct), seq 818, ack 1562, win 1026, options [nop,nop,TS val 279189720 ecr 1717464706], length 0
root@cloud:~# iptables -nvL -t nat | grep 30091 root@cloud:~# iptables -t nat -nL KUBE-SERVICES Chain KUBE-SERVICES (2 references) target prot opt source destination KUBE-MARK-MASQ all -- !10.244.0.0/16 0.0.0.0/0 /* Kubernetes service cluster ip + port for masquerade purpose */ match-set KUBE-CLUSTER-IP dst,dst KUBE-NODE-PORT all -- 0.0.0.0/0 0.0.0.0/0 ADDRTYPE match dst-type LOCAL ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 match-set KUBE-CLUSTER-IP dst,dst root@cloud:~# iptables -t nat -nL KUBE-NODE-PORT Chain KUBE-NODE-PORT (1 references) target prot opt source destination KUBE-MARK-MASQ tcp -- 0.0.0.0/0 0.0.0.0/0 /* Kubernetes nodeport TCP port for masquerade purpose */ match-set KUBE-NODE-PORT-TCP dst root@cloud:~# iptables -t nat -nL KUBE-MARK-MASQ Chain KUBE-MARK-MASQ (15 references) target prot opt source destination MARK all -- 0.0.0.0/0 0.0.0.0/0 MARK or 0x4000 root@cloud:~# ipset list KUBE-NODE-PORT-TCP Name: KUBE-NODE-PORT-TCP Type: bitmap:port Revision: 3 Header: range 0-65535 Size in memory: 8264 References: 1 Number of entries: 2 Members: 30090 30091 root@cloud:~# iptables -t nat -nL POSTROUTING Chain POSTROUTING (policy ACCEPT) target prot opt source destination cali-POSTROUTING all -- 0.0.0.0/0 0.0.0.0/0 /* cali:O3lYWMrLQYEMJtB5 */ KUBE-POSTROUTING all -- 0.0.0.0/0 0.0.0.0/0 /* kubernetes postrouting rules */ MASQUERADE all -- 172.17.0.0/16 0.0.0.0/0 ANTREA-POSTROUTING all -- 0.0.0.0/0 0.0.0.0/0 /* Antrea: jump to Antrea postrouting rules */ RETURN all -- 10.244.0.0/16 10.244.0.0/16 MASQUERADE all -- 10.244.0.0/16 !224.0.0.0/4 RETURN all -- !10.244.0.0/16 10.244.2.0/24 MASQUERADE all -- !10.244.0.0/16 10.244.0.0/16 root@cloud:~# iptables -t nat -nL KUBE-POSTROUTING Chain KUBE-POSTROUTING (1 references) target prot opt source destination MASQUERADE all -- 0.0.0.0/0 0.0.0.0/0 /* kubernetes service traffic requiring SNAT */ mark match 0x4000/0x4000 MASQUERADE all -- 0.0.0.0/0 0.0.0.0/0 /* Kubernetes endpoints dst ip:port, source ip for solving hairpin purpose 
*/ match-set KUBE-LOOP-BACK dst,dst,src root@cloud:~#
ipvs 实现Kube Proxy 的功能
root@ubuntu:~# kubectl delete svc ipvs-nodeport-svc service "ipvs-nodeport-svc" deleted root@ubuntu:~# kubectl get svc NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 6d20h nodeport-svc NodePort 10.102.82.74 <none> 3000:30090/TCP 3d4h root@ubuntu:~#
root@ubuntu:~# ipvsadm -A -t 10.10.16.82:30091 -s rr root@ubuntu:~# ipvsadm -a -t 10.10.16.82:30091 -r 10.244.41.7:80 -m Memory allocation problem root@ubuntu:~# ipvsadm -a -t 10.10.16.82:30091 -r 10.244.129.131:80 -m Memory allocation problem root@ubuntu:~#
root@ubuntu:~# ipvsadm -D -t 10.10.16.82:30091 Memory allocation problem root@ubuntu:~#
root@cloud:~# ipvsadm -A -t 10.10.16.47:30091 -s rr root@cloud:~# ipvsadm -a -t 10.10.16.47:30091 -r 10.244.41.7:80 -m root@cloud:~# ipvsadm -a -t 10.10.16.47:30091 -r 10.244.129.131:80 -m Memory allocation problem root@cloud:~#
一些解释:
- 对于所有发往 10.10.16.82:30091 的流量,将负载均衡到 10.244.41.7:80 和 10.244.129.131:80
- 使用轮询 (rr) 算法实现负载均衡
- 两个后端,每个后端的权重为 1(各 50%)
- 使用 MASQ(增强型 SNAT)在 VIP 和 RealIP 之间进行流量转发
KubeProxy的IPVS模式
深入理解 Kubernetes 网络模型:自己实现 Kube Proxy 的功能