k8s-一次生产故障分析——kubelet堆栈和源码分析

一、node 不停重启,状态变为 NotReady;kubelet 进程一直在,但是不打日志。

在 apiserver 所在机器上执行下面的命令,打印 kubelet 的 goroutine 堆栈;对正常节点和异常节点各执行一次,对比两者的输出。命令中的 192.168.10.81 替换为要查看的(正常或异常)节点 IP。

curl  "https://192.168.10.81:10250/debug/pprof/goroutine?debug=2" --cacert /etc/kubernetes/pki/ca.crt --cert /etc/kubernetes/pki/apiserver-kubelet-client.crt --key /etc/kubernetes/pki/apiserver-kubelet-client.key  -k > stack.81

堆栈打印结果。

goroutine 268 [select, 50 minutes]:
net.(*Resolver).LookupIPAddr(0x5d48420, 0x5c1bc40, 0xc420014070, 0xc422505434, 0xc, 0xc4218400f0, 0x27, 0x0, 0x0, 0xc420daf820)
        /usr/local/go/src/net/lookup.go:196 +0x52b

goroutine 268 [select, 29 minutes]:
net.(*Resolver).LookupIPAddr(0x5d48420, 0x5c1bc40, 0xc420014070, 0xc422505434, 0xc, 0xc4218400f0, 0x27, 0x0, 0x0, 0xc420daf820)
        /usr/local/go/src/net/lookup.go:196 +0x52b

代码:

func (kl *Kubelet) setUpdatedAddressesFromHostname(node *api.Node) {
    addr := net.ParseIP(kl.hostname)
    if addr == nil {
        addrs, err := net.LookupIP(node.Name)   // 注:如果 kl.hostname 不是 IP,则需要通过 DNS 解析 node.Name 得到 IP;堆栈中的 LookupIPAddr 应该就是卡在这里

        if err != nil {
            glog.Errorf("Can't get ip address of node %s, so node addresses will be stale: %v", node.Name, err)
            return
        }

        if len(addrs) == 0 {
            glog.Errorf("No ip address for node %v, so node addresses will be stale", node.Name)
            return
        }

        // check all ip addresses for this node.Name and try to find the first non-loopback IPv4 address.
        // If no match is found, it uses the IP of the interface with gateway on it.
        for _, ip := range addrs {
            if !ip.IsLoopback() && ip.To4() != nil {
                addr = ip
                break
            }
        }

        if addr == nil {
            ip, err := util.ChooseHostInterface()
            if err != nil {
                glog.Errorf("Failed choosing host interface, so node addresses will be stale: %v", err)
                return
            }
            addr = ip
        }
    }

解决办法:在 kubelet 启动参数加上 --node-ip(见下面命令末尾),显式指定节点 IP,避免再走上面的域名解析:

 /usr/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --pod-manifest-path=/etc/kubernetes/manifests --allow-privileged=true --network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin --cluster-dns=10.96.0.10 --cluster-domain=cluster.local --authorization-mode=Webhook --client-ca-file=/etc/kubernetes/pki/ca.crt --cadvisor-port=0 --cgroup-driver=cgroupfs --rotate-certificates=true --cert-dir=/var/lib/kubelet/pki --node-ip=192.168.10.82

 

二、其他相关排查:用 ss -antp 做网络排查。

[root@H-LDOCKER-02 ~]# ss -antp |grep 192.168.10.81
ESTAB      0      0      192.168.10.82:49079              192.168.10.81:24007               users:(("glusterfs",pid=126656,fd=11))
ESTAB      0      0      192.168.10.82:48887              192.168.10.81:49152               users:(("glusterfs",pid=30768,fd=37))
ESTAB      0      0      192.168.10.82:49018              192.168.10.81:24007               users:(("glusterd",pid=2762,fd=122))
TIME-WAIT  0      0      192.168.10.82:46448              192.168.10.81:10250              
ESTAB      0      0      192.168.10.82:48891              192.168.10.81:49165               users:(("glusterfs",pid=30768,fd=31))
ESTAB      0      0      192.168.10.82:49158              192.168.10.81:49073               users:(("glusterfsd",pid=2929,fd=5))
ESTAB      0      0      192.168.10.82:49044              192.168.10.81:49160               users:(("glusterfs",pid=7469,fd=14))
ESTAB      0      0      192.168.10.82:2380               192.168.10.81:41728               users:(("etcd",pid=65237,fd=240))
ESTAB      0      0      192.168.10.82:57894              192.168.10.81:2379                users:(("kube-apiserver",pid=76134,fd=45))
ESTAB      0      0      192.168.10.82:49100              192.168.10.81:49161               users:(("glusterfs",pid=115740,fd=13))
ESTAB      0      0      192.168.10.82:49087              192.168.10.81:24007               users:(("glusterfs",pid=114131,fd=11))
ESTAB      0      0      192.168.10.82:49047              192.168.10.81:49162               users:(("glusterfs",pid=7460,fd=14))
TIME-WAIT  0      0      192.168.10.82:47558              192.168.10.81:2379               
ESTAB      0      0      192.168.10.82:44456              192.168.10.81:10250               users:(("kube-apiserver",pid=76134,fd=93))
ESTAB      0      0      192.168.10.82:57792              192.168.10.81:2379                users:(("kube-apiserver",pid=76134,fd=27))
ESTAB      0      0      192.168.10.82:57702              192.168.10.81:2379                users:(("kube-apiserver",pid=76134,fd=5))
TIME-WAIT  0      0      192.168.10.82:49284              192.168.10.81:2379               
ESTAB      0      0      192.168.10.82:49066              192.168.10.81:24007               users:(("glusterfs",pid=123708,fd=11))
ESTAB      0      0      192.168.10.82:49120              192.168.10.81:49157               users:(("glusterfs",pid=114509,fd=14))
ESTAB      0      0      192.168.10.82:49111              192.168.10.81:24007               users:(("glusterfs",pid=55598,fd=11))
ESTAB      0      0      192.168.10.82:57676              192.168.10.81:2379                users:(("kube-apiserver",pid=76134,fd=8))
ESTAB      0      0      192.168.10.82:49059              192.168.10.81:24007               users:(("glusterfs",pid=7469,fd=11))
ESTAB      0      0      192.168.10.82:49110              192.168.10.81:49163               users:(("glusterfs",pid=114643,fd=13))
ESTAB      0      0      192.168.10.82:49121              192.168.10.81:24007               users:(("glusterfs",pid=55040,fd=11))
ESTAB      0      0      192.168.10.82:57856              192.168.10.81:2379                users:(("kube-apiserver",pid=76134,fd=38))
ESTAB      0      0      192.168.10.82:49126              192.168.10.81:24007               users:(("glusterfs",pid=54888,fd=11))
ESTAB      0      0      192.168.10.82:49036              192.168.10.81:49153               users:(("glusterfs",pid=91593,fd=14))
TIME-WAIT  0      0      192.168.10.82:48796              192.168.10.81:2379               
TIME-WAIT  0      0      192.168.10.82:47586              192.168.10.81:2379               
ESTAB      0      0         ::ffff:192.168.10.80:6443                  ::ffff:192.168.10.81:49952               users:(("kube-apiserver",pid=76134,fd=83))
ESTAB      0      0         ::ffff:192.168.10.80:6443                  ::ffff:192.168.10.81:43144               users:(("kube-apiserver",pid=76134,fd=103))
ESTAB      0      0         ::ffff:192.168.10.80:6443                  ::ffff:192.168.10.81:48902               users:(("kube-apiserver",pid=76134,fd=80))

三、docker 堆栈打印。先安装 socat:yum -y install socat

 

socat -d -d TCP-LISTEN:18080,fork,bind=172.30.3.102 UNIX:/var/run/docker.sock

四、kubelet cri 学习,https://zhuanlan.zhihu.com/p/87602649

kubelet -> docker client (dockermanager) -> dockerd -> containerd -> runc

五、namespace,https://blog.51cto.com/speakingbaicai/1359825?from=groupmessage

六、busybox无法拉取

docker import 导入

 

posted @ 2022-11-15 09:35  马里亚纳仰望星空  Views(306)  Comments(0)  Edit  收藏  举报