kubernetes源码阅读笔记——Kubelet(之二)

这一篇文章我们先从NewMainKubelet开始。

一、NewMainKubelet

pkg/kubelet/kubelet.go

// NewMainKubelet instantiates a new Kubelet object along with all the required internal modules.
// No initialization of Kubelet and its modules should happen here.
func NewMainKubelet(......) (*Kubelet, error) {
   ...

   // PodConfig is the merged stream of pod updates from all configured
   // sources (file, HTTP URL, API server); see makePodSourceConfig.
   if kubeDeps.PodConfig == nil {
      var err error
      kubeDeps.PodConfig, err = makePodSourceConfig(kubeCfg, kubeDeps, nodeName, bootstrapCheckpointPath)
      if err != nil {
         return nil, err
      }
   }

   // Policy for garbage-collecting dead containers: a minimum age before
   // collection plus per-pod and node-wide container count limits.
   containerGCPolicy := kubecontainer.ContainerGCPolicy{
      MinAge:             minimumGCAge.Duration,
      MaxPerPodContainer: int(maxPerPodContainerCount),
      MaxContainers:      int(maxContainerCount),
   }

   // The port the kubelet serves on, later published in the Node status.
   daemonEndpoints := &v1.NodeDaemonEndpoints{
      KubeletEndpoint: v1.DaemonEndpoint{Port: kubeCfg.Port},
   }

   // Policy for garbage-collecting images: reclaim disk when usage rises
   // above the high threshold, down toward the low threshold.
   imageGCPolicy := images.ImageGCPolicy{
      MinAge:               kubeCfg.ImageMinimumGCAge.Duration,
      HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
      LowThresholdPercent:  int(kubeCfg.ImageGCLowThresholdPercent),
   }

   ...

   // Keep a local cache of all Services current via a reflector
   // (only when an API client is available).
   serviceIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc})
   if kubeDeps.KubeClient != nil {
      serviceLW := cache.NewListWatchFromClient(kubeDeps.KubeClient.CoreV1().RESTClient(), "services", metav1.NamespaceAll, fields.Everything())
      r := cache.NewReflector(serviceLW, &v1.Service{}, serviceIndexer, 0)
      go r.Run(wait.NeverStop)
   }
   serviceLister := corelisters.NewServiceLister(serviceIndexer)

   // Watch only this kubelet's own Node object (field selector on the
   // node name) and cache it for fast local lookup.
   nodeIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{})
   if kubeDeps.KubeClient != nil {
      fieldSelector := fields.Set{api.ObjectNameField: string(nodeName)}.AsSelector()
      nodeLW := cache.NewListWatchFromClient(kubeDeps.KubeClient.CoreV1().RESTClient(), "nodes", metav1.NamespaceAll, fieldSelector)
      r := cache.NewReflector(nodeLW, &v1.Node{}, nodeIndexer, 0)
      go r.Run(wait.NeverStop)
   }
   nodeInfo := &predicates.CachedNodeInfo{NodeLister: corelisters.NewNodeLister(nodeIndexer)}

   // TODO: get the real node object of ourself,
   // and use the real node name and UID.
   // TODO: what is namespace for node?
   nodeRef := &v1.ObjectReference{
      Kind:      "Node",
      Name:      string(nodeName),
      UID:       types.UID(nodeName),
      Namespace: "",
   }

   containerRefManager := kubecontainer.NewRefManager()

   // OOM watcher built on cAdvisor; observed events go to the event recorder.
   oomWatcher := NewOOMWatcher(kubeDeps.CAdvisorInterface, kubeDeps.Recorder)

   ...

   // Assemble the Kubelet instance itself from the configuration and the
   // dependencies prepared above; further managers are attached below.
   klet := &Kubelet{
      hostname:                                hostname,
      hostnameOverridden:                      len(hostnameOverride) > 0,
      nodeName:                                nodeName,
      kubeClient:                              kubeDeps.KubeClient,
      csiClient:                               kubeDeps.CSIClient,
      heartbeatClient:                         kubeDeps.HeartbeatClient,
      onRepeatedHeartbeatFailure:              kubeDeps.OnHeartbeatFailure,
      rootDirectory:                           rootDirectory,
      resyncInterval:                          kubeCfg.SyncFrequency.Duration,
      sourcesReady:                            config.NewSourcesReady(kubeDeps.PodConfig.SeenAllSources),
      registerNode:                            registerNode,
      registerWithTaints:                      registerWithTaints,
      registerSchedulable:                     registerSchedulable,
      dnsConfigurer:                           dns.NewConfigurer(kubeDeps.Recorder, nodeRef, parsedNodeIP, clusterDNS, kubeCfg.ClusterDomain, kubeCfg.ResolverConfig),
      serviceLister:                           serviceLister,
      nodeInfo:                                nodeInfo,
      masterServiceNamespace:                  masterServiceNamespace,
      streamingConnectionIdleTimeout:          kubeCfg.StreamingConnectionIdleTimeout.Duration,
      recorder:                                kubeDeps.Recorder,
      cadvisor:                                kubeDeps.CAdvisorInterface,
      cloud:                                   kubeDeps.Cloud,
      externalCloudProvider:                   cloudprovider.IsExternal(cloudProvider),
      providerID:                              providerID,
      nodeRef:                                 nodeRef,
      nodeLabels:                              nodeLabels,
      nodeStatusUpdateFrequency:               kubeCfg.NodeStatusUpdateFrequency.Duration,
      nodeStatusReportFrequency:               kubeCfg.NodeStatusReportFrequency.Duration,
      os:                                      kubeDeps.OSInterface,
      oomWatcher:                              oomWatcher,
      cgroupsPerQOS:                           kubeCfg.CgroupsPerQOS,
      cgroupRoot:                              kubeCfg.CgroupRoot,
      mounter:                                 kubeDeps.Mounter,
      maxPods:                                 int(kubeCfg.MaxPods),
      podsPerCore:                             int(kubeCfg.PodsPerCore),
      syncLoopMonitor:                         atomic.Value{},
      daemonEndpoints:                         daemonEndpoints,
      containerManager:                        kubeDeps.ContainerManager,
      containerRuntimeName:                    containerRuntime,
      redirectContainerStreaming:              crOptions.RedirectContainerStreaming,
      nodeIP:                                  parsedNodeIP,
      nodeIPValidator:                         validateNodeIP,
      clock:                                   clock.RealClock{},
      enableControllerAttachDetach:            kubeCfg.EnableControllerAttachDetach,
      iptClient:                               utilipt.New(utilexec.New(), utildbus.New(), protocol),
      makeIPTablesUtilChains:                  kubeCfg.MakeIPTablesUtilChains,
      iptablesMasqueradeBit:                   int(kubeCfg.IPTablesMasqueradeBit),
      iptablesDropBit:                         int(kubeCfg.IPTablesDropBit),
      experimentalHostUserNamespaceDefaulting: utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalHostUserNamespaceDefaultingGate),
      keepTerminatedPodVolumes:                keepTerminatedPodVolumes,
      nodeStatusMaxImages:                     nodeStatusMaxImages,
      enablePluginsWatcher:                    utilfeature.DefaultFeatureGate.Enabled(features.KubeletPluginsWatcher),
   }

   // When running on a cloud provider, node addresses are synced
   // asynchronously by a dedicated manager.
   if klet.cloud != nil {
      klet.cloudResourceSyncManager = cloudresource.NewSyncManager(klet.cloud, nodeName, klet.nodeStatusUpdateFrequency)
   }

   // Choose how secret/configmap contents are kept fresh: watch-based,
   // TTL-cached, or fetched directly from the API server on each access.
   var secretManager secret.Manager
   var configMapManager configmap.Manager
   switch kubeCfg.ConfigMapAndSecretChangeDetectionStrategy {
   case kubeletconfiginternal.WatchChangeDetectionStrategy:
      secretManager = secret.NewWatchingSecretManager(kubeDeps.KubeClient)
      configMapManager = configmap.NewWatchingConfigMapManager(kubeDeps.KubeClient)
   case kubeletconfiginternal.TTLCacheChangeDetectionStrategy:
      secretManager = secret.NewCachingSecretManager(
         kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
      configMapManager = configmap.NewCachingConfigMapManager(
         kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
   case kubeletconfiginternal.GetChangeDetectionStrategy:
      secretManager = secret.NewSimpleSecretManager(kubeDeps.KubeClient)
      configMapManager = configmap.NewSimpleConfigMapManager(kubeDeps.KubeClient)
   default:
      return nil, fmt.Errorf("unknown configmap and secret manager mode: %v", kubeCfg.ConfigMapAndSecretChangeDetectionStrategy)
   }

   klet.secretManager = secretManager
   klet.configMapManager = configMapManager

   if klet.experimentalHostUserNamespaceDefaulting {
      klog.Infof("Experimental host user namespace defaulting is enabled.")
   }

   // Machine info (CPU/memory topology etc.) comes from cAdvisor.
   machineInfo, err := klet.cadvisor.MachineInfo()
   if err != nil {
      return nil, err
   }
   klet.machineInfo = machineInfo

   // Backoff applied to repeated image pulls (passed to the runtime manager below).
   imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)

   klet.livenessManager = proberesults.NewManager()

   ...

   // podManager is also responsible for keeping secretManager and configMapManager contents up-to-date.
   klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient), secretManager, configMapManager, checkpointManager)

   klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet)

   if remoteRuntimeEndpoint != "" {
      // remoteImageEndpoint is same as remoteRuntimeEndpoint if not explicitly specified
      if remoteImageEndpoint == "" {
         remoteImageEndpoint = remoteRuntimeEndpoint
      }
   }

   ...

   // Set up the container runtime. Only the built-in dockershim and
   // remote CRI runtimes are supported here; anything else is an error.
   switch containerRuntime {
   case kubetypes.DockerContainerRuntime:
      // Create and start the CRI shim running as a grpc server.
      streamingConfig := getStreamingConfig(kubeCfg, kubeDeps, crOptions)
      ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
         &pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory, !crOptions.RedirectContainerStreaming)
      if err != nil {
         return nil, err
      }
      if crOptions.RedirectContainerStreaming {
         klet.criHandler = ds
      }

      // The unix socket for kubelet <-> dockershim communication.
      klog.V(5).Infof("RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q",
         remoteRuntimeEndpoint,
         remoteImageEndpoint)
      klog.V(2).Infof("Starting the GRPC server for the docker CRI shim.")
      server := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
      if err := server.Start(); err != nil {
         return nil, err
      }

      // Create dockerLegacyService when the logging driver is not supported.
      supported, err := ds.IsCRISupportedLogDriver()
      if err != nil {
         return nil, err
      }
      if !supported {
         klet.dockerLegacyService = ds
         legacyLogProvider = ds
      }
   case kubetypes.RemoteContainerRuntime:
      // No-op.
      break
   default:
      return nil, fmt.Errorf("unsupported CRI runtime: %q", containerRuntime)
   }
   // Build the gRPC clients for the CRI runtime and image services
   // (for docker this points at the dockershim server started above).
   runtimeService, imageService, err := getRuntimeAndImageServices(remoteRuntimeEndpoint, remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout)
   if err != nil {
      return nil, err
   }
   klet.runtimeService = runtimeService

   if utilfeature.DefaultFeatureGate.Enabled(features.RuntimeClass) && kubeDeps.DynamicKubeClient != nil {
      klet.runtimeClassManager = runtimeclass.NewManager(kubeDeps.DynamicKubeClient)
   }

   // The generic runtime manager drives container/image operations
   // through the CRI services created above.
   runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
      kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
      klet.livenessManager,
      seccompProfileRoot,
      containerRefManager,
      machineInfo,
      klet,
      kubeDeps.OSInterface,
      klet,
      httpClient,
      imageBackOff,
      kubeCfg.SerializeImagePulls,
      float32(kubeCfg.RegistryPullQPS),
      int(kubeCfg.RegistryBurst),
      kubeCfg.CPUCFSQuota,
      kubeCfg.CPUCFSQuotaPeriod,
      runtimeService,
      imageService,
      kubeDeps.ContainerManager.InternalContainerLifecycle(),
      legacyLogProvider,
      klet.runtimeClassManager,
   )
   if err != nil {
      return nil, err
   }
   klet.containerRuntime = runtime
   klet.streamingRuntime = runtime
   klet.runner = runtime

   runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime)
   if err != nil {
      return nil, err
   }
   klet.runtimeCache = runtimeCache

   // Pick the stats provider: cAdvisor-backed for runtimes whose stats
   // still come via cAdvisor, CRI-backed otherwise.
   if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
      klet.StatsProvider = stats.NewCadvisorStatsProvider(
         klet.cadvisor,
         klet.resourceAnalyzer,
         klet.podManager,
         klet.runtimeCache,
         klet.containerRuntime,
         klet.statusManager)
   } else {
      klet.StatsProvider = stats.NewCRIStatsProvider(
         klet.cadvisor,
         klet.resourceAnalyzer,
         klet.podManager,
         klet.runtimeCache,
         runtimeService,
         imageService,
         stats.NewLogMetricsService())
   }

   // PLEG (pod lifecycle event generator): periodically relists containers
   // via the runtime; its health is registered as a runtime health check.
   klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
   klet.runtimeState = newRuntimeState(maxWaitForContainerRuntime)
   klet.runtimeState.addHealthCheck("PLEG", klet.pleg.Healthy)
   // A failed CIDR update is logged but not fatal to kubelet construction.
   if _, err := klet.updatePodCIDR(kubeCfg.PodCIDR); err != nil {
      klog.Errorf("Pod CIDR update failed %v", err)
   }

   // setup containerGC
   containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
   if err != nil {
      return nil, err
   }
   klet.containerGC = containerGC
   klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, integer.IntMax(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))

   // setup imageManager
   imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
   if err != nil {
      return nil, fmt.Errorf("failed to initialize image manager: %v", err)
   }
   klet.imageManager = imageManager

   ...

   // Prober manager: runs container probes; liveness results feed
   // livenessManager and statuses feed statusManager.
   klet.probeManager = prober.NewManager(
      klet.statusManager,
      klet.livenessManager,
      klet.runner,
      containerRefManager,
      kubeDeps.Recorder)

   // Service account token manager, consumed by the volume plugin
   // manager below.
   tokenManager := token.NewManager(kubeDeps.KubeClient)

   // MountPropagation can no longer be disabled; reject such configs.
   if !utilfeature.DefaultFeatureGate.Enabled(features.MountPropagation) {
      return nil, fmt.Errorf("mount propagation feature gate has been deprecated and will be removed in 1.14")
   }

   klet.volumePluginMgr, err =
      NewInitializedVolumePluginMgr(klet, secretManager, configMapManager, tokenManager, kubeDeps.VolumePlugins, kubeDeps.DynamicPluginProber)
   if err != nil {
      return nil, err
   }
   if klet.enablePluginsWatcher {
      klet.pluginWatcher = pluginwatcher.NewWatcher(
         klet.getPluginsRegistrationDir(), /* sockDir */
         klet.getPluginsDir(),             /* deprecatedSockDir */
      )
   }

   ...

   // setup volumeManager
   klet.volumeManager = volumemanager.NewVolumeManager(
      kubeCfg.EnableControllerAttachDetach,
      nodeName,
      klet.podManager,
      klet.statusManager,
      klet.kubeClient,
      klet.volumePluginMgr,
      klet.containerRuntime,
      kubeDeps.Mounter,
      klet.getPodsDir(),
      kubeDeps.Recorder,
      experimentalCheckNodeCapabilitiesBeforeMount,
      keepTerminatedPodVolumes)

   ...

   // Generating the status funcs should be the last thing we do,
   // since this relies on the rest of the Kubelet having been constructed.
   klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()

   return klet, nil
}

方法非常长,只贴出一部分,但是很重要。主要做了以下几件事:

(1)为kubelet加载各种配置,比如pod信息源、垃圾回收相关配置、监听的端口等。其中的podConfig引申一下,它是pod的三种信息来源的聚合(文件、URL和API Server)。进入makePodSourceConfig方法即可发现这一点。kubelet可以通过这三条途径获取pod的信息。

(2)根据前面的一系列配置,创建一个kubelet实例。可以看到kubelet实例的元素有数十个之多;

(3)创建manager等配置项,完善kubelet实例。manager用于管理pod运行时所需要加载的资源。如secretManager和configMapManager就是管理Pod运行时所需的secret和configmap的。注意这里有对于secret和configmap的不同的更新策略的选择(watch、TTL缓存、直接获取三种);

(4)判断容器运行时。可见,rkt已经废弃,可用的就是docker和其他。如果是docker,则创建一个docker shim server并运行。这一块很重要,后面详细分析;

(5)继续包装这个kubelet实例,为实例添加runtimemanager、垃圾回收、imagemanager、volumemanager等等各种组件。其中有的组件值得仔细研究,后面会涉及到;

(6)将NodeStatusFunc加入kubelet实例。这一步通过调用defaultNodeStatusFuncs方法实现。

进入这一方法:

pkg/kubelet/kubelet_node_status.go

// defaultNodeStatusFuncs is a factory that generates the default set of
// setNodeStatus funcs
func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error {
    // if cloud is not nil, we expect the cloud resource sync manager to exist
    var nodeAddressesFunc func() ([]v1.NodeAddress, error)
    if kl.cloud != nil {
        nodeAddressesFunc = kl.cloudResourceSyncManager.NodeAddresses
    }
    // Host validation (AppArmor) is only wired in when a validator is configured.
    var validateHostFunc func() error
    if kl.appArmorValidator != nil {
        validateHostFunc = kl.appArmorValidator.ValidateHost
    }
    // Each setter fills in one aspect of the v1.Node status (addresses,
    // machine info, versions, images, conditions, ...).
    var setters []func(n *v1.Node) error
    setters = append(setters,
        nodestatus.NodeAddress(...),
        nodestatus.MachineInfo(...),
        nodestatus.VersionInfo(...),
        nodestatus.DaemonEndpoints(kl.daemonEndpoints),
        nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),
        nodestatus.GoRuntime(),
    )
    // Volume limits are only reported behind the AttachVolumeLimit feature gate.
    if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
        setters = append(setters, nodestatus.VolumeLimits(kl.volumePluginMgr.ListVolumePluginWithLimits))
    }
    setters = append(setters,
        nodestatus.MemoryPressureCondition(...),
        nodestatus.DiskPressureCondition(...),
        nodestatus.PIDPressureCondition(...),
        nodestatus.ReadyCondition(...),
        nodestatus.VolumesInUse(...),
        kl.recordNodeSchedulableEvent,
    )
    return setters
}

可以看到方法将维护宿主节点运行状态相关的函数都加入进一个setters数组,并返回。这些函数,包括地址、内存、磁盘、存储卷等,维护了宿主机的信息和状态。

下面我们首先看一下docker shim server相关的内容。

二、DockerServer

前面提到,在NewMainKubelet方法中会判断容器运行时,对于docker的容器运行时进行一系列操作。

具体说来,先创建一个DockerService对象(通过NewDockerService方法)。这个对象中包含了一个CRI shim运行时需要包含的方法集合。根据这个对象创建一个DockerServer(通过NewDockerServer方法)结构体实例,然后执行server.Start方法运行这个实例。

看一下DockerServer结构体:

pkg/kubelet/dockershim/remote/docker_server.go

// DockerServer is the grpc server of dockershim.
// It exposes the dockershim CRIService (the CRI runtime and image service
// implementations) to the kubelet over gRPC on the configured endpoint.
type DockerServer struct {
    // endpoint is the endpoint to serve on.
    endpoint string
    // service is the docker service which implements runtime and image services.
    service dockershim.CRIService
    // server is the grpc server.
    server *grpc.Server
}

可以看出,结构体包含了这个grpc服务运行的端点、包含的CRIService、以及服务自身。

CRIService是一个接口,进入接口可以看到:

pkg/kubelet/dockershim/docker_service.go

// CRIService includes all methods necessary for a CRI server.
type CRIService interface {
   // Container- and pod-sandbox-level operations defined by the CRI runtime API.
   runtimeapi.RuntimeServiceServer
   // Image operations defined by the CRI image API.
   runtimeapi.ImageServiceServer
   // Start readies the service to serve requests (implementation lives elsewhere
   // in docker_service.go).
   Start() error
}

对于一个CRI,需要包含RuntimeServiceServer和ImageServiceServer两大类方法,分别用于操作容器和镜像。

进入RuntimeServiceServer接口,可以看到接口定义了容器的启动、停止、删除、状态、进入等一系列容器级别的操作,以及对pod sandbox的pod级别的操作。

而进入ImageServiceServer接口,则可以看到镜像的列举、状态、拉取、删除、信息五条操作。

也就是说,只要我们能够将这两个接口中定义的全部方法都实现,我们也可以定义自己的CRI。这就是kubelet与CRI的解耦。用图片来表示,就是这样:

kubelet通过grpc协议调用grpc server中定义的一系列方法,实现与CRI的交互,对容器和镜像进行具体操作。

事实上,由于docker是k8s默认的CRI,如上所说,启动docker shim的过程都写在了k8s的源码中了。同样的,docker CRI的RuntimeServiceServer和ImageServiceServer两大类方法也都在k8s源码中定义好了,具体位置在pkg/kubelet/dockershim包中,这里就不一一列举了。剩余部分在下一篇继续讲述。(原文链接:https://www.cnblogs.com/00986014w/p/10907712.html)

posted @ 2019-05-21 14:33  右威卫大将军  阅读(1050)  评论(0编辑  收藏  举报