kube-scheduler 调度源码分析

kube-scheduler 的核心数据结构是 Scheduler。Scheduler 对象初始化完成后就开始执行调度,其大致结构如下:

    +------------------+                                         +------------------+                            implement        +------------------+
    |    scheduler     |                                +------->|  schedulerCache  |----------------------------interface------->|      Cache       |
    +------------------+-----------------------+        |        +------------------+-----------------------+                     +------------------+-----------------------+
    |    SchedulerCache internalcache.Cache    |--------+        |       assumedPods map[string]bool        |                     |              AssumePod(...)              |
    +------------------------------------------+                 +------------------------------------------+                     +------------------------------------------+
    |    Algorithm  core.ScheduleAlgorithm     |--------+        |      podStates map[string]*podState      |                     |            FinishBinding(...)            |
    +------------------------------------------+        |        +------------------------------------------+                     |                                          |
    |             SchedulingQueue              |        |        |    nodes map[string]*nodeInfoListItem    |                     +------------------------------------------+
    |      internalqueue.SchedulingQueue       |-----+  |        |                                          |                     |              ForgetPod(...)              |
    +------------------------------------------+     |  |        +------------------------------------------+                     +------------------------------------------+
    |           Profiles profile.Map           |--+  |  |        |        headNode *nodeInfoListItem        |                     |               AddPod(...)                |
    +------------------------------------------+  |  |  |        +------------------------------------------+                     +------------------------------------------+
    |    NextPod func() *framework.PodInfo     |  |  |  |        |            nodeTree *nodeTree            |                     |               AddNode(...)               |
    +------------------------------------------+  |  |  |        +------------------------------------------+                     +------------------------------------------+
    |                  ......                  |  |  |  |        |                  ......                  |                     |                  ......                  |
    +------------------------------------------+  |  |  |        +------------------------------------------+                     +------------------------------------------+
               +----------------------------------+  |  |
               |                                     |  |
               |                                     |  |
               v                                     |  |         +------------------+                          implement         +------------------+
     +------------------+                            |  +-------->| genericScheduler |--------------------------interface-------->|ScheduleAlgorithm |
     |     Profile      |                            |            +------------------+-----------------------+                    +------------------+-----------------------+
     +------------------+-----------+                |            |        cache internalcache.Cache         |                    |              Schedule(...)               |
     |     framework.Framework      |--+             |            +------------------------------------------+                    +------------------------------------------+
     +------------------------------+  |             |            |             schedulingQueue              |                    |               Preempt(...)               |
     |            ......            |  |             |            |      internalqueue.SchedulingQueue       |                    +------------------------------------------+
     +------------------------------+  |             |            +------------------------------------------+                    |              Extenders(...)              |
                                       |             |            |      extenders []SchedulerExtender       |                    +------------------------------------------+
              +------------------------+             |            +------------------------------------------+
              |                                      |            | nodeInfoSnapshot *internalcache.Snapshot |
              v                                      |            +------------------------------------------+
     +----------------------------+                  |            |      percentageOfNodesToScore int32      |
     |         framework          |---------------+  |            +------------------------------------------+
     +----------------------------+------+        |  |
     |         registry Registry         |        |  |
     +-----------------------------------+        |  |            +------------------+                          implement         +------------------+
     |queueSortPlugins []QueueSortPlugin |----+   |  +----------->|  PriorityQueue   |--------------------------interface-------->| SchedulingQueue  |
     +-----------------------------------+    |   |               +------------------+-----------------------+                    +------------------+-----------------------+
     | preFilterPlugin []PreFilterPlugin |    |   |               |            activeQ *heap.Heap            |                    |          Add(pod *v1.Pod) error          |
     +-----------------------------------+    |   |               +------------------------------------------+                    +------------------------------------------+
     |   filterPlugins []FilterPlugin    |    |   |               |          podBackoffQ *heap.Heap          |                    |    Pop() (*framework.PodInfo, error)     |
     +-----------------------------------+    |   |               |                                          |                    +------------------------------------------+
     | preScorePlugins []PreScorePlugin  |    |   |               +------------------------------------------+                    |    MoveAllToActiveOrBackoffQueue(...)    |
     +-----------------------------------+    |   |               |   unschedulableQ *UnschedulablePodsMap   |                    +------------------------------------------+
     |    scorePlugins []ScorePlugin     |    |   |               +------------------------------------------+                    |                  Run()                   |
     +-----------------------------------+    |   |               |      nominatedPods *nominatedPodMap      |                    +------------------------------------------+
     |  reservePlugins []ReservePlugin   |    |   |               +------------------------------------------+                    |                  ......                  |
     +-----------------------------------+    |   |               |          schedulingCycle int64           |                    +------------------------------------------+
     |  preBindPlugins []PreBindPlugin   |    |   |               +------------------------------------------+
     +-----------------------------------+    |   |               |          moveRequestCycle int64          |
     |     bindPlugins []BindPlugin      |    |   |               +------------------------------------------+
     +-----------------------------------+    |   |
     | postBindPlugins []PostBindPlugin  |    |   |
     +-----------------------------------+    |   |                                                           implement
     |unreservePlugins []UnreservePlugin |    |   |                                                           interface            +------------------+
     +-----------------------------------+    |   +------------------------------------------------------------------------------->|    Framework     |
     |   permitPlugins []PermitPlugin    |--->|                                                                                    +------------------+----------------+
     +-----------------------------------+    |                                                                                    |     QueueSortFunc() LessFunc      |
     |              ......               |    |                                                                                    +-----------------------------------+
     +-----------------------------------+    |                                                                                    |     RunPreFilterPlugins(...)      |
                                              +-------------+                                                                      +-----------------------------------+
                                                            |                                                                      |       RunFilterPlugins(...)       |
                                                            |                                                                      +-----------------------------------+
                                                            v                                                                      | RunPreFilterExtensionAddPod(...)  |
     +-----------interface----------------------------------------------------------------------------------------+                +-----------------------------------+
     |                                                     +------------------+                                   |                |RunPreFilterExtensionRemovePod(...)|
     | +------------------+                                |  PreBindPlugin   |                                   |                +-----------------------------------+
     | | QueueSortPlugin  |                                +------------------+----------------+                  |                |      RunPreScorePlugins(...)      |
     | +------------------+----------------+               |           PreBind(...)            |                  |                +-----------------------------------+
     | |   Less(*PodInfo, *PodInfo) bool   |               +-----------------------------------+                  |                |       RunScorePlugins(...)        |
     | +-----------------------------------+                                                                      |                +-----------------------------------+
     | +------------------+                                                                                       |                |      RunPreBindPlugins(...)       |
     | | PreFilterPlugin  |                                +------------------+                                   |                +-----------------------------------+
     | +------------------+----------------+               |    BindPlugin    |                                   |                |      RunPostBindPlugins(...)      |
     | |          PreFilter(...)           |               +------------------+----------------+                  |                +-----------------------------------+
     | +-----------------------------------+               |             Bind(...)             |                  |                |      RunReservePlugins(...)       |
     | |       PreFilterExtensions()       |               +-----------------------------------+                  |                +-----------------------------------+
     | +-----------------------------------+                                                                      |                |     RunUnreservePlugins(...)      |
     |                                                                                                            |                +-----------------------------------+
     | +------------------+                                +------------------+                                   |                |       RunPermitPlugins(...)       |
     | |  PreScorePlugin  |                                |  PostBindPlugin  |                                   |                +-----------------------------------+
     | +------------------+----------------+               +------------------+----------------+                  |                |         WaitOnPermit(...)         |
     | |           PreScore(...)           |               |           PostBind(...)           |                  |                +-----------------------------------+
     | +-----------------------------------+               +-----------------------------------+                  |                |        RunBindPlugins(...)        |
     |                                                                                                            |                +-----------------------------------+
     | +------------------+                                +------------------+                                   |                |              ......               |
     | |   ScorePlugin    |                                | UnreservePlugin  |                                   |                +-----------------------------------+
     | +------------------+----------------+               +------------------+----------------+                  |
     | |            Score(...)             |               |          Unreserve(...)           |                  |
     | +-----------------------------------+               +-----------------------------------+                  |
     | |         ScoreExtensions()         |                                                                      |
     | +-----------------------------------+               +------------------+                                   |
     | +------------------+                                |   PermitPlugin   |                                   |
     | |  ReservePlugin   |                                +------------------+----------------+                  |
     | +------------------+----------------+               |            Permit(...)            |                  |
     | |           Reserve(...)            |               +-----------------------------------+                  |
     | +-----------------------------------+                                                                      |
     |                                                                                                            |
     +------------------------------------------------------------------------------------------------------------+

scheduler 对象中,主要关注 4 个核心的数据成员,分别是:

  • SchedulerCache: 调度缓存
  • Algorithm: 调度算法
  • SchedulingQueue: 调度队列(由优先级队列 PriorityQueue 实现)
  • Profiles: 插件集(以调度器名称为键的 Profile 字典)

scheduler 创建完成后,开始调度,代码如下

// Run executes the scheduler based on the given configuration. It only returns on error or when context is done.
//
// NOTE: this listing is abridged; every "......" line stands for code that was
// omitted from the excerpt (so error handling and the final return are not shown).
func Run(ctx context.Context, cc schedulerserverconfig.CompletedConfig, outOfTreeRegistryOptions ...Option) error {
        ......
        // Create the scheduler.
        // Each scheduler.WithXxx option below is populated from a field of cc.ComponentConfig,
        // except WithFrameworkOutOfTreeRegistry, which receives outOfTreeRegistry
        // (presumably built from outOfTreeRegistryOptions in the omitted code above — not visible here).
	sched, err := scheduler.New(cc.Client,
		cc.InformerFactory,
		cc.PodInformer,
		recorderFactory,
		ctx.Done(),
		scheduler.WithProfiles(cc.ComponentConfig.Profiles...),
		scheduler.WithAlgorithmSource(cc.ComponentConfig.AlgorithmSource),
		scheduler.WithPreemptionDisabled(cc.ComponentConfig.DisablePreemption),
		scheduler.WithPercentageOfNodesToScore(cc.ComponentConfig.PercentageOfNodesToScore),
		scheduler.WithBindTimeoutSeconds(cc.ComponentConfig.BindTimeoutSeconds),
		scheduler.WithFrameworkOutOfTreeRegistry(outOfTreeRegistry),
		scheduler.WithPodMaxBackoffSeconds(cc.ComponentConfig.PodMaxBackoffSeconds),
		scheduler.WithPodInitialBackoffSeconds(cc.ComponentConfig.PodInitialBackoffSeconds),
		scheduler.WithExtenders(cc.ComponentConfig.Extenders...),
	)
        ......

       // Hand control to the Scheduler's own Run method, passing ctx through.
       sched.Run(ctx)
}

继续进入 sched.Run(ctx) 方法

// Run begins watching and scheduling. It waits for the cache to be synced, then starts scheduling and blocks until the context is done.
func (sched *Scheduler) Run(ctx context.Context) {
	// Return without scheduling anything if the cache-sync wait reports failure.
	if !cache.WaitForCacheSync(ctx.Done(), sched.scheduledPodsHasSynced) {
		return
	}
	// Start the scheduling queue before entering the scheduling loop.
	sched.SchedulingQueue.Run()
	// Drive sched.scheduleOne under ctx with a period of 0; this is the call
	// that blocks until the context is done.
	wait.UntilWithContext(ctx, sched.scheduleOne, 0)
	// Reached only after the loop above has returned: close the scheduling queue.
	sched.SchedulingQueue.Close()
}

在 Run 方法内,首先等待 informer 缓存同步完成,接着启动调度队列(sched.SchedulingQueue.Run()),待调度的 pod 都是通过调度队列获取的。最后通过 wait.UntilWithContext 循环调用 sched.scheduleOne,开始调度流程

// scheduleOne does the entire scheduling workflow for a single pod.  It is serialized on the scheduling algorithm's host fitting.
//
// NOTE: this listing is abridged; every "......" line stands for code that was
// omitted from the excerpt (variables such as pod, state, assumedPod and the
// cycle contexts are introduced in those omitted parts).
func (sched *Scheduler) scheduleOne(ctx context.Context) {
	// Take the next pod waiting to be scheduled from the scheduling queue via NextPod.
	podInfo := sched.NextPod()
	......
	// Use pod.Spec.SchedulerName to look up the registered plugin set in the Profiles map.
	prof, err := sched.profileForPod(pod)
	......
	// The core step: call Schedule on the Algorithm member, passing prof and pod.
	// When it completes it returns the scheduling result.
	scheduleResult, err := sched.Algorithm.Schedule(schedulingCycleCtx, prof, state, pod)

	......
	// Run the Reserve plugins.
	prof.RunReservePlugins(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
	......
	// Set pod.NodeName to the best node chosen by the scheduling algorithm, and
	// update the pod and node information in the SchedulerCache.
	sched.assume(assumedPod, scheduleResult.SuggestedHost)

	......
	// Run the Permit plugins. They act as a check before the real binding: if they
	// report failure, scheduling fails and the pod is removed from the SchedulerCache.
	prof.RunPermitPlugins(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
	......
	// Start a goroutine so that pod binding runs concurrently with the next scheduling cycle.
	go func() {
		......
		// Run the PreBind plugins; continue only if they succeed.
		preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)

		......
		// Call bind to bind the pod to the node; once binding succeeds the pod is scheduled.
		err := sched.bind(bindingCycleCtx, prof, assumedPod, scheduleResult.SuggestedHost, state)

		......
		// After binding completes, run the PostBind plugins.
		prof.RunPostBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
	}() // the closure must be invoked: a go statement requires a function call
}
posted @ 2020-07-26 21:57  seamounts  阅读(271)  评论(0编辑  收藏  举报