海豚调度Dolphinscheduler源码分析(四)MasterServer的启动

//Curator是zk的一个客户端框架,其中封装了分布式公平可重入互斥锁,最为常见的是InterProcessMutex

先简单总结下MasterServer服务的启动流程:

  1. 初始化netty服务器,并启动
  2. 通过zookeeper客户端Curator创建一个znode临时节点 /dolphinscheduler/nodes/master/<ip>:<port>,如果主机因为宕机,网络等问题,临时节点会消失。
  3. 通过zookeeper客户端Curator对上面的znode注册监听器(监听连接断开、重新连接、连接挂起事件);因为是临时节点,所以重新连接后需要重新创建节点。
  4. 尝试获取 znode节点 /dolphinscheduler/lock/failover/startup-masters 的分布式锁:调用mutex.acquire()获取锁,同一时间只有一个线程可以获取到锁,然后成为Master(active);没有获取到锁的,需要在上面注册watcher。
  5. 启动一个Master的zk客户端
  6. 启动master scheduler 服务
  7. 启动quartz 定时任务服务
  8. 添加一个jvm的钩子 当jvm关闭时,可以优雅的停止掉服务

分布式锁可以保证同一时间只有一个线程可以获取到锁。

 

 

今天来分析server模块的master,MasterServer类

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.dolphinscheduler.server.master;

import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.remote.NettyRemotingServer;
import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.config.NettyServerConfig;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.processor.TaskAckProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskKillResponseProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskResponseProcessor;
import org.apache.dolphinscheduler.server.master.registry.MasterRegistry;
import org.apache.dolphinscheduler.server.master.runner.MasterSchedulerService;
import org.apache.dolphinscheduler.server.worker.WorkerServer;
import org.apache.dolphinscheduler.server.zk.ZKMasterClient;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.apache.dolphinscheduler.service.quartz.QuartzExecutors;
import org.quartz.SchedulerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.WebApplicationType;
import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.FilterType;

import javax.annotation.PostConstruct;




/**
 * Spring Boot entry point of the master server.
 *
 * <p>{@link #run()} starts, in order: the netty remoting server, the master registry,
 * the ZooKeeper master client, the master scheduler service and the Quartz executors,
 * and finally registers a JVM shutdown hook that stops them again.
 */
@ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = {
        @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class})
})
public class MasterServer {

    /**
     * logger of MasterServer
     */
    private static final Logger logger = LoggerFactory.getLogger(MasterServer.class);

    /**
     * master config, injected by Spring
     */
    @Autowired
    private MasterConfig masterConfig;

    /**
     *  spring application context
     *  only use it for initialization
     */
    @Autowired
    private SpringApplicationContext springApplicationContext;

    /**
     * netty remote server; created in {@link #run()}, so it is null until then
     */
    private NettyRemotingServer nettyRemotingServer;

    /**
     * master registry
     */
    @Autowired
    private MasterRegistry masterRegistry;

    /**
     * zk master client
     */
    @Autowired
    private ZKMasterClient zkMasterClient;

    /**
     * scheduler service
     */
    @Autowired
    private MasterSchedulerService masterSchedulerService;

    /**
     * master server startup
     *
     * master server not use web service
     * @param args arguments
     */
    public static void main(String[] args) {
        Thread.currentThread().setName(Constants.THREAD_NAME_MASTER_SERVER);
        new SpringApplicationBuilder(MasterServer.class).web(WebApplicationType.NONE).run(args);
    }

    /**
     * run master server
     *
     * <p>Annotated with {@code @PostConstruct}, so it is invoked once dependency
     * injection of this bean has completed.
     */
    @PostConstruct
    public void run(){

        // init remoting server: listen on the configured port and register the
        // processors for task responses, task acks and task-kill responses
        NettyServerConfig serverConfig = new NettyServerConfig();
        serverConfig.setListenPort(masterConfig.getListenPort());
        this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RESPONSE, new TaskResponseProcessor());
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_ACK, new TaskAckProcessor());
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_RESPONSE, new TaskKillResponseProcessor());
        this.nettyRemotingServer.start();

        // register
        // original author's note: adds the nodes/master/XXX:5678 znode and listens for events
        this.masterRegistry.registry();

        // self tolerant
        // original author's note (not verifiable from this class): takes the distributed lock
        // /dolphinscheduler/lock/failover/startup-masters, creates the znode, and checks
        // whether master tasks and worker tasks need fault tolerance
        this.zkMasterClient.start();

        // scheduler start
        // original author's note: MasterSchedulerService extends Thread
        this.masterSchedulerService.start();

        // start QuartzExecutors
        // what system should do if exception
        try {
            logger.info("start Quartz server...");
            QuartzExecutors.getInstance().start();
        } catch (Exception e) {
            // startup failed: try to shut Quartz down again, then report the original failure
            try {
                QuartzExecutors.getInstance().shutdown();
            } catch (SchedulerException e1) {
                logger.error("QuartzExecutors shutdown failed : " + e1.getMessage(), e1);
            }
            logger.error("start Quartz failed", e);
        }

        /**
         *  register hooks, which are called before the process exits
         */
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                // Must not end in System.exit(): Runtime.exit blocks indefinitely when it is
                // called while shutdown hooks are running, which would hang the JVM.
                shutdown("shutdownHook", false);
            }
        }));

    }

    /**
     * gracefully close
     *
     * <p>Stops the services started by {@link #run()}. If stopping fails, the error is
     * logged and the JVM is terminated with exit status -1.
     *
     * @param cause close cause
     */
    public void close(String cause) {
        shutdown(cause, true);
    }

    /**
     * Stops the master services; does nothing if the stop signal is already set.
     *
     * @param cause close cause, used for logging only
     * @param exitOnFailure whether to call {@code System.exit(-1)} when stopping fails;
     *                      must be {@code false} when running inside a shutdown hook
     */
    private void shutdown(String cause, boolean exitOnFailure) {
        // The interrupt flag is restored only after cleanup, so that the close calls
        // below are not cut short by an interrupt received during the grace period.
        boolean interrupted = false;
        try {
            //execute only once
            if (Stopper.isStopped()) {
                return;
            }

            logger.info("master server is stopping ..., cause : {}", cause);

            // set stop signal is true
            Stopper.stop();

            try {
                //thread sleep 3 seconds for thread quietly stop
                Thread.sleep(3000L);
            } catch (InterruptedException e) {
                interrupted = true;
                logger.warn("thread sleep exception ", e);
            }

            this.masterSchedulerService.close();
            this.nettyRemotingServer.close();
            this.masterRegistry.unRegistry();
            this.zkMasterClient.close();
            //close quartz
            try {
                QuartzExecutors.getInstance().shutdown();
                logger.info("Quartz service stopped");
            } catch (Exception e) {
                // trailing throwable argument keeps the stack trace in the log
                logger.warn("Quartz service stopped exception:{}", e.getMessage(), e);
            }
        } catch (Exception e) {
            logger.error("master server stop exception ", e);
            if (exitOnFailure) {
                System.exit(-1);
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}

 

监听器注册org.apache.dolphinscheduler.server.master.registry.MasterRegistry类

 

/**
     * registry
     */
    public void registry() {
        String address = NetUtils.getHost();
        //获取master的zk节点 localNodePath = /dolphinscheduler/nodes/master/xxxx:5678
        String localNodePath = getMasterPath();
        // 通过service模块下zk的代码 来创建临时节点 路径为/dolphinscheduler/nodes/master/xxxx:5678
        zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, "");
        // 初始化curator客户端,并监听状态变化
        zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient().getConnectionStateListenable().addListener(new ConnectionStateListener() {
            @Override
            public void stateChanged(CuratorFramework client, ConnectionState newState) {
                if (newState == ConnectionState.LOST) {
                    // client连接断开
                    logger.error("master : {} connection lost from zookeeper", address);
                } else if (newState == ConnectionState.RECONNECTED) {
                    // client重新连接,在zk中重新创建节点,因为是临时节点,会失效
                    logger.info("master : {} reconnected to zookeeper", address);
                    zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, "");
                } else if (newState == ConnectionState.SUSPENDED) {
            // client连接终止 logger.warn(
"master : {} connection SUSPENDED ", address); } } }); // 获取zk配置的心跳间隔时间的参数 int masterHeartbeatInterval = masterConfig.getMasterHeartbeatInterval(); // 初始化heartBeatTask实例 HeartBeatTask这个类继承了Thread类,重写了run()方法 HeartBeatTask heartBeatTask = new HeartBeatTask(startTime, masterConfig.getMasterReservedMemory(), masterConfig.getMasterMaxCpuloadAvg(), Sets.newHashSet(getMasterPath()), zookeeperRegistryCenter); //心跳线程,监测心跳是否正常 this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, 0, masterHeartbeatInterval, TimeUnit.SECONDS); logger.info("master node : {} registry to ZK path {} successfully with heartBeatInterval : {}s" , address, localNodePath, masterHeartbeatInterval); }

 

 

 


 

有两个点 需要看一下:

一:Runtime.getRuntime().addShutdownHook(shutdownHook);

// Task the JVM runs on shutdown: asks the master server to stop its services.
// NOTE(review): close(), as listed earlier in this article, calls System.exit(-1) when
// stopping fails; Runtime.exit blocks when invoked from a shutdown hook - confirm.
Runnable stopOnExit = new Runnable() {
    @Override
    public void run() {
        close("shutdownHook");
    }
};
// Hand the task to the JVM wrapped in a not-yet-started thread.
Runtime.getRuntime().addShutdownHook(new Thread(stopOnExit));


   这个方法的含义说明:
    这个方法的意思就是在jvm中增加一个关闭的钩子,当jvm关闭的时候,会执行系统中已经设置的所有通过方法addShutdownHook添加的钩子,当系统执行完这些钩子后,jvm才会关闭。所以这些钩子可以在jvm关闭的时候进行内存清理、对象销毁等操作。


 

二:PostConstruct

/**
 * Entry point: names the current thread, then boots the Spring application
 * for {@code MasterServer} with no web environment.
 *
 * @param args command-line arguments, passed through to the Spring application
 */
public static void main(String[] args) {
    final Thread launcher = Thread.currentThread();
    launcher.setName(Constants.THREAD_NAME_MASTER_SERVER);

    final SpringApplicationBuilder bootstrap = new SpringApplicationBuilder(MasterServer.class);
    bootstrap.web(WebApplicationType.NONE).run(args);
}

    /**
     * run master server (abridged excerpt)
     *
     * <p>Annotated with {@code @PostConstruct}, so it is invoked once dependency
     * injection of this bean has completed.
     */
    @PostConstruct
    public void run(){

        //init remoting server
        NettyServerConfig serverConfig = new NettyServerConfig();
        serverConfig.setListenPort(masterConfig.getListenPort());
        // the server has to be built from the config before it can be started
        this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
        this.nettyRemotingServer.start();

        // register
        // original author's note: adds the nodes/master/XXX:5678 znode and listens for events
        this.masterRegistry.registry();

        // excerpt only: the full listing of run() earlier in this article also registers the
        // netty processors and starts the zk client, the scheduler service and Quartz
    }

 

在main方法中把MasterServer.class交给SpringApplicationBuilder来启动Spring容器;构造方法执行时依赖还没有注入,无法完成初始化,因此需要借助@PostConstruct,在依赖注入完成后通过run()方法完成MasterServer类的对象的初始化。


这个参考https://www.cnblogs.com/erlou96/p/13753824.html

 

posted @ 2020-10-01 20:10  彬在俊  阅读(2222)  评论(0编辑  收藏  举报