海豚调度Dolphinscheduler源码分析(四)MasterServer的启动
Curator是zk的一个客户端框架,其中封装了分布式公平可重入互斥锁,最为常见的是InterProcessMutex。
先简单总结下MasterServer服务的启动流程:
- 初始化netty服务器,并启动
- 通过zookeeper客户端Curator创建一个znode临时节点 /dolphinscheduler/nodes/master/<ip>:<port>,如果主机因为宕机,网络等问题,临时节点会消失。
- 通过zookeeper客户端Curator注册连接状态监听器(监听连接丢失 LOST、重新连接 RECONNECTED、连接挂起 SUSPENDED 事件)。因为上面的znode是临时节点,所以重新连接后需要重新创建节点
- 尝试获取 znode节点 /dolphinscheduler/lock/failover/startup-masters 的分布式锁:调用 mutex.acquire() 获取锁,同一时间只有一个线程可以获取到锁,然后成为Master(active);没有获取到锁的,需要在上面注册watcher。
- 启动一个Master的zk客户端
- 启动master scheduler 服务
- 启动quartz 定时任务服务
- 添加一个jvm的钩子 当jvm关闭时,可以优雅的停止掉服务
分布式锁可以保证同一时间只有一个线程可以获取到锁。
今天来分析server模块的master,MasterServer类
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.dolphinscheduler.server.master; import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.remote.NettyRemotingServer; import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.config.NettyServerConfig; import org.apache.dolphinscheduler.server.master.config.MasterConfig; import org.apache.dolphinscheduler.server.master.processor.TaskAckProcessor; import org.apache.dolphinscheduler.server.master.processor.TaskKillResponseProcessor; import org.apache.dolphinscheduler.server.master.processor.TaskResponseProcessor; import org.apache.dolphinscheduler.server.master.registry.MasterRegistry; import org.apache.dolphinscheduler.server.master.runner.MasterSchedulerService; import org.apache.dolphinscheduler.server.worker.WorkerServer; import org.apache.dolphinscheduler.server.zk.ZKMasterClient; import org.apache.dolphinscheduler.service.bean.SpringApplicationContext; import org.apache.dolphinscheduler.service.quartz.QuartzExecutors; import org.quartz.SchedulerException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.WebApplicationType; import org.springframework.boot.builder.SpringApplicationBuilder; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.FilterType; import javax.annotation.PostConstruct; @ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = { @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class}) }) public class MasterServer { /** * logger of MasterServer */ private static final Logger logger = LoggerFactory.getLogger(MasterServer.class); /** * master config * master 配置类注入 */ @Autowired private MasterConfig masterConfig; /** * spring application context * only use it for initialization * 仅用于初始化 */ @Autowired private SpringApplicationContext springApplicationContext; /** * 网络远程服务器 * netty remote server */ private NettyRemotingServer nettyRemotingServer; /** * master registry * master服务监听 */ @Autowired private MasterRegistry masterRegistry; /** * zk master client * zk 客户端curator */ @Autowired private ZKMasterClient zkMasterClient; /** * scheduler service */ @Autowired private MasterSchedulerService masterSchedulerService; /** * master server startup * * master server not use web service * @param args arguments */ public static void main(String[] args) { Thread.currentThread().setName(Constants.THREAD_NAME_MASTER_SERVER); new SpringApplicationBuilder(MasterServer.class).web(WebApplicationType.NONE).run(args); } /** * run master server * @PostConstruct 会在该bean依赖注入完成后,执行该方法 */ @PostConstruct public void run(){ //init remoting server //初始化 netty 服务器 NettyServerConfig serverConfig = new NettyServerConfig(); serverConfig.setListenPort(masterConfig.getListenPort()); this.nettyRemotingServer = new NettyRemotingServer(serverConfig); this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RESPONSE, new TaskResponseProcessor()); 
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_ACK, new TaskAckProcessor()); this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_RESPONSE, new TaskKillResponseProcessor()); this.nettyRemotingServer.start(); // register // 添加node/master/XXX:5678节点,并监听事件 this.masterRegistry.registry(); // self tolerant // 这个地方有个分布式锁,保持/dolphinscheduler/lock/failover/startup-masters // 锁,然后创建znode节点, // 并查询master任务和worker任务是否需要容错 this.zkMasterClient.start(); // scheduler start // 启动master scheduler // MasterSchedulerService,继承了Thread类 this.masterSchedulerService.start(); // start QuartzExecutors // what system should do if exception try { logger.info("start Quartz server..."); // 开启quartzExecutor 服务 QuartzExecutors.getInstance().start(); } catch (Exception e) { try { QuartzExecutors.getInstance().shutdown(); } catch (SchedulerException e1) { logger.error("QuartzExecutors shutdown failed : " + e1.getMessage(), e1); } logger.error("start Quartz failed", e); } /** * register hooks, which are called before the process exits,在关闭程序时,jvm会先执行close方法 */ Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { @Override public void run() { close("shutdownHook"); } })); } /** * gracefully close * @param cause close cause */ public void close(String cause) { try { //execute only once if(Stopper.isStopped()){ return; } logger.info("master server is stopping ..., cause : {}", cause); // set stop signal is true Stopper.stop(); try { //thread sleep 3 seconds for thread quietly stop Thread.sleep(3000L); }catch (Exception e){ logger.warn("thread sleep exception ", e); } // this.masterSchedulerService.close(); this.nettyRemotingServer.close(); this.masterRegistry.unRegistry(); this.zkMasterClient.close(); //close quartz try{ QuartzExecutors.getInstance().shutdown(); logger.info("Quartz service stopped"); }catch (Exception e){ logger.warn("Quartz service stopped exception:{}",e.getMessage()); } } catch (Exception e) { logger.error("master server stop exception ", 
e); System.exit(-1); } } }
监听器注册org.apache.dolphinscheduler.server.master.registry.MasterRegistry类
/** * registry */ public void registry() { String address = NetUtils.getHost(); //获取master的zk节点 localNodePath = /dolphinscheduler/nodes/master/xxxx:5678 String localNodePath = getMasterPath(); // 通过service模块下zk的代码 来创建临时节点 路径为/dolphinscheduler/nodes/master/xxxx:5678 zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, ""); // 初始化curator客户端,并监听状态变化 zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient().getConnectionStateListenable().addListener(new ConnectionStateListener() { @Override public void stateChanged(CuratorFramework client, ConnectionState newState) { if (newState == ConnectionState.LOST) { // client连接断开 logger.error("master : {} connection lost from zookeeper", address); } else if (newState == ConnectionState.RECONNECTED) { // client重新连接,在zk中重新创建节点,因为是临时节点,会失效 logger.info("master : {} reconnected to zookeeper", address); zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, ""); } else if (newState == ConnectionState.SUSPENDED) {
// client连接终止 logger.warn("master : {} connection SUSPENDED ", address); } } }); // 获取zk配置的心跳间隔时间的参数 int masterHeartbeatInterval = masterConfig.getMasterHeartbeatInterval(); // 初始化heartBeatTask实例 HeartBeatTask这个类继承了Thread类,重写了run()方法 HeartBeatTask heartBeatTask = new HeartBeatTask(startTime, masterConfig.getMasterReservedMemory(), masterConfig.getMasterMaxCpuloadAvg(), Sets.newHashSet(getMasterPath()), zookeeperRegistryCenter); //心跳线程,监测心跳是否正常 this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, 0, masterHeartbeatInterval, TimeUnit.SECONDS); logger.info("master node : {} registry to ZK path {} successfully with heartBeatInterval : {}s" , address, localNodePath, masterHeartbeatInterval); }
有两个点 需要看一下:
一:Runtime.getRuntime().addShutdownHook(shutdownHook);
// Build the hook thread first, then hand it to the JVM; when the JVM shuts down
// the thread runs and invokes close("shutdownHook").
Thread shutdownHook = new Thread(new Runnable() {
    @Override
    public void run() {
        close("shutdownHook");
    }
});
Runtime.getRuntime().addShutdownHook(shutdownHook);
这个方法的含义说明:
这个方法的意思就是在jvm中增加一个关闭的钩子,当jvm关闭的时候,会执行系统中已经设置的所有通过方法addShutdownHook添加的钩子,当系统执行完这些钩子后,jvm才会关闭。所以这些钩子可以在jvm关闭的时候进行内存清理、对象销毁等操作。
二:PostConstruct
public static void main(String[] args) { Thread.currentThread().setName(Constants.THREAD_NAME_MASTER_SERVER); new SpringApplicationBuilder(MasterServer.class).web(WebApplicationType.NONE).run(args); } /** * run master server * @PostConstruct 会在该bean依赖注入完成后,执行该方法 */ @PostConstruct public void run(){ //init remoting server //初始化 netty 服务器 NettyServerConfig serverConfig = new NettyServerConfig(); serverConfig.setListenPort(masterConfig.getListenPort()); this.nettyRemotingServer.start(); // register // 添加node/master/XXX:5678节点,并监听事件 this.masterRegistry.registry(); }
在main方法中以MasterServer.class启动Spring容器,此时仅靠构造方法还无法完成初始化(依赖尚未注入),需要借助@PostConstruct,在依赖注入完成后通过run()方法,完成MasterServer类的对象的初始化。
这个参考https://www.cnblogs.com/erlou96/p/13753824.html