海豚调度Dolphinscheduler源码分析(四)MasterServer的启动
//Curator是zk的一个客户端框架,其中封装了分布式公平可重入互斥锁,最为常见的是InterProcessMutex
先简单总结下MasterServer服务的启动流程:
- 初始化netty服务器,并启动
- 通过zookeeper客户端Curator创建一个znode临时节点 /dolphinscheduler/nodes/master/<ip>:<port>
- 通过zookeeper客户端Curator对上面的znode注册连接状态监听器(监听连接断开LOST、重新连接RECONNECTED、连接挂起SUSPENDED三种事件);因为是临时节点,所以重新连接后需要重新创建节点
- 尝试获取 znode节点 /dolphinscheduler/lock/failover/startup-masters 的分布式锁 调用了mutex.acquire();
- 启动一个zk客户端
- 启动master scheduler 服务
- 启动quartz 定时任务服务
- 添加一个jvm的钩子 当jvm关闭时,可以优雅的停止掉服务
分布式锁可以保证同一时间只有一个线程可以获取到锁。
今天来分析server模块的master,MasterServer类
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.remote.NettyRemotingServer;
import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.config.NettyServerConfig;
import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.processor.TaskAckProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskKillResponseProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskResponseProcessor;
import org.apache.dolphinscheduler.server.master.registry.MasterRegistry;
import org.apache.dolphinscheduler.server.master.runner.MasterSchedulerService;
import org.apache.dolphinscheduler.server.worker.WorkerServer;
import org.apache.dolphinscheduler.server.zk.ZKMasterClient;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.apache.dolphinscheduler.service.quartz.QuartzExecutors;
import org.quartz.SchedulerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.WebApplicationType;
import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.FilterType;
import javax.annotation.PostConstruct;
/**
 * Spring-managed entry point of the master server.
 *
 * <p>On startup ({@link #run()}) it starts the netty remoting server, registers the
 * master through {@code MasterRegistry}, starts the ZooKeeper master client, the master
 * scheduler service and the Quartz executor, and finally installs a JVM shutdown hook
 * that stops all of them again.
 *
 * <p>The component scan covers {@code org.apache.dolphinscheduler} but excludes
 * {@code WorkerServer}.
 */
@ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = {
        @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class})
})
public class MasterServer {

    /**
     * logger of MasterServer
     */
    private static final Logger logger = LoggerFactory.getLogger(MasterServer.class);

    /**
     * master config (injected configuration bean)
     */
    @Autowired
    private MasterConfig masterConfig;

    /**
     * spring application context
     * only use it for initialization
     */
    @Autowired
    private SpringApplicationContext springApplicationContext;

    /**
     * netty remote server; created in {@link #run()}, so it is null until then
     */
    private NettyRemotingServer nettyRemotingServer;

    /**
     * master registry (registers this master and listens for connection events)
     */
    @Autowired
    private MasterRegistry masterRegistry;

    /**
     * zk master client (Curator based ZooKeeper client)
     */
    @Autowired
    private ZKMasterClient zkMasterClient;

    /**
     * scheduler service
     */
    @Autowired
    private MasterSchedulerService masterSchedulerService;

    /**
     * master server startup
     *
     * master server not use web service
     * @param args arguments
     */
    public static void main(String[] args) {
        Thread.currentThread().setName(Constants.THREAD_NAME_MASTER_SERVER);
        new SpringApplicationBuilder(MasterServer.class).web(WebApplicationType.NONE).run(args);
    }

    /**
     * run master server
     *
     * <p>Annotated with {@code @PostConstruct}, so it is executed after dependency
     * injection of this bean has completed.
     */
    @PostConstruct
    public void run() {
        // init remoting server: configure the listen port, register the command
        // processors and start the netty server
        NettyServerConfig serverConfig = new NettyServerConfig();
        serverConfig.setListenPort(masterConfig.getListenPort());
        this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RESPONSE, new TaskResponseProcessor());
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_ACK, new TaskAckProcessor());
        this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_RESPONSE, new TaskKillResponseProcessor());
        this.nettyRemotingServer.start();

        // register
        // adds the nodes/master/<host>:<port> node and listens for connection events
        this.masterRegistry.registry();

        // self tolerant
        // a distributed lock on /dolphinscheduler/lock/failover/startup-masters is held
        // here, then the znode is created and master/worker tasks are checked for
        // whether they need fault tolerance
        this.zkMasterClient.start();

        // scheduler start
        // MasterSchedulerService extends Thread
        this.masterSchedulerService.start();

        // start QuartzExecutors
        // what system should do if exception
        try {
            logger.info("start Quartz server...");
            QuartzExecutors.getInstance().start();
        } catch (Exception e) {
            // starting Quartz failed: try to shut it down again, then report the
            // original failure; the master keeps running without Quartz
            try {
                QuartzExecutors.getInstance().shutdown();
            } catch (SchedulerException e1) {
                logger.error("QuartzExecutors shutdown failed : {}", e1.getMessage(), e1);
            }
            logger.error("start Quartz failed", e);
        }

        // register hooks, which are called before the process exits
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                // System.exit must not be called while shutdown hooks are running:
                // Runtime.exit would block indefinitely and hang the JVM shutdown.
                shutdown("shutdownHook", false);
            }
        }));
    }

    /**
     * gracefully close
     *
     * <p>If stopping fails with an exception, the error is logged and the process is
     * terminated with exit status -1.
     *
     * @param cause close cause
     */
    public void close(String cause) {
        shutdown(cause, true);
    }

    /**
     * Stops all master services; shared by {@link #close(String)} and the shutdown hook.
     *
     * @param cause close cause, only used for logging
     * @param exitOnFailure whether to call {@code System.exit(-1)} when stopping fails;
     *                      must be false when invoked from a JVM shutdown hook
     */
    private void shutdown(String cause, boolean exitOnFailure) {
        try {
            //execute only once
            if (Stopper.isStopped()) {
                return;
            }

            logger.info("master server is stopping ..., cause : {}", cause);

            // set stop signal is true
            Stopper.stop();

            try {
                //thread sleep 3 seconds for thread quietly stop
                Thread.sleep(3000L);
            } catch (Exception e) {
                logger.warn("thread sleep exception ", e);
            }

            // stop the services
            this.masterSchedulerService.close();
            this.nettyRemotingServer.close();
            this.masterRegistry.unRegistry();
            this.zkMasterClient.close();

            //close quartz
            try {
                QuartzExecutors.getInstance().shutdown();
                logger.info("Quartz service stopped");
            } catch (Exception e) {
                // trailing throwable argument keeps the stack trace in the log
                logger.warn("Quartz service stopped exception:{}", e.getMessage(), e);
            }
        } catch (Exception e) {
            logger.error("master server stop exception ", e);
            if (exitOnFailure) {
                System.exit(-1);
            }
        }
    }
}
监听器注册org.apache.dolphinscheduler.server.master.registry.MasterRegistry类
/**
 * Registers this master in ZooKeeper and schedules its heartbeat.
 *
 * <p>Steps, in order: create the ephemeral node for the master path, attach a
 * connection state listener that re-creates the node after a reconnect, then
 * schedule a {@code HeartBeatTask} at a fixed rate using the configured interval
 * (in seconds).
 */
public void registry() {
    final String address = NetUtils.getHost();
    // master node path in zk, e.g. /dolphinscheduler/nodes/master/<host>:<port>
    final String localNodePath = getMasterPath();

    // create the ephemeral node for this master
    zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, "");

    // react to connection state changes of the curator client
    final ConnectionStateListener connectionWatcher = (client, newState) -> {
        if (newState == ConnectionState.RECONNECTED) {
            // the node is ephemeral, so it has to be created again after a reconnect
            logger.info("master : {} reconnected to zookeeper", address);
            zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, "");
        } else if (newState == ConnectionState.LOST) {
            // connection lost
            logger.error("master : {} connection lost from zookeeper", address);
        } else if (newState == ConnectionState.SUSPENDED) {
            // connection suspended
            logger.warn("master : {} connection SUSPENDED ", address);
        }
    };
    zookeeperRegistryCenter.getZookeeperCachedOperator()
            .getZkClient()
            .getConnectionStateListenable()
            .addListener(connectionWatcher);

    // configured heartbeat interval, used below as a number of seconds
    final int intervalSeconds = masterConfig.getMasterHeartbeatInterval();

    // heartbeat task for this master's path
    final HeartBeatTask beat = new HeartBeatTask(
            startTime,
            masterConfig.getMasterReservedMemory(),
            masterConfig.getMasterMaxCpuloadAvg(),
            Sets.newHashSet(getMasterPath()),
            zookeeperRegistryCenter);

    // run the heartbeat immediately and then once per interval
    this.heartBeatExecutor.scheduleAtFixedRate(beat, 0, intervalSeconds, TimeUnit.SECONDS);

    logger.info("master node : {} registry to ZK path {} successfully with heartBeatInterval : {}s",
            address, localNodePath, intervalSeconds);
}
有两个点 需要看一下:
一:Runtime.getRuntime().addShutdownHook(shutdownHook);
// Register a JVM shutdown hook: the thread passed to addShutdownHook is started when
// the JVM begins shutting down, and here it simply calls close("shutdownHook").
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
@Override
public void run() {
close("shutdownHook");
}
}));
这个方法的含义说明:
这个方法的意思就是在jvm中增加一个关闭的钩子,当jvm关闭的时候,会执行系统中已经设置的所有通过方法addShutdownHook添加的钩子,当系统执行完这些钩子后,jvm才会关闭。所以这些钩子可以在jvm关闭的时候进行内存清理、对象销毁等操作。
二:PostConstruct
public static void main(String[] args) {
    // name the main thread before the Spring context is bootstrapped
    Thread.currentThread().setName(Constants.THREAD_NAME_MASTER_SERVER);

    // build the application around MasterServer, without a web environment, and run it
    SpringApplicationBuilder masterApp = new SpringApplicationBuilder(MasterServer.class);
    masterApp.web(WebApplicationType.NONE).run(args);
}
/**
 * run master server (abridged excerpt of the full method)
 *
 * <p>Annotated with {@code @PostConstruct}, so it is executed after dependency
 * injection of this bean has completed.
 */
@PostConstruct
public void run(){
    //init remoting server
    NettyServerConfig serverConfig = new NettyServerConfig();
    serverConfig.setListenPort(masterConfig.getListenPort());
    // the server has to be instantiated from the config before it can be started;
    // without this line nettyRemotingServer is still null and start() throws a
    // NullPointerException (processor registration is omitted in this excerpt)
    this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
    this.nettyRemotingServer.start();
    // register
    // adds the nodes/master/<host>:<port> node and listens for connection events
    this.masterRegistry.registry();
}
在main方法中把MasterServer.class交给SpringApplicationBuilder来启动Spring容器。构造方法执行时,@Autowired依赖尚未注入,无法完成初始化,因此需要借助@PostConstruct:在依赖注入完成之后,由run()方法完成MasterServer类的
对象的初始化。
这个参考https://www.cnblogs.com/erlou96/p/13753824.html