Source Code Analysis of DolphinScheduler Worker Crash Failover
When a worker goes down, the tasks that were running on it need to be detected and re-run promptly. This post follows that thread through the source code.
1. MasterRegistryDataListener
This is the callback class registered with ZooKeeper. Whenever the registry data changes, ZooKeeper calls back into its notify method:
@Override
public void notify(Event event) {
    final String path = event.path();
    if (Strings.isNullOrEmpty(path)) {
        return;
    }
    // monitor master
    if (path.startsWith(RegistryNodeType.MASTER.getRegistryPath() + Constants.SINGLE_SLASH)) {
        handleMasterEvent(event);
    } else if (path.startsWith(RegistryNodeType.WORKER.getRegistryPath() + Constants.SINGLE_SLASH)) {
        // monitor worker
        handleWorkerEvent(event);
    }
}
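The notify method only fires because the master subscribes this listener to the master and worker paths in the registry at startup. The snippet below is a self-contained sketch of that subscribe-and-dispatch pattern, not the project's actual Registry API; the interface, class, and path names are assumptions for illustration only. A worker crash surfaces as the removal of the worker's ephemeral node, which is exactly the REMOVE case handled next.

import java.util.ArrayList;
import java.util.List;

// Minimal sketch of the subscribe-and-callback pattern behind MasterRegistryDataListener.
// The real project delegates this to its registry plugin (e.g. ZooKeeper); names here are illustrative.
public class RegistrySubscriptionSketch {

    interface SubscribeListener {
        void notify(String path, String eventType); // ADD / REMOVE / UPDATE
    }

    static class ToyRegistry {
        private final List<SubscribeListener> listeners = new ArrayList<>();

        void subscribe(SubscribeListener listener) {
            listeners.add(listener);
        }

        // Called by the registry client when a node under the watched path changes.
        void fireEvent(String path, String eventType) {
            for (SubscribeListener listener : listeners) {
                listener.notify(path, eventType);
            }
        }
    }

    public static void main(String[] args) {
        ToyRegistry registry = new ToyRegistry();
        // The master registers one listener that routes events by path prefix,
        // just like MasterRegistryDataListener#notify does.
        registry.subscribe((path, eventType) -> {
            if (path.startsWith("/nodes/master/")) {
                System.out.println("master event " + eventType + " on " + path);
            } else if (path.startsWith("/nodes/worker/")) {
                System.out.println("worker event " + eventType + " on " + path);
            }
        });
        // A worker crash shows up as the removal of its ephemeral node.
        registry.fireEvent("/nodes/worker/192.168.1.10:1234", "REMOVE");
    }
}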
When a worker goes down, handleWorkerEvent is invoked:
private void handleWorkerEvent(Event event) {
    final String path = event.path();
    switch (event.type()) {
        case ADD:
            log.info("worker node added : {}", path);
            break;
        case REMOVE:
            log.info("worker node deleted : {}", path);
            masterRegistryClient.removeWorkerNodePath(path, RegistryNodeType.WORKER, true);
            break;
        default:
            break;
    }
}
This in turn calls masterRegistryClient.removeWorkerNodePath.
2. MasterRegistryClient
public void removeWorkerNodePath(String path, RegistryNodeType nodeType, boolean failover) {
    log.info("{} node deleted : {}", nodeType, path);
    try {
        ......
        // failover server: the actual failover is triggered here
        if (failover) {
            failoverService.failoverServerWhenDown(serverHost, nodeType);
        }
    } catch (Exception e) {
        log.error("{} server failover failed", nodeType, e);
    }
}
Although the method is named "remove worker node path", the node has in fact already been removed by the time this callback runs; the real job of this method is to call failoverService.failoverServerWhenDown.
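The failover call needs the crashed worker's host, and the only thing the callback receives is the registry path of the removed node. Below is a minimal sketch of how such a host could be extracted; the path layout shown is an assumption for illustration, not copied from the project.

// Sketch: deriving the server host from a removed registry path.
// Assumes the ephemeral node name is the "host:port" of the crashed worker.
public class ServerHostFromPathSketch {

    static String extractServerHost(String path) {
        int lastSlash = path.lastIndexOf('/');
        return lastSlash >= 0 ? path.substring(lastSlash + 1) : path;
    }

    public static void main(String[] args) {
        // e.g. "/nodes/worker/192.168.1.10:1234" -> "192.168.1.10:1234"
        System.out.println(extractServerHost("/nodes/worker/192.168.1.10:1234"));
    }
}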
3. FailoverService
public void failoverServerWhenDown(String serverHost, RegistryNodeType nodeType) {
    switch (nodeType) {
        case MASTER:
            log.info("Master failover starting, masterServer: {}", serverHost);
            masterFailoverService.failoverMaster(serverHost);
            log.info("Master failover finished, masterServer: {}", serverHost);
            break;
        case WORKER:
            log.info("Worker failover starting, workerServer: {}", serverHost);
            workerFailoverService.failoverWorker(serverHost);
            log.info("Worker failover finished, workerServer: {}", serverHost);
            break;
        default:
            break;
    }
}
For a worker node this dispatches to workerFailoverService.failoverWorker.
4. WorkerFailoverService
public void failoverWorker(@NonNull String workerHost) {
    log.info("Worker[{}] failover starting", workerHost);
    final StopWatch failoverTimeCost = StopWatch.createStarted();
    // we query the task instance from cache, so that we can directly update the cache
    final Optional<Date> needFailoverWorkerStartTime =
            getServerStartupTime(registryClient.getServerList(RegistryNodeType.WORKER), workerHost);
    // 1. fetch the task instances that need to be failed over
    final List<TaskInstance> needFailoverTaskInstanceList = getNeedFailoverTaskInstance(workerHost);
    if (CollectionUtils.isEmpty(needFailoverTaskInstanceList)) {
        log.info("Worker[{}] failover finished there are no taskInstance need to failover", workerHost);
        return;
    }
    log.info(
            "Worker[{}] failover there are {} taskInstance may need to failover, will do a deep check, taskInstanceIds: {}",
            workerHost,
            needFailoverTaskInstanceList.size(),
            needFailoverTaskInstanceList.stream().map(TaskInstance::getId).collect(Collectors.toList()));
    final Map<Integer, ProcessInstance> processInstanceCacheMap = new HashMap<>();
    for (TaskInstance taskInstance : needFailoverTaskInstanceList) {
        try {
            LogUtils.setWorkflowAndTaskInstanceIDMDC(taskInstance.getProcessInstanceId(), taskInstance.getId());
            ProcessInstance processInstance = processInstanceCacheMap.computeIfAbsent(
                    taskInstance.getProcessInstanceId(), k -> {
                        WorkflowExecuteRunnable workflowExecuteRunnable = cacheManager.getByProcessInstanceId(
                                taskInstance.getProcessInstanceId());
                        if (workflowExecuteRunnable == null) {
                            return null;
                        }
                        return workflowExecuteRunnable.getWorkflowExecuteContext()
                                .getWorkflowInstance();
                    });
            if (!checkTaskInstanceNeedFailover(needFailoverWorkerStartTime, processInstance, taskInstance)) {
                log.info("Worker[{}] the current taskInstance doesn't need to failover", workerHost);
                continue;
            }
            log.info(
                    "Worker[{}] failover: begin to failover taskInstance, will set the status to NEED_FAULT_TOLERANCE",
                    workerHost);
            // 2. fail over this task instance
            failoverTaskInstance(processInstance, taskInstance);
            log.info("Worker[{}] failover: Finish failover taskInstance", workerHost);
        } catch (Exception ex) {
            log.info("Worker[{}] failover taskInstance occur exception", workerHost, ex);
        } finally {
            LogUtils.removeWorkflowAndTaskInstanceIdMDC();
        }
    }
    failoverTimeCost.stop();
    log.info("Worker[{}] failover finished, useTime:{}ms",
            workerHost,
            failoverTimeCost.getTime(TimeUnit.MILLISECONDS));
}
- In step 1 (marked in the comment), the master collects the task instances that were running on the crashed worker, limited to the running workflow instances held in its own cache. This is how each master fails over only the tasks that belong to the workflow instances it is responsible for; a simplified sketch of this filter follows the list.
- In step 2 (marked in the comment), failoverTaskInstance performs the actual failover for each of those tasks; its code is shown below.
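To make the filtering in step 1 concrete, here is a self-contained sketch of the kind of predicate that getNeedFailoverTaskInstance and checkTaskInstanceNeedFailover implement between them. The Task record and its fields are hypothetical, and the worker-restart-time condition is my reading of needFailoverWorkerStartTime in the code above, so treat it as an assumption rather than the project's exact rule.

import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

// Sketch of the failover-candidate filter. Field and class names are hypothetical;
// the real logic lives in getNeedFailoverTaskInstance / checkTaskInstanceNeedFailover.
public class FailoverCandidateSketch {

    record Task(int id, String host, boolean finished, Instant submitTime, boolean workflowCachedOnThisMaster) {}

    static List<Task> needFailover(List<Task> tasks, String crashedWorkerHost,
                                   Optional<Instant> workerRestartTime) {
        return tasks.stream()
                // dispatched to the crashed worker and not yet finished
                .filter(t -> crashedWorkerHost.equals(t.host()) && !t.finished())
                // each master only handles workflow instances it owns in its cache
                .filter(Task::workflowCachedOnThisMaster)
                // if the worker already restarted, skip tasks submitted after it came back (assumed)
                .filter(t -> workerRestartTime.map(r -> t.submitTime().isBefore(r)).orElse(true))
                .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<Task> tasks = List.of(
                new Task(1, "192.168.1.10:1234", false, Instant.parse("2024-03-01T10:00:00Z"), true),
                new Task(2, "192.168.1.11:1234", false, Instant.parse("2024-03-01T10:00:00Z"), true));
        // worker 192.168.1.10 crashed and has not come back yet
        System.out.println(needFailover(tasks, "192.168.1.10:1234", Optional.empty()));
    }
}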
private void failoverTaskInstance(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
    TaskMetrics.incTaskInstanceByState("failover");
    taskInstance.setProcessInstance(processInstance);
    if (!TaskUtils.isMasterTask(taskInstance.getTaskType())) {
        killYarnTask(taskInstance, processInstance);
    } else {
        log.info("The failover taskInstance is a master task, no need to failover in worker failover");
    }
    taskInstance.setState(TaskExecutionStatus.NEED_FAULT_TOLERANCE);
    taskInstance.setFlag(Flag.NO);
    taskInstanceDao.upsertTaskInstance(taskInstance);
    TaskStateEvent stateEvent = TaskStateEvent.builder()
            .processInstanceId(processInstance.getId())
            .taskInstanceId(taskInstance.getId())
            .status(TaskExecutionStatus.NEED_FAULT_TOLERANCE)
            .type(StateEventType.TASK_STATE_CHANGE)
            .build();
    workflowExecuteThreadPool.submitStateEvent(stateEvent);
}
This method does three things:
- kills the task process running on YARN/K8s (for non-master tasks)
- updates the task state to NEED_FAULT_TOLERANCE and persists it
- submits a task state-change event of type TASK_STATE_CHANGE for asynchronous processing
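The third point deserves a closer look: the event is not handled in place, it is queued and consumed later by a handler keyed on its type. Below is a generic sketch of that queue-plus-handler pattern with made-up types; in DolphinScheduler the consumer for this particular event type is TaskStateEventHandler, covered next. The indirection keeps failoverTaskInstance itself non-blocking; the state change is applied later by whatever workflowExecuteThreadPool.submitStateEvent hands it to.

import java.util.EnumMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Generic sketch of asynchronous state-event handling: producers enqueue events,
// a consumer thread dispatches each event to the handler registered for its type.
// Types here are illustrative, not the project's classes.
public class StateEventDispatchSketch {

    enum EventType { TASK_STATE_CHANGE, PROCESS_STATE_CHANGE }

    record StateEvent(int processInstanceId, int taskInstanceId, EventType type, String status) {}

    interface StateEventHandler {
        void handle(StateEvent event);
    }

    public static void main(String[] args) throws InterruptedException {
        BlockingQueue<StateEvent> queue = new LinkedBlockingQueue<>();
        Map<EventType, StateEventHandler> handlers = new EnumMap<>(EventType.class);
        handlers.put(EventType.TASK_STATE_CHANGE,
                event -> System.out.println("task " + event.taskInstanceId() + " -> " + event.status()));

        // Producer side: failover submits the event and returns immediately.
        queue.put(new StateEvent(1, 42, EventType.TASK_STATE_CHANGE, "NEED_FAULT_TOLERANCE"));

        // Consumer side: a worker thread drains the queue and dispatches by event type.
        StateEvent event = queue.take();
        handlers.get(event.type()).handle(event);
    }
}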
5. Asynchronous event handling: the handler for this event type is TaskStateEventHandler
public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable,
                                StateEvent stateEvent) throws StateEventHandleException, StateEventHandleError {
    ......
    // at this point the task state is NEED_FAULT_TOLERANCE
    if (task.getState().isFinished()) {
        if (completeTaskSet.contains(task.getTaskCode())) {
            log.warn("The task instance is already complete, stateEvent: {}", stateEvent);
            return true;
        }
        //
        workflowExecuteRunnable.taskFinished(task);
        return true;
    }
    return true;
}
In the task.getState().isFinished() check, NEED_FAULT_TOLERANCE counts as a finished state, so the condition evaluates to true and workflowExecuteRunnable.taskFinished is invoked.
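To make that explicit, here is a tiny, simplified model of a finished-state check that treats NEED_FAULT_TOLERANCE as terminal. The real check lives in the TaskExecutionStatus enum, and its exact list of terminal states may differ from this sketch.

// Simplified model of a "finished" check that treats fault tolerance as a terminal state.
// The state list here is illustrative, not copied from TaskExecutionStatus.
public class FinishedStateSketch {

    enum TaskState { SUBMITTED, RUNNING, SUCCESS, FAILURE, KILL, PAUSE, NEED_FAULT_TOLERANCE }

    static boolean isFinished(TaskState state) {
        switch (state) {
            case SUCCESS:
            case FAILURE:
            case KILL:
            case PAUSE:
            case NEED_FAULT_TOLERANCE:
                return true;
            default:
                return false;
        }
    }

    public static void main(String[] args) {
        // NEED_FAULT_TOLERANCE counts as finished, so taskFinished(...) gets invoked.
        System.out.println(isFinished(TaskState.NEED_FAULT_TOLERANCE)); // true
    }
}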
6. WorkflowExecuteRunnable
public void taskFinished(TaskInstance taskInstance) throws StateEventHandleException {
    log.info("TaskInstance finished task code:{} state:{}", taskInstance.getTaskCode(), taskInstance.getState());
    try {
        ......
        if (taskInstance.getState().isSuccess()) {
            ......
        } else if (taskInstance.taskCanRetry() && !workflowInstance.getState().isReadyStop()) {
            // retry task
            log.info("Retry taskInstance taskInstance state: {}", taskInstance.getState());
            retryTaskInstance(taskInstance);
        } else if (taskInstance.getState().isFailure()) {
            ......
        } else if (taskInstance.getState().isFinished()) {
            ......
        }
        log.info("TaskInstance finished will try to update the workflow instance state, task code:{} state:{}",
                taskInstance.getTaskCode(),
                taskInstance.getState());
        this.updateProcessInstanceState();
        // log the taskInstance in detail after task is finished
        log.info(WorkflowInstanceUtils.logTaskInstanceInDetail(taskInstance));
        sendTaskLogOnMasterToRemoteIfNeeded(taskInstance);
    } catch (Exception ex) {
        log.error("Task finish failed, get a exception, will remove this taskInstance from completeTaskSet", ex);
        // remove the task from complete map, so that we can finish in the next time.
        completeTaskSet.remove(taskInstance.getTaskCode());
        throw ex;
    }
}
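The branch that matters for our failed-over task is the taskCanRetry() one. As a rough illustration of why it evaluates to true, here is a simplified retry-eligibility rule; the assumption that NEED_FAULT_TOLERANCE is always retryable (and does not consume a retry attempt) is mine, and the project's TaskInstance may phrase the conditions differently.

// Sketch of a retry-eligibility rule in the spirit of taskCanRetry().
// The exact conditions in the project may differ; this only illustrates why a
// NEED_FAULT_TOLERANCE task takes the retry branch in taskFinished.
public class TaskCanRetrySketch {

    enum TaskState { SUCCESS, FAILURE, NEED_FAULT_TOLERANCE }

    static boolean taskCanRetry(TaskState state, int retryTimes, int maxRetryTimes) {
        if (state == TaskState.NEED_FAULT_TOLERANCE) {
            // assumed: fault tolerance itself does not count against the retry limit
            return true;
        }
        return state == TaskState.FAILURE && retryTimes < maxRetryTimes;
    }

    public static void main(String[] args) {
        System.out.println(taskCanRetry(TaskState.NEED_FAULT_TOLERANCE, 0, 0)); // true
        System.out.println(taskCanRetry(TaskState.FAILURE, 3, 3));              // false
    }
}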
Since the failed-over task can still be retried (and the workflow is not being stopped), execution eventually reaches retryTaskInstance:
private void retryTaskInstance(TaskInstance taskInstance) throws StateEventHandleException {
    ProcessInstance workflowInstance = workflowExecuteContext.getWorkflowInstance();
    if (!taskInstance.taskCanRetry()) {
        return;
    }
    //
    TaskInstance newTaskInstance = cloneRetryTaskInstance(taskInstance);
    if (newTaskInstance == null) {
        log.error("Retry task fail because new taskInstance is null, task code:{}, task id:{}",
                taskInstance.getTaskCode(),
                taskInstance.getId());
        return;
    }
    waitToRetryTaskInstanceMap.put(newTaskInstance.getTaskCode(), newTaskInstance);
    if (!taskInstance.retryTaskIntervalOverTime()) {
        log.info(
                "Failure task will be submitted, process id: {}, task instance code: {}, state: {}, retry times: {} / {}, interval: {}",
                workflowInstance.getId(), newTaskInstance.getTaskCode(),
                newTaskInstance.getState(), newTaskInstance.getRetryTimes(), newTaskInstance.getMaxRetryTimes(),
                newTaskInstance.getRetryInterval());
        stateWheelExecuteThread.addTask4TimeoutCheck(workflowInstance, newTaskInstance);
        stateWheelExecuteThread.addTask4RetryCheck(workflowInstance, newTaskInstance);
    } else {
        addTaskToStandByList(newTaskInstance);
        submitStandByTask();
        waitToRetryTaskInstanceMap.remove(newTaskInstance.getTaskCode());
    }
}
Here the task instance is cloned, and addTaskToStandByList plus submitStandByTask are invoked (immediately if the retry interval has already passed, otherwise after the state-wheel retry check fires), so the task is re-dispatched to a worker that is still alive.
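Re-dispatching a standby task means choosing a host from the workers that are still registered; DolphinScheduler ships several selection strategies (round robin, random, lower weight). The sketch below shows the simplest of these, a round-robin pick over the live worker list, purely to illustrate the idea rather than the project's actual dispatcher code.

import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

// Sketch of re-selecting a live worker for the cloned task: round-robin over the
// workers currently present in the registry. Illustrative only; the project ships
// several pluggable selection strategies.
public class WorkerSelectionSketch {

    private final AtomicInteger index = new AtomicInteger();

    String select(List<String> liveWorkers) {
        if (liveWorkers.isEmpty()) {
            throw new IllegalStateException("no live worker available, task stays in the standby list");
        }
        int i = Math.floorMod(index.getAndIncrement(), liveWorkers.size());
        return liveWorkers.get(i);
    }

    public static void main(String[] args) {
        WorkerSelectionSketch selector = new WorkerSelectionSketch();
        // the crashed worker 192.168.1.10 is no longer registered, so it cannot be chosen
        List<String> live = List.of("192.168.1.11:1234", "192.168.1.12:1234");
        System.out.println(selector.select(live));
        System.out.println(selector.select(live));
    }
}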
Author: 明月照江江
Original post: https://www.cnblogs.com/gradyblog/p/18078601
License: CC BY-NC-ND 2.5 China Mainland