DolphinScheduler 源码分析之 DAG类
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.dolphinscheduler.common.graph;

import org.apache.dolphinscheduler.common.utils.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * A directed acyclic graph whose public operations are guarded by a
 * {@link ReentrantReadWriteLock}.
 *
 * <p>Edges are stored twice: once keyed by their origin ({@code edgesMap}) and once keyed
 * by their destination ({@code reverseEdgesMap}), so that successors and predecessors can
 * both be looked up directly.
 *
 * @param <Node>     type used to identify a node (used as a map key, e.g. an integer id)
 * @param <NodeInfo> type of the description attached to a node (e.g. a string such as "v(1)")
 * @param <EdgeInfo> type of the description attached to an edge (e.g. "edge(1 -> 2)")
 */
public class DAG<Node, NodeInfo, EdgeInfo> {

    private static final Logger logger = LoggerFactory.getLogger(DAG.class);

    private final ReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * node map: key is the node, value is the node information
     */
    private final Map<Node, NodeInfo> nodesMap;

    /**
     * edge map: key is the origin node; value maps each destination node to the edge information
     */
    private final Map<Node, Map<Node, EdgeInfo>> edgesMap;

    /**
     * reversed edge map: key is the destination node; value maps each origin node to the edge information
     */
    private final Map<Node, Map<Node, EdgeInfo>> reverseEdgesMap;

    /**
     * Creates an empty graph.
     */
    public DAG() {
        nodesMap = new HashMap<>();
        edgesMap = new HashMap<>();
        reverseEdgesMap = new HashMap<>();
    }

    /**
     * Adds a node, replacing the stored information if the node is already present.
     *
     * @param node     node
     * @param nodeInfo node information
     */
    public void addNode(Node node, NodeInfo nodeInfo) {
        lock.writeLock().lock();
        try {
            nodesMap.put(node, nodeInfo);
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Adds an edge without edge information. Both nodes must already exist.
     *
     * @param fromNode node of origin
     * @param toNode   node of destination
     * @return {@code true} if the edge was added; {@code false} if it was rejected
     *         (self-loop, unknown node, or the edge would create a cycle)
     */
    public boolean addEdge(Node fromNode, Node toNode) {
        return addEdge(fromNode, toNode, false);
    }

    /**
     * Adds an edge without edge information.
     *
     * @param fromNode   node of origin
     * @param toNode     node of destination
     * @param createNode whether missing end points may be created; when {@code false}
     *                   the edge is rejected if either node does not exist
     * @return {@code true} if the edge was added; {@code false} if it was rejected
     */
    private boolean addEdge(Node fromNode, Node toNode, boolean createNode) {
        return addEdge(fromNode, toNode, null, createNode);
    }

    /**
     * Adds an edge.
     *
     * @param fromNode   node of origin
     * @param toNode     node of destination
     * @param edge       edge description
     * @param createNode whether missing end points may be created (with {@code null}
     *                   node information); when {@code false} the edge is rejected if
     *                   either node does not exist
     * @return {@code true} if the edge was added; {@code false} if it was rejected because
     *         it is a self-loop, an end point is missing while {@code createNode} is
     *         {@code false}, or it would introduce a cycle. A rejected edge leaves the
     *         graph unchanged.
     */
    public boolean addEdge(Node fromNode, Node toNode, EdgeInfo edge, boolean createNode) {
        lock.writeLock().lock();
        try {
            // Whether the edge (fromNode -> toNode) can be added at all.
            if (!isLegalAddEdge(fromNode, toNode, createNode)) {
                logger.error("serious error: add edge({} -> {}) is invalid, cause cycle!", fromNode, toNode);
                return false;
            }

            addNodeIfAbsent(fromNode, null);
            addNodeIfAbsent(toNode, null);

            // Record the edge in both directions so predecessors can be looked up too.
            addEdge(fromNode, toNode, edge, edgesMap);
            addEdge(toNode, fromNode, edge, reverseEdgesMap);

            return true;
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Whether this node is contained in the graph.
     *
     * @param node node
     * @return {@code true} if the node exists
     */
    public boolean containsNode(Node node) {
        lock.readLock().lock();
        try {
            return nodesMap.containsKey(node);
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Whether the edge {@code fromNode -> toNode} is contained in the graph.
     *
     * @param fromNode node of origin
     * @param toNode   node of destination
     * @return {@code true} if the edge exists
     */
    public boolean containsEdge(Node fromNode, Node toNode) {
        lock.readLock().lock();
        try {
            Map<Node, EdgeInfo> endEdges = edgesMap.get(fromNode);
            return endEdges != null && endEdges.containsKey(toNode);
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the description of a node.
     *
     * @param node node to look up
     * @return the node information, or {@code null} if the node is unknown or has no information
     */
    public NodeInfo getNode(Node node) {
        lock.readLock().lock();
        try {
            return nodesMap.get(node);
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the number of nodes.
     *
     * @return the number of nodes
     */
    public int getNodesCount() {
        lock.readLock().lock();
        try {
            return nodesMap.size();
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the number of edges.
     *
     * @return the number of edges
     */
    public int getEdgesCount() {
        lock.readLock().lock();
        try {
            int count = 0;
            for (Map<Node, EdgeInfo> outgoing : edgesMap.values()) {
                count += outgoing.size();
            }
            return count;
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the start nodes of the DAG, i.e. the nodes that have no incoming edge.
     *
     * @return the nodes without incoming edges
     */
    public Collection<Node> getBeginNode() {
        lock.readLock().lock();
        try {
            // reverseEdgesMap only has keys for nodes that are the destination of some edge.
            return CollectionUtils.subtract(nodesMap.keySet(), reverseEdgesMap.keySet());
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the end nodes of the DAG, i.e. the nodes that have no outgoing edge.
     *
     * @return the nodes without outgoing edges
     */
    public Collection<Node> getEndNode() {
        lock.readLock().lock();
        try {
            // edgesMap only has keys for nodes that are the origin of some edge.
            return CollectionUtils.subtract(nodesMap.keySet(), edgesMap.keySet());
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets all direct predecessors of the node.
     *
     * @param node node to look up
     * @return a snapshot of the nodes that have an edge pointing to {@code node};
     *         empty if there are none. Later changes to the graph are not reflected.
     */
    public Set<Node> getPreviousNodes(Node node) {
        lock.readLock().lock();
        try {
            // Copy while the lock is held: the internal key set must not escape, otherwise
            // callers would iterate it unguarded or could modify the graph through it.
            return new LinkedHashSet<>(getNeighborNodes(node, reverseEdgesMap));
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets all direct successors of the node.
     *
     * @param node node to look up
     * @return a snapshot of the nodes that {@code node} has an edge to;
     *         empty if there are none. Later changes to the graph are not reflected.
     */
    public Set<Node> getSubsequentNodes(Node node) {
        lock.readLock().lock();
        try {
            // Copy while the lock is held, see getPreviousNodes.
            return new LinkedHashSet<>(getNeighborNodes(node, edgesMap));
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Gets the in-degree of the node.
     *
     * @param node node
     * @return the number of edges pointing to the node
     */
    public int getIndegree(Node node) {
        lock.readLock().lock();
        try {
            return getNeighborNodes(node, reverseEdgesMap).size();
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Whether the graph has a cycle.
     *
     * @return {@code true} if the graph has a cycle, otherwise {@code false}
     *         (an empty graph has no cycle)
     */
    public boolean hasCycle() {
        lock.readLock().lock();
        try {
            return !topologicalSortImpl().getKey();
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Returns a topological ordering of the graph. Only an acyclic graph has one.
     *
     * @return the nodes in one valid topological order
     * @throws Exception if the graph contains a cycle
     */
    public List<Node> topologicalSort() throws Exception {
        lock.readLock().lock();
        try {
            Map.Entry<Boolean, List<Node>> entry = topologicalSortImpl();

            if (entry.getKey()) {
                return entry.getValue();
            }

            throw new Exception("serious error: graph has cycle ! ");
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Adds the node if it does not exist yet. Must be called with the write lock held.
     *
     * @param node     node
     * @param nodeInfo node information
     */
    private void addNodeIfAbsent(Node node, NodeInfo nodeInfo) {
        // containsKey rather than putIfAbsent: an existing node mapped to null info is left untouched.
        if (!nodesMap.containsKey(node)) {
            nodesMap.put(node, nodeInfo);
        }
    }

    /**
     * Stores an edge in the given adjacency map. Must be called with the write lock held.
     *
     * @param fromNode key in the outer map
     * @param toNode   key in the inner map
     * @param edge     edge description
     * @param edges    adjacency map to update ({@code edgesMap} or {@code reverseEdgesMap})
     */
    private void addEdge(Node fromNode, Node toNode, EdgeInfo edge, Map<Node, Map<Node, EdgeInfo>> edges) {
        edges.computeIfAbsent(fromNode, key -> new HashMap<>()).put(toNode, edge);
    }

    /**
     * Whether the edge {@code fromNode -> toNode} can be added without breaking the DAG.
     * Must be called with the write lock held.
     *
     * @param fromNode   node of origin
     * @param toNode     node of destination
     * @param createNode whether missing end points may be created; when {@code false}
     *                   both nodes must already exist
     * @return {@code true} if the edge may be added; {@code false} for a self-loop, a
     *         missing end point (when {@code createNode} is {@code false}) or an edge that
     *         would close a cycle
     */
    private boolean isLegalAddEdge(Node fromNode, Node toNode, boolean createNode) {
        if (fromNode.equals(toNode)) {
            logger.error("edge fromNode({}) can't equals toNode({})", fromNode, toNode);
            return false;
        }

        if (!createNode && !(nodesMap.containsKey(fromNode) && nodesMap.containsKey(toNode))) {
            logger.error("edge fromNode({}) or toNode({}) is not in vertices map", fromNode, toNode);
            return false;
        }

        // The new edge closes a cycle exactly when fromNode is already reachable from toNode.
        // Breadth-first search from toNode; the visited set makes sure every reachable node is
        // expanded exactly once, so the search is complete and always terminates.
        Set<Node> visited = new HashSet<>();
        Queue<Node> queue = new LinkedList<>();
        visited.add(toNode);
        queue.add(toNode);

        while (!queue.isEmpty()) {
            Node current = queue.poll();

            for (Node subsequentNode : getNeighborNodes(current, edgesMap)) {
                if (subsequentNode.equals(fromNode)) {
                    return false;
                }

                if (visited.add(subsequentNode)) {
                    queue.add(subsequentNode);
                }
            }
        }

        return true;
    }

    /**
     * Gets the neighbors of the node in the given adjacency map.
     *
     * <p>The returned set is a live view of internal state; it must only be used while a
     * lock is held and must not be handed to callers of this class.
     *
     * @param node  node to look up
     * @param edges adjacency map ({@code edgesMap} for successors, {@code reverseEdgesMap} for predecessors)
     * @return the neighbor nodes, or an empty set if the node has none
     */
    private Set<Node> getNeighborNodes(Node node, final Map<Node, Map<Node, EdgeInfo>> edges) {
        final Map<Node, EdgeInfo> neighborEdges = edges.get(node);

        if (neighborEdges == null) {
            return Collections.emptySet();
        }

        return neighborEdges.keySet();
    }

    /**
     * Determines whether the graph is acyclic and computes a topological ordering.
     * Must be called with a lock held.
     *
     * <p>Breadth-first approach:
     * <ol>
     *   <li>Scan all nodes and queue those whose in-degree is 0.</li>
     *   <li>Poll a node from the queue, decrement the remaining in-degree of each of its
     *       successors, and queue every successor whose in-degree drops to 0.</li>
     *   <li>Repeat step 2 until the queue is empty.</li>
     * </ol>
     * If some node still has a positive in-degree afterwards, not every node could be
     * traversed, so the graph has a cycle and no topological ordering exists.
     *
     * @return an entry whose key is {@code true} if the graph is acyclic and {@code false}
     *         if it has a cycle; the value is the list of nodes that could be ordered
     *         (a complete topological ordering when the key is {@code true})
     */
    private Map.Entry<Boolean, List<Node>> topologicalSortImpl() {
        // nodes whose remaining in-degree is 0 and that still have to be expanded
        Queue<Node> zeroIndegreeNodeQueue = new LinkedList<>();
        // result ordering
        List<Node> topoResultList = new ArrayList<>();
        // remaining in-degree of every node whose in-degree is not 0 yet
        Map<Node, Integer> notZeroIndegreeNodeMap = new HashMap<>();

        // Scan all nodes and queue the ones without incoming edges.
        for (Node node : nodesMap.keySet()) {
            int inDegree = getNeighborNodes(node, reverseEdgesMap).size();

            if (inDegree == 0) {
                zeroIndegreeNodeQueue.add(node);
                topoResultList.add(node);
            } else {
                notZeroIndegreeNodeMap.put(node, inDegree);
            }
        }

        // Repeatedly remove a node with in-degree 0 together with its outgoing edges.
        // If no such node exists the loop is skipped: a non-empty graph then leaves
        // notZeroIndegreeNodeMap non-empty (cycle), while an empty graph is reported acyclic.
        while (!zeroIndegreeNodeQueue.isEmpty()) {
            Node current = zeroIndegreeNodeQueue.poll();

            for (Node subsequentNode : getNeighborNodes(current, edgesMap)) {
                int remaining = notZeroIndegreeNodeMap.get(subsequentNode) - 1;

                if (remaining == 0) {
                    topoResultList.add(subsequentNode);
                    zeroIndegreeNodeQueue.add(subsequentNode);
                    notZeroIndegreeNodeMap.remove(subsequentNode);
                } else {
                    notZeroIndegreeNodeMap.put(subsequentNode, remaining);
                }
            }
        }

        // No node with a positive in-degree left means every node was ordered: no cycle.
        return new AbstractMap.SimpleEntry<>(notZeroIndegreeNodeMap.isEmpty(), topoResultList);
    }

}