[Erlang 0088] RabbitMQ 集群 Disc Node 一点实现细节

刚开始使用rabbitmqctl 创建集群的时候会有一个问题:怎么控制节点是disc node还是ram node?翻看了rabbitmq 2.8.7的代码才知道原委,记录一下:

先看下面的实验

这个实验其实之前已经做过了,这里抽出来做对比:

[root@localhost scripts]#
[root@localhost scripts]# RABBITMQ_NODE_PORT=9991 RABBITMQ_NODENAME=z_91@zen.com ./rabbitmq-server -detached
Activating RabbitMQ plugins ...
0 plugins activated:

[root@localhost scripts]# RABBITMQ_NODE_PORT=9992 RABBITMQ_NODENAME=z_92@zen.com ./rabbitmq-server -detached
Activating RabbitMQ plugins ...
0 plugins activated:

[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com stop_app
Stopping node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com reset
Resetting node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com cluster z_92@zen.com
Clustering node 'z_91@zen.com' with ['z_92@zen.com'] ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com start_app
Starting node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com cluster_status
Cluster status of node 'z_91@zen.com' ...
[{nodes,[{disc,['z_92@zen.com']},{ram,['z_91@zen.com']}]},
{running_nodes,['z_92@zen.com','z_91@zen.com']}]
...done.

细心的你一定发现了,这里的结果有点奇怪:在91节点上执行cluster命令与92节点组成集群,结果disc节点是92,91节点却是ram节点!这是怎么回事?下面换一种方式组建集群,目的是观察rabbitmq在构建集群时是如何选择Disc node的.和第一种组建方式的差异在于这行命令: ./rabbitmq-util -n z_91@zen.com cluster z_92@zen.com z_91@zen.com 这样完成组建之后,查看一下集群状态,注意disc node已经变成了: [{nodes,[{disc,['z_91@zen.com','z_92@zen.com']}]},{running_nodes,['z_92@zen.com','z_91@zen.com']}]

[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com stop_app     
Stopping node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com reset
Resetting node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com cluster z_92@zen.com z_91@zen.com
Clustering node 'z_91@zen.com' with ['z_92@zen.com','z_91@zen.com'] ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com start_app
Starting node 'z_91@zen.com' ...
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_91@zen.com cluster_status
Cluster status of node 'z_91@zen.com' ...
[{nodes,[{disc,['z_91@zen.com','z_92@zen.com']}]},
{running_nodes,['z_92@zen.com','z_91@zen.com']}]
...done.
[root@localhost scripts]#  ./rabbitmq-util -n z_92@zen.com cluster_status
Cluster status of node 'z_92@zen.com' ...
[{nodes,[{disc,['z_91@zen.com','z_92@zen.com']}]},
{running_nodes,['z_91@zen.com','z_92@zen.com']}]
...done.
[root@localhost scripts]#

WHY?

我们先把答案说了,这是因为方法should_be_disc_node

%% Decide whether the local node should be a disc node: true when no
%% cluster nodes are given at all, or when the local node (node()) is
%% itself one of the given cluster nodes; false otherwise.
should_be_disc_node([]) ->
    true;
should_be_disc_node(ClusterNodes) ->
    lists:member(node(), ClusterNodes).

当集群初建的时候,没有节点是disc node,ClusterNodes为[],所以会把第一个节点设置为disc node;当ClusterNodes不为空的时候,只有ClusterNodes包含当前节点(node()),才会把当前节点设置为disc node,否则当前节点就是ram node——这正是第一个实验中91节点成为ram节点的原因;ClusterNodes就来自于rabbitmqctl cluster命令后跟的参数.

下面是详细的代码跟进过程,不再赘述,代码里面对一些关键的地方加了补充说明,比较容易理解.

rabbitmqctl的实现逻辑实际上是在rabbit_control模块,我们关注的是action(cluster...)分支:

..\rabbitmq-server-2.8.7\src\rabbit_control.erl

%% `cluster` command: turn the node-name strings given on the command
%% line into atoms, announce the operation through Inform, then run
%% rabbit_mnesia:cluster/1 on the target node via rpc_call.
action(cluster, Node, ClusterNodeSs, _Opts, Inform) ->
    ClusterNodes = [list_to_atom(NodeStr) || NodeStr <- ClusterNodeSs],
    Inform("Clustering node ~p with ~p", [Node, ClusterNodes]),
    rpc_call(Node, rabbit_mnesia, cluster, [ClusterNodes]);

rabbit_control 调用的是rabbit_mnesia的cluster方法,跟进去看:

rabbit_mnesia.erl

  1 ..\rabbitmq-server-2.8.7\src\rabbit_mnesia.erl
  2 
  3 
  4 cluster(ClusterNodes) ->
  5     %% Non-forced entry point: delegates to cluster/2 with Force = false.
  6     cluster(ClusterNodes, false).
  7 force_cluster(ClusterNodes) ->  %% forced variant: cluster/2 with Force = true
  8     cluster(ClusterNodes, true).
  9 
 10  
 11 
 12 %% Alter which disk nodes this node is clustered with. This can be a
 13 %% subset of all the disk nodes in the cluster but can (and should)
 14 %% include the node itself if it is to be a disk rather than a ram
 15 %% node.  If Force is false, only connections to online nodes are
 16 %% allowed.
 17 cluster(ClusterNodes, Force) ->
 18     rabbit_misc:local_info_msg("Clustering with ~p~s~n",
 19                                [ClusterNodes, if Force -> " forcefully";
 20                                                  true  -> ""
 21                                               end]),
 22     ensure_mnesia_not_running(),
 23     ensure_mnesia_dir(),
 24     %% Unless forced, log if the only running disc node is about to stop being one.
 25     case not Force andalso is_clustered() andalso
 26          is_only_disc_node(node(), false) andalso
 27          not should_be_disc_node(ClusterNodes)
 28     of
 29         true -> log_both("last running disc node leaving cluster");
 30         _    -> ok
 31     end,
 32 
 33     %% Wipe mnesia if we're changing type from disc to ram
 34     case {is_disc_node(), should_be_disc_node(ClusterNodes)} of
 35         {true, false} -> rabbit_misc:with_local_io(
 36                            fun () -> error_logger:warning_msg(
 37                                        "changing node type; wiping "
 38                                        "mnesia...~n~n")
 39                            end),
 40                          rabbit_misc:ensure_ok(mnesia:delete_schema([node()]),
 41                                                cannot_delete_schema);
 42         _             -> ok
 43     end,
 44 
 45     %% Pre-emptively leave the cluster
 46     %%
 47     %% We're trying to handle the following two cases:
 48     %% 1. We have a two-node cluster, where both nodes are disc nodes.
 49     %% One node is re-clustered as a ram node.  When it tries to
 50     %% re-join the cluster, but before it has time to update its
 51     %% table definitions, the other node will order it to re-create
 52     %% its disc tables.  So, we need to leave the cluster before we
 53     %% can join it again.
 54     %% 2. We have a two-node cluster, where both nodes are disc nodes.
 55     %% One node is forcefully reset (so, the other node thinks it's
 56     %% still a part of the cluster).  The reset node is re-clustered
 57     %% as a ram node.  Same as above, we need to leave the cluster
 58     %% before we can join it.  But, since we don't know if we're in a
 59     %% cluster or not, we just pre-emptively leave it before joining.
 60     ProperClusterNodes = ClusterNodes -- [node()],
 61     try
 62         ok = leave_cluster(ProperClusterNodes, ProperClusterNodes)
 63     catch  %% a no_running_cluster_nodes throw is tolerated only when Force is true
 64         {error, {no_running_cluster_nodes, _, _}} when Force ->
 65             ok
 66     end,
 67 
 68     %% Join the cluster
 69     start_mnesia(),
 70     try
 71         ok = init_db(ClusterNodes, Force),
 72         ok = create_cluster_nodes_config(ClusterNodes)
 73     after  %% mnesia is stopped again whether or not joining succeeded
 74         stop_mnesia()
 75     end,
 76 
 77     ok.
 78 
 79  
 80 %% Take a cluster node config and create the right kind of node - a
 81 %% standalone disk node, or disk or ram node connected to the
 82 %% specified cluster nodes.  If Force is false, don't allow
 83 %% connections to offline nodes.
 84 init_db(ClusterNodes, Force, SecondaryPostMnesiaFun) ->
 85     UClusterNodes = lists:usort(ClusterNodes),
 86     ProperClusterNodes = UClusterNodes -- [node()],  %% every listed node except ourselves
 87     case mnesia:change_config(extra_db_nodes, ProperClusterNodes) of
 88         {ok, []} when not Force andalso ProperClusterNodes =/= [] ->
 89             throw({error, {failed_to_cluster_with, ProperClusterNodes,
 90                            "Mnesia could not connect to any disc nodes."}});
 91         {ok, Nodes} ->
 92             WasDiscNode = is_disc_node(),
 93             WantDiscNode = should_be_disc_node(ClusterNodes),
 94             %% We create a new db (on disk, or in ram) in the first
 95             %% two cases and attempt to upgrade in the other two
 96             case {Nodes, WasDiscNode, WantDiscNode} of
 97                 {[], _, false} ->
 98                     %% New ram node; start from scratch
 99                     ok = create_schema(ram);
100                 {[], false, true} ->
101                     %% Nothing there at all, start from scratch
102                     ok = create_schema(disc);
103                 {[], true, true} ->
104                     %% We're the first node up
105                     case rabbit_upgrade:maybe_upgrade_local() of
106                         ok                    -> ensure_schema_integrity();
107                         version_not_available -> ok = schema_ok_or_move()
108                     end;
109                 {[AnotherNode|_], _, _} ->
110                     %% Subsequent node in cluster, catch up
111                     ensure_version_ok(
112                       rpc:call(AnotherNode, rabbit_version, recorded, [])),
113                     {CopyType, CopyTypeAlt} =
114                         case WantDiscNode of
115                             true  -> {disc, disc_copies};
116                             false -> {ram, ram_copies}
117                         end,
118                     ok = wait_for_replicated_tables(),
119                     ok = create_local_table_copy(schema, CopyTypeAlt),
120                     ok = create_local_table_copies(CopyType),
121 
122                     ok = SecondaryPostMnesiaFun(),  %% caller-supplied zero-arity hook; must return ok
123                     %% We've taken down mnesia, so ram nodes will need
124                     %% to re-sync
125                     case is_disc_node() of
126                         false -> start_mnesia(),
127                                  mnesia:change_config(extra_db_nodes,
128                                                       ProperClusterNodes),
129                                  wait_for_replicated_tables();
130                         true  -> ok
131                     end,
132 
133                     ensure_schema_integrity(),
134                     ok
135             end;
136         {error, Reason} ->
137             %% one reason we may end up here is if we try to join
138             %% nodes together that are currently running standalone or
139             %% are members of a different cluster
140             throw({error, {unable_to_join_cluster, ClusterNodes, Reason}})
141     end.
142 
143   
144 is_disc_node() -> mnesia:system_info(use_dir).  %% node type is read from Mnesia's use_dir flag
145 
146 
147 should_be_disc_node(ClusterNodes) ->  %% true for an empty list, or when node() is listed
148     ClusterNodes == [] orelse lists:member(node(), ClusterNodes).
149  
150 
151 is_clustered() ->  %% true unless the running clustered nodes are [] or exactly [node()]
152     RunningNodes = running_clustered_nodes(),
153     [node()] /= RunningNodes andalso [] /= RunningNodes.
154  
155 
156 is_only_disc_node(Node, _MnesiaRunning = true) ->  %% clause used when mnesia is already running
157     RunningSet = sets:from_list(running_clustered_nodes()),
158     DiscSet = sets:from_list(nodes_of_type(disc_copies)),
159     [Node] =:= sets:to_list(sets:intersection(RunningSet, DiscSet));  %% true iff Node is the sole running node in the disc_copies set
160 is_only_disc_node(Node, false) ->  %% mnesia not running: start it, run the check, stop it again
161     start_mnesia(),
162     Res = is_only_disc_node(Node, true),
163     stop_mnesia(),  %% NOTE(review): not reached if the check above raises -- confirm this is acceptable
164     Res.
165 
166  
167 nodes_of_type(Type) ->
168     %% Return the nodes of a certain type in the current cluster, as
169     %% recorded for the schema table.  Type is handed straight to
170     %% mnesia:table_info/2; the visible caller passes disc_copies.
171     mnesia:table_info(schema, Type).

代码太长了,展开看吧 : )

最后,小图一张:我有一个一样的台灯

发表于 2012-11-13 12:41 坚强2002 阅读(7095) 评论(0) 收藏举报

刷新页面返回顶部

[Erlang 0088] RabbitMQ 集群 Disc Node 一点实现细节

先看下面的实验

WHY?

公告