RocketMQ 消费者负载均衡源码解析
RocketMQ 消费者负载均衡源码解析
RebalanceService
消费端的负载均衡核心类是RebalanceService,其在MQClientInstance中被构建出来
public MQClientInstance(ClientConfig clientConfig, int instanceIndex, String clientId, RPCHook rpcHook) {
//省略...
this.rebalanceService = new RebalanceService(this);
//省略...
}
并在MQClientInstance的start中被启动
public void start() throws MQClientException {
//省略...
// Start rebalance service 启动消费队列负载均衡
this.rebalanceService.start();
//省略...
}
下面看下RebalanceService这个类
public class RebalanceService extends ServiceThread {
//每次负载均衡间隔时间,默认20秒
private static long waitInterval =
Long.parseLong(System.getProperty(
"rocketmq.client.rebalance.waitInterval", "20000"));
private final Logger log = ClientLogger.getLog();
private final MQClientInstance mqClientFactory;
public RebalanceService(MQClientInstance mqClientFactory) {
this.mqClientFactory = mqClientFactory;
}
@Override
public void run() {
log.info(this.getServiceName() + " service started");
while (!this.isStopped()) {
//① 默认阻塞20秒 即20秒进行一次负载均衡
this.waitForRunning(waitInterval);
//② 委托mqClientFactory.doRebalance进行实际的负载均衡操作
this.mqClientFactory.doRebalance();
}
log.info(this.getServiceName() + " service end");
}
@Override
public String getServiceName() {
return RebalanceService.class.getSimpleName();
}
}
① 默认阻塞20秒 即20秒进行一次负载均衡(消费者启动的时候会立刻唤醒该线程进行一次负载均衡操作)
② 委托mqClientFactory.doRebalance进行实际的负载均衡操作
mqClientFactory.doRebalance
public void doRebalance() {
//consumerTable是一个map,保存了所有注册在里面消费者,key为groupName消费组名
//遍历所有的消费者,并委托其进行负载均衡操作
for (Map.Entry<String, MQConsumerInner> entry : this.consumerTable.entrySet()) {
MQConsumerInner impl = entry.getValue();
if (impl != null) {
try {
impl.doRebalance();
} catch (Throwable e) {
log.error("doRebalance exception", e);
}
}
}
}
consumerTable是一个map,保存了所有注册在里面消费者,key为groupName消费组名。遍历所有的消费者,并委托其进行负载均衡操作
MQConsumerInner.doRebalance
这里我们以默认的DefaultMQPushConsumerImpl实现为例研究接下去的流程
DefaultMQPushConsumerImpl#doRebalance
public void doRebalance() {
if (!this.pause) {
//继续委托内部的rebalanceImpl进行负载均衡操作
this.rebalanceImpl.doRebalance(this.isConsumeOrderly());
}
}
继续委托内部的rebalanceImpl进行负载均衡操作,参数为是否是有序消费
rebalanceImpl.doRebalance
public void doRebalance(final boolean isOrder) {
//获取主题订阅信息
Map<String, SubscriptionData> subTable = this.getSubscriptionInner();
if (subTable != null) {
//遍历主题订阅信息
for (final Map.Entry<String, SubscriptionData> entry : subTable.entrySet()) {
final String topic = entry.getKey();
try {
//把每个主题进行负载均衡
this.rebalanceByTopic(topic, isOrder);
} catch (Throwable e) {
if (!topic.startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) {
log.warn("rebalanceByTopic Exception", e);
}
}
}
}
this.truncateMessageQueueNotMyTopic();
}
获取主题订阅信息,遍历每个主题订阅信息,将每个主题进行负载均衡
rebalanceImpl.rebalanceByTopic
private void rebalanceByTopic(final String topic, final boolean isOrder) {
//消费模式,默认为集群模式
switch (messageModel) {
//广播模式
case BROADCASTING: {
//广播模式不用负载均衡,当前消费者直接消费所有的队列
Set<MessageQueue> mqSet = this.topicSubscribeInfoTable.get(topic);
if (mqSet != null) {
boolean changed = this.updateProcessQueueTableInRebalance(topic, mqSet, isOrder);
if (changed) {
this.messageQueueChanged(topic, mqSet, mqSet);
log.info("messageQueueChanged {} {} {} {}",
consumerGroup,
topic,
mqSet,
mqSet);
}
} else {
log.warn("doRebalance, {}, but the topic[{}] not exist.", consumerGroup, topic);
}
break;
}
case CLUSTERING: {
//① 获取topic主题下所有的队列
Set<MessageQueue> mqSet = this.topicSubscribeInfoTable.get(topic);
//② 获取topic主题下group组下所有的消费者id
List<String> cidAll = this.mQClientFactory.findConsumerIdList(topic, consumerGroup);
if (null == mqSet) {
if (!topic.startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) {
log.warn("doRebalance, {}, but the topic[{}] not exist.", consumerGroup, topic);
}
}
if (null == cidAll) {
log.warn("doRebalance, {} {}, get consumer id list failed", consumerGroup, topic);
}
if (mqSet != null && cidAll != null) {
List<MessageQueue> mqAll = new ArrayList<MessageQueue>();
mqAll.addAll(mqSet);
//③ 排序队列 同一个broker的会排在一起,然后按队列id排序
Collections.sort(mqAll);
//排序消费者id
Collections.sort(cidAll);
//分配消费队列策略类
AllocateMessageQueueStrategy strategy = this.allocateMessageQueueStrategy;
List<MessageQueue> allocateResult = null;
try {
//委托strategy进行队列分配,保证同一个消费组内的消费者分配到的队列是不同的
allocateResult = strategy.allocate(
this.consumerGroup,
this.mQClientFactory.getClientId(),
mqAll,
cidAll);
} catch (Throwable e) {
log.error("AllocateMessageQueueStrategy.allocate Exception. allocateMessageQueueStrategyName={}", strategy.getName(),
e);
return;
}
Set<MessageQueue> allocateResultSet = new HashSet<MessageQueue>();
if (allocateResult != null) {
allocateResultSet.addAll(allocateResult);
}
//④ 将分配给当前消费者的队列传入,进行队列的新增、删除操作
boolean changed = this.updateProcessQueueTableInRebalance(topic, allocateResultSet, isOrder);
if (changed) {
log.info(
"rebalanced result changed. allocateMessageQueueStrategyName={}, group={}, topic={}, clientId={}, mqAllSize={}, cidAllSize={}, rebalanceResultSize={}, rebalanceResultSet={}",
strategy.getName(), consumerGroup, topic, this.mQClientFactory.getClientId(), mqSet.size(), cidAll.size(),
allocateResultSet.size(), allocateResultSet);
//⑤ 处理消费队列分配发生了变化后的逻辑
this.messageQueueChanged(topic, mqSet, allocateResultSet);
}
}
break;
}
default:
break;
}
}
重点分析集群模式下的队列分配
① 获取topic主题下所有的队列
② 获取topic主题下group组下所有的消费者id
③ 排序队列和消费者id(同一个broker的会排在一起,然后按队列id排序),并委托strategy进行队列分配,保证同一个消费组内的消费者分配到的队列是不同的
④ 将分配给当前消费者的队列传入,进行队列的新增、删除操作
⑤ 处理消费队列分配发生了变化后的逻辑
rebalanceImpl.updateProcessQueueTableInRebalance
private boolean updateProcessQueueTableInRebalance(final String topic, final Set<MessageQueue> mqSet,
final boolean isOrder) {
//队列分配有无变化
boolean changed = false;
//遍历当前消费者处理的队列
Iterator<Entry<MessageQueue, ProcessQueue>> it = this.processQueueTable.entrySet().iterator();
while (it.hasNext()) {
Entry<MessageQueue, ProcessQueue> next = it.next();
//消息队列
MessageQueue mq = next.getKey();
//消息队列对于的处理队列
ProcessQueue pq = next.getValue();
//只判断同一个主题的
if (mq.getTopic().equals(topic)) {
if (!mqSet.contains(mq)) {
//重新分配的队列以及没有改队列了,销毁掉对应的processQueue
pq.setDropped(true);
//移除代码需要单独分析
if (this.removeUnnecessaryMessageQueue(mq, pq)) {
it.remove();
changed = true;
log.info("doRebalance, {}, remove unnecessary mq, {}", consumerGroup, mq);
}
//超时未拉取消息的队列也销毁掉(默认2分钟超时)
} else if (pq.isPullExpired()) {
switch (this.consumeType()) {
case CONSUME_ACTIVELY:
break;
case CONSUME_PASSIVELY:
pq.setDropped(true);
if (this.removeUnnecessaryMessageQueue(mq, pq)) {
it.remove();
changed = true;
log.error("[BUG]doRebalance, {}, remove unnecessary mq, {}, because pull is pause, so try to fixed it",
consumerGroup, mq);
}
break;
default:
break;
}
}
}
}
//生成需要新增的队列消息拉取请求
List<PullRequest> pullRequestList = new ArrayList<PullRequest>();
for (MessageQueue mq : mqSet) {
if (!this.processQueueTable.containsKey(mq)) {
//顺序消费且mq没有被锁定成功队列
if (isOrder && !this.lock(mq)) {
log.warn("doRebalance, {}, add a new mq failed, {}, because lock failed", consumerGroup, mq);
continue;
}
//移除掉该队列之前的消费进度
this.removeDirtyOffset(mq);
//新建处理队列
ProcessQueue pq = new ProcessQueue();
//计算下个消费的offset
long nextOffset = this.computePullFromWhere(mq);
if (nextOffset >= 0) {
//将当前pq放入processQueueTable
ProcessQueue pre = this.processQueueTable.putIfAbsent(mq, pq);
if (pre != null) {
log.info("doRebalance, {}, mq already exists, {}", consumerGroup, mq);
} else {
//创建拉取消息请求
log.info("doRebalance, {}, add a new mq, {}", consumerGroup, mq);
PullRequest pullRequest = new PullRequest();
pullRequest.setConsumerGroup(consumerGroup);
pullRequest.setNextOffset(nextOffset);
pullRequest.setMessageQueue(mq);
pullRequest.setProcessQueue(pq);
pullRequestList.add(pullRequest);
changed = true;
}
} else {
log.warn("doRebalance, {}, add new mq failed, {}", consumerGroup, mq);
}
}
}
//转发拉取消息请求
this.dispatchPullRequest(pullRequestList);
return changed;
}
public void dispatchPullRequest(List<PullRequest> pullRequestList) {
for (PullRequest pullRequest : pullRequestList) {
//委托defaultMQPushConsumerImpl处理拉取请求
this.defaultMQPushConsumerImpl.executePullRequestImmediately(pullRequest);
log.info("doRebalance, {}, add a new pull request {}", consumerGroup, pullRequest);
}
}
defaultMQPushConsumerImpl.executePullRequestImmediately
public void executePullRequestImmediately(final PullRequest pullRequest) {
//委托PullMessageService处理请求
this.mQClientFactory.getPullMessageService().executePullRequestImmediately(pullRequest);
}
PullMessageService.executePullRequestImmediately
public void executePullRequestImmediately(final PullRequest pullRequest) {
try {
//将拉取消息的请求放入pullRequestQueue
this.pullRequestQueue.put(pullRequest);
} catch (InterruptedException e) {
log.error("executePullRequestImmediately pullRequestQueue.put", e);
}
}
将pullRequest放入pullRequestQueue队列中
public void run() {
log.info(this.getServiceName() + " service started");
while (!this.isStopped()) {
try {
//从pullRequestQueue队列中去除请求
PullRequest pullRequest = this.pullRequestQueue.take();
if (pullRequest != null) {
//拉取消息
this.pullMessage(pullRequest);
}
} catch (InterruptedException e) {
} catch (Exception e) {
log.error("Pull Message Service Run Method exception", e);
}
}
log.info(this.getServiceName() + " service end");
}
从pullRequestQueue队列中去除请求,并调用pullMessage拉取消息
private void pullMessage(final PullRequest pullRequest) {
//① 根据groupName取出消费者
final MQConsumerInner consumer = this.mQClientFactory.selectConsumer(pullRequest.getConsumerGroup());
if (consumer != null) {
DefaultMQPushConsumerImpl impl = (DefaultMQPushConsumerImpl) consumer;
//② 委托消费者拉取消息
impl.pullMessage(pullRequest);
} else {
log.warn("No matched consumer for the PullRequest {}, drop it", pullRequest);
}
}
① 根据groupName取出消费者
② 委托消费者拉取消息
DefaultMQPushConsumerImpl.pullMessage
public void pullMessage(final PullRequest pullRequest) {
final ProcessQueue processQueue = pullRequest.getProcessQueue();
if (processQueue.isDropped()) {
log.info("the pull request[{}] is dropped.", pullRequest.toString());
return;
}
//处理队列设置最后的拉取时间戳
pullRequest.getProcessQueue().setLastPullTimestamp(System.currentTimeMillis());
try {
//确定当前消费者还是处于RUNNING状态的
this.makeSureStateOK();
} catch (MQClientException e) {
log.warn("pullMessage exception, consumer state not ok", e);
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION);
return;
}
if (this.isPause()) {
log.warn("consumer was paused, execute pull request later. instanceName={}, group={}", this.defaultMQPushConsumer.getInstanceName(), this.defaultMQPushConsumer.getConsumerGroup());
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_SUSPEND);
return;
}
//队列里缓存的消息数量
long cachedMessageCount = processQueue.getMsgCount().get();
//队列里缓存的消息字节M数
long cachedMessageSizeInMiB = processQueue.getMsgSize().get() / (1024 * 1024);
//队列里缓存的消息数量是否大于1000
if (cachedMessageCount > this.defaultMQPushConsumer.getPullThresholdForQueue()) {
//50毫秒后重试
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL);
if ((queueFlowControlTimes++ % 1000) == 0) {
log.warn(
"the cached message count exceeds the threshold {}, so do flow control, minOffset={}, maxOffset={}, count={}, size={} MiB, pullRequest={}, flowControlTimes={}",
this.defaultMQPushConsumer.getPullThresholdForQueue(), processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), cachedMessageCount, cachedMessageSizeInMiB, pullRequest, queueFlowControlTimes);
}
return;
}
//队列里缓存的消息字节M数是否大于100M
if (cachedMessageSizeInMiB > this.defaultMQPushConsumer.getPullThresholdSizeForQueue()) {
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL);
if ((queueFlowControlTimes++ % 1000) == 0) {
log.warn(
"the cached message size exceeds the threshold {} MiB, so do flow control, minOffset={}, maxOffset={}, count={}, size={} MiB, pullRequest={}, flowControlTimes={}",
this.defaultMQPushConsumer.getPullThresholdSizeForQueue(), processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), cachedMessageCount, cachedMessageSizeInMiB, pullRequest, queueFlowControlTimes);
}
return;
}
//如果是并发无序消费模式,processQueue里最大的offset和最小的offset的差值不能超过2000
if (!this.consumeOrderly) {
if (processQueue.getMaxSpan() > this.defaultMQPushConsumer.getConsumeConcurrentlyMaxSpan()) {
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL);
if ((queueMaxSpanFlowControlTimes++ % 1000) == 0) {
log.warn(
"the queue's messages, span too long, so do flow control, minOffset={}, maxOffset={}, maxSpan={}, pullRequest={}, flowControlTimes={}",
processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), processQueue.getMaxSpan(),
pullRequest, queueMaxSpanFlowControlTimes);
}
return;
}
} else {
if (processQueue.isLocked()) {
if (!pullRequest.isLockedFirst()) {
final long offset = this.rebalanceImpl.computePullFromWhere(pullRequest.getMessageQueue());
boolean brokerBusy = offset < pullRequest.getNextOffset();
log.info("the first time to pull message, so fix offset from broker. pullRequest: {} NewOffset: {} brokerBusy: {}",
pullRequest, offset, brokerBusy);
if (brokerBusy) {
log.info("[NOTIFYME]the first time to pull message, but pull request offset larger than broker consume offset. pullRequest: {} NewOffset: {}",
pullRequest, offset);
}
pullRequest.setLockedFirst(true);
pullRequest.setNextOffset(offset);
}
} else {
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION);
log.info("pull message later because not locked in broker, {}", pullRequest);
return;
}
}
final SubscriptionData subscriptionData = this.rebalanceImpl.getSubscriptionInner().get(pullRequest.getMessageQueue().getTopic());
if (null == subscriptionData) {
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION);
log.warn("find the consumer's subscription failed, {}", pullRequest);
return;
}
final long beginTimestamp = System.currentTimeMillis();
PullCallback pullCallback = new PullCallback() {
@Override
public void onSuccess(PullResult pullResult) {
if (pullResult != null) {
pullResult = DefaultMQPushConsumerImpl.this.pullAPIWrapper.processPullResult(pullRequest.getMessageQueue(), pullResult,
subscriptionData);
switch (pullResult.getPullStatus()) {
//拉取到消息
case FOUND:
long prevRequestOffset = pullRequest.getNextOffset();
//设置下次拉取消息的开始offset
pullRequest.setNextOffset(pullResult.getNextBeginOffset());
//统计响应时间
long pullRT = System.currentTimeMillis() - beginTimestamp;
DefaultMQPushConsumerImpl.this.getConsumerStatsManager().incPullRT(pullRequest.getConsumerGroup(),
pullRequest.getMessageQueue().getTopic(), pullRT);
long firstMsgOffset = Long.MAX_VALUE;
if (pullResult.getMsgFoundList() == null || pullResult.getMsgFoundList().isEmpty()) {
//没有消息拉取到,立刻发起下次请求
DefaultMQPushConsumerImpl.this.executePullRequestImmediately(pullRequest);
} else {
//一批消息第一条的队列偏移量
firstMsgOffset = pullResult.getMsgFoundList().get(0).getQueueOffset();
//统计tps
DefaultMQPushConsumerImpl.this.getConsumerStatsManager().incPullTPS(pullRequest.getConsumerGroup(),
pullRequest.getMessageQueue().getTopic(), pullResult.getMsgFoundList().size());
//processQueue放入拉取的消息
boolean dispathToConsume = processQueue.putMessage(pullResult.getMsgFoundList());
//向consumeMessageService提交消费消息的任务
DefaultMQPushConsumerImpl.this.consumeMessageService.submitConsumeRequest(
pullResult.getMsgFoundList(),
processQueue,
pullRequest.getMessageQueue(),
dispathToConsume);
//发起下一次拉取任务的请求 pullInterval默认为0
if (DefaultMQPushConsumerImpl.this.defaultMQPushConsumer.getPullInterval() > 0) {
DefaultMQPushConsumerImpl.this.executePullRequestLater(pullRequest,
DefaultMQPushConsumerImpl.this.defaultMQPushConsumer.getPullInterval());
} else {
DefaultMQPushConsumerImpl.this.executePullRequestImmediately(pullRequest);
}
}
if (pullResult.getNextBeginOffset() < prevRequestOffset
|| firstMsgOffset < prevRequestOffset) {
log.warn(
"[BUG] pull message result maybe data wrong, nextBeginOffset: {} firstMsgOffset: {} prevRequestOffset: {}",
pullResult.getNextBeginOffset(),
firstMsgOffset,
prevRequestOffset);
}
break;
//没有新消息
case NO_NEW_MSG:
pullRequest.setNextOffset(pullResult.getNextBeginOffset());
//尝试保存当前消费进度
DefaultMQPushConsumerImpl.this.correctTagsOffset(pullRequest);
//提交下一轮拉取请求
DefaultMQPushConsumerImpl.this.executePullRequestImmediately(pullRequest);
break;
//没有匹配的消息
case NO_MATCHED_MSG:
pullRequest.setNextOffset(pullResult.getNextBeginOffset());
DefaultMQPushConsumerImpl.this.correctTagsOffset(pullRequest);
DefaultMQPushConsumerImpl.this.executePullRequestImmediately(pullRequest);
break;
//offset出错
case OFFSET_ILLEGAL:
log.warn("the pull request offset illegal, {} {}",
pullRequest.toString(), pullResult.toString());
pullRequest.setNextOffset(pullResult.getNextBeginOffset());
pullRequest.getProcessQueue().setDropped(true);
DefaultMQPushConsumerImpl.this.executeTaskLater(new Runnable() {
@Override
public void run() {
try {
DefaultMQPushConsumerImpl.this.offsetStore.updateOffset(pullRequest.getMessageQueue(),
pullRequest.getNextOffset(), false);
DefaultMQPushConsumerImpl.this.offsetStore.persist(pullRequest.getMessageQueue());
DefaultMQPushConsumerImpl.this.rebalanceImpl.removeProcessQueue(pullRequest.getMessageQueue());
log.warn("fix the pull request offset, {}", pullRequest);
} catch (Throwable e) {
log.error("executeTaskLater Exception", e);
}
}
}, 10000);
break;
default:
break;
}
}
}
@Override
public void onException(Throwable e) {
if (!pullRequest.getMessageQueue().getTopic().startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) {
log.warn("execute the pull request exception", e);
}
DefaultMQPushConsumerImpl.this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION);
}
};
boolean commitOffsetEnable = false;
long commitOffsetValue = 0L;
if (MessageModel.CLUSTERING == this.defaultMQPushConsumer.getMessageModel()) {
commitOffsetValue = this.offsetStore.readOffset(pullRequest.getMessageQueue(), ReadOffsetType.READ_FROM_MEMORY);
if (commitOffsetValue > 0) {
commitOffsetEnable = true;
}
}
String subExpression = null;
boolean classFilter = false;
SubscriptionData sd = this.rebalanceImpl.getSubscriptionInner().get(pullRequest.getMessageQueue().getTopic());
if (sd != null) {
if (this.defaultMQPushConsumer.isPostSubscriptionWhenPull() && !sd.isClassFilterMode()) {
subExpression = sd.getSubString();
}
classFilter = sd.isClassFilterMode();
}
//构建sysFlag
int sysFlag = PullSysFlag.buildSysFlag(
commitOffsetEnable, // commitOffset
true, // suspend
subExpression != null, // subscription
classFilter // class filter
);
try {
//发起拉取消息的请求
this.pullAPIWrapper.pullKernelImpl(
pullRequest.getMessageQueue(),
subExpression,
subscriptionData.getExpressionType(),
subscriptionData.getSubVersion(),
pullRequest.getNextOffset(),
this.defaultMQPushConsumer.getPullBatchSize(),
sysFlag,
commitOffsetValue,
BROKER_SUSPEND_MAX_TIME_MILLIS,
CONSUMER_TIMEOUT_MILLIS_WHEN_SUSPEND,
CommunicationMode.ASYNC,
pullCallback
);
} catch (Exception e) {
log.error("pullKernelImpl exception", e);
this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION);
}
}
如果拉取到了消息,会将消息提交给consumeMessageService处理拉取到的msgList
DefaultMQPushConsumerImpl.this.consumeMessageService.submitConsumeRequest(
pullResult.getMsgFoundList(),
processQueue,
pullRequest.getMessageQueue(),
dispathToConsume);
consumeMessageService有并发和有序消费的两种实现,分别为:
- ConsumeMessageConcurrentlyService 并发无序消费
- ConsumeMessageOrderlyService 有序消费
具体如何消费信息将单独出2篇文章来解析。