RocketMQ源码解析之Broker消息存储(消息存储)
接上篇RocketMQ源码解析之Broker消息存储(消息接收)
DefaultMessageStore#asyncPutMessage
/**
 * Stores a message asynchronously by delegating the write to the commit log.
 *
 * <p>The store state and the message itself are validated first; if either check fails an
 * already-completed future carrying the failing status is returned. Otherwise the commit
 * log's future is returned, with a callback attached that records elapsed time and failure
 * statistics.
 *
 * @param msg MessageInstance to store
 * @return a future that yields the result of the put operation
 */
@Override
public CompletableFuture<PutMessageResult> asyncPutMessage(MessageExtBrokerInner msg) {
    // Store-level precondition. Per the original notes this covers: store shut down,
    // broker running as slave, store not writable, OS page cache busy.
    final PutMessageStatus storeStatus = this.checkStoreStatus();
    if (PutMessageStatus.PUT_OK != storeStatus) {
        return CompletableFuture.completedFuture(new PutMessageResult(storeStatus, null));
    }

    // Message-level precondition. Per the original notes this rejects a topic longer
    // than 127 or properties larger than 32767.
    final PutMessageStatus messageStatus = this.checkMessage(msg);
    if (PutMessageStatus.MESSAGE_ILLEGAL == messageStatus) {
        return CompletableFuture.completedFuture(new PutMessageResult(messageStatus, null));
    }

    // Timestamp taken just before handing the message to the commit log.
    final long startMillis = this.getSystemClock().now();
    final CompletableFuture<PutMessageResult> pendingPut = this.commitLog.asyncPutMessage(msg);

    pendingPut.thenAccept(putResult -> {
        final long costMillis = this.getSystemClock().now() - startMillis;
        if (costMillis > 500) {
            log.warn("putMessage not in lock elapsed time(ms)={}, bodyLength={}", costMillis, msg.getBody().length);
        }

        // Feed the statistics service: overall put latency and failure counter.
        this.storeStatsService.setPutMessageEntireTimeMax(costMillis);
        final boolean succeeded = putResult != null && putResult.isOk();
        if (!succeeded) {
            this.storeStatsService.getPutMessageFailedTimes().incrementAndGet();
        }
    });

    // The caller receives the commit log's own future, not the one derived by thenAccept.
    return pendingPut;
}
- 第10行,broker状态校验,检测存储器状态:是否关闭、broker是否为slave、是否不可写、操作系统页写入操作是否繁忙
- 第15行,消息校验部分,检测msg的长度是否合法:topic长度是否超过127、properties大小是否超过32767
- 第23行,往commitlog中存储消息,详细解析见 CommitLog#asyncPutMessage
- 第26行,elapsedTime记录消息存储所花费的时间
CommitLog#asyncPutMessage
/**
 * Appends a single message to the commit log and returns a future that completes once the
 * flush request and the replication request have both completed.
 *
 * <p>Steps visible in this method: stamp the message (store time, body CRC), redirect
 * delayed messages to the schedule topic, append to the last mapped file while holding
 * {@code putMessageLock}, update per-topic statistics, then submit the flush and replica
 * requests and merge their statuses into the returned result.
 *
 * @param msg the message to store; its store timestamp, body CRC and, for delayed messages,
 *            its topic, queue id and properties are modified in place
 * @return a future yielding the put result; failures detected while appending are returned
 *         as already-completed futures
 */
public CompletableFuture<PutMessageResult> asyncPutMessage(final MessageExtBrokerInner msg) {
// Set the storage time
msg.setStoreTimestamp(System.currentTimeMillis());
// Set the message body BODY CRC (consider the most appropriate setting
// on the client); this is the CRC32 checksum of the body
msg.setBodyCRC(UtilAll.crc32(msg.getBody()));
// Back to Results: holds the outcome of the append performed under the lock
AppendMessageResult result = null;
// Statistics service, used further down to count puts and written bytes per topic
StoreStatsService storeStatsService = this.defaultMessageStore.getStoreStatsService();
String topic = msg.getTopic();
int queueId = msg.getQueueId();
// Extract the transaction type from the message's system flag
final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag());
// Delay handling applies only to non-transactional and commit messages
if (tranType == MessageSysFlag.TRANSACTION_NOT_TYPE
|| tranType == MessageSysFlag.TRANSACTION_COMMIT_TYPE) {
// Delay Delivery: a delay level greater than 0 marks a delayed message.
// NOTE(review): the original notes say ScheduleMessageService#start creates one timer
// task per delay level to deliver these later - not visible here, confirm.
if (msg.getDelayTimeLevel() > 0) {
// Clamp the delay level to the maximum level the schedule service supports
if (msg.getDelayTimeLevel() > this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel()) {
msg.setDelayTimeLevel(this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel());
}
// Redirect the message to the schedule topic (the original notes give its value as
// SCHEDULE_TOPIC_XXXX)
topic = TopicValidator.RMQ_SYS_SCHEDULE_TOPIC;
// The queue id is derived from the delay level
queueId = ScheduleMessageService.delayLevel2QueueId(msg.getDelayTimeLevel());
// Backup real topic, queueId as message properties before they are overwritten
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_TOPIC, msg.getTopic());
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_QUEUE_ID, String.valueOf(msg.getQueueId()));
msg.setPropertiesString(MessageDecoder.messageProperties2String(msg.getProperties()));
// Replace topic and queue id on the message with the schedule ones
msg.setTopic(topic);
msg.setQueueId(queueId);
}
}
long elapsedTimeInLock = 0;
// Set only when the append rolls over to a new file (END_OF_FILE); handled after the lock
MappedFile unlockMappedFile = null;
// Fetch the last mapped file of the queue (per the original notes, the newest file in
// the commitlog directory); note this happens before the lock is taken
MappedFile mappedFile = this.mappedFileQueue.getLastMappedFile();
// Acquire the write lock.
// NOTE(review): the original notes say the spin (CAS) lock is the default - confirm
// against the store configuration.
putMessageLock.lock(); //spin or ReentrantLock ,depending on store config
try {
// Time at which the lock was obtained
long beginLockTimestamp = this.defaultMessageStore.getSystemClock().now();
// Published in a field while the lock is held and reset to 0 on every exit path.
// NOTE(review): the original notes say isOSPageCacheBusy() reads it to decide whether
// page writes are busy - that method is not visible here.
this.beginTimeInLock = beginLockTimestamp;
// Here settings are stored timestamp, in order to ensure an orderly
// global
// (the store timestamp set at the top is overwritten with the in-lock time)
msg.setStoreTimestamp(beginLockTimestamp);
// No mapped file yet, or the current one is full: ask the queue again with start
// offset 0 (presumably this creates a new file when needed - confirm)
if (null == mappedFile || mappedFile.isFull()) {
mappedFile = this.mappedFileQueue.getLastMappedFile(0); // Mark: NewFile may be cause noise
}
if (null == mappedFile) {
log.error("create mapped file1 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
beginTimeInLock = 0;
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, null));
}
// Append the message to the mapped file
result = mappedFile.appendMessage(msg, this.appendMessageCallback);
switch (result.getStatus()) { // dispatch on the append status
case PUT_OK:
break;
case END_OF_FILE: // the message did not fit: get another MappedFile and append again
unlockMappedFile = mappedFile;
// Create a new file, re-write the message
mappedFile = this.mappedFileQueue.getLastMappedFile(0);
if (null == mappedFile) {
// XXX: warn and notify me
log.error("create mapped file2 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
beginTimeInLock = 0;
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, result));
}
// The status of this second append is not inspected here
result = mappedFile.appendMessage(msg, this.appendMessageCallback);
break;
case MESSAGE_SIZE_EXCEEDED:
case PROPERTIES_SIZE_EXCEEDED:
beginTimeInLock = 0;
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.MESSAGE_ILLEGAL, result));
case UNKNOWN_ERROR:
beginTimeInLock = 0;
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result));
default:
beginTimeInLock = 0;
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result));
}
elapsedTimeInLock = this.defaultMessageStore.getSystemClock().now() - beginLockTimestamp;
beginTimeInLock = 0;
} finally {
// Release the lock on every path, including the early returns above
putMessageLock.unlock();
}
if (elapsedTimeInLock > 500) {
log.warn("[NOTIFYME]putMessage in lock cost time(ms)={}, bodyLength={} AppendMessageResult={}", elapsedTimeInLock, msg.getBody().length, result);
}
// After a roll-over, pass the previous file to unlockMappedFile when warm-up is enabled
if (null != unlockMappedFile && this.defaultMessageStore.getMessageStoreConfig().isWarmMapedFileEnable()) {
this.defaultMessageStore.unlockMappedFile(unlockMappedFile);
}
// Wrap the append result; the status may be overwritten below by flush/replica outcomes
PutMessageResult putMessageResult = new PutMessageResult(PutMessageStatus.PUT_OK, result);
// Statistics: count the put and add the written bytes for the topic
storeStatsService.getSinglePutMessageTopicTimesTotal(msg.getTopic()).incrementAndGet();
storeStatsService.getSinglePutMessageTopicSizeTotal(topic).addAndGet(result.getWroteBytes());
// Flush + HA
// Per the original notes the data has so far only been written into a ByteBuffer;
// flushing to disk and replication are requested next
CompletableFuture<PutMessageStatus> flushResultFuture = submitFlushRequest(result, msg);
CompletableFuture<PutMessageStatus> replicaResultFuture = submitReplicaRequest(result, msg);
// Merge both outcomes; the replica status is applied second, so it wins when both failed
return flushResultFuture.thenCombine(replicaResultFuture, (flushStatus, replicaStatus) -> {
if (flushStatus != PutMessageStatus.PUT_OK) {
putMessageResult.setPutMessageStatus(flushStatus);
}
if (replicaStatus != PutMessageStatus.PUT_OK) {
putMessageResult.setPutMessageStatus(replicaStatus);
if (replicaStatus == PutMessageStatus.FLUSH_SLAVE_TIMEOUT) {
log.error("do sync transfer other node, wait return, but failed, topic: {} tags: {} client address: {}",
msg.getTopic(), msg.getTags(), msg.getBornHostNameString());
}
}
return putMessageResult;
});
}
- 第3行和第6行,分别设置存储时间和crc循环冗余校验
- 第11行,storeStatsService 用于记录消息推送次数和消息大小,第119行和第120行会分别记录消息推送次数和消息的大小
- 第16行,获取事务状态
- 第19行~第41行是针对延迟任务
- 第46行,获取最后一个文件,即 commitlog 目录下的文件
- 第48行和第104行分别是在MappedFile追加消息的过程中加锁和释放锁,可以根据配置来选择使用ReentrantLock还是CAS,默认使用CAS
- 第51行,记录开始的时间,在DefaultMessageStore#isOSPageCacheBusy函数中会利用这个变量来判断OS页写操作是否繁忙
- 第72行,往MappedFile中追加消息,追加的详细解析见 MappedFile#appendMessagesInner
- 第73行~第98行是针对追加消息的结果分情况处理,如果是END_OF_FILE,说明文件剩余空间不足,放不下需要追加的消息,就会重新获取一个MappedFile进行追加
- 第124行~第125行,具体的刷盘过程和HA:CommitLog#submitFlushRequest 详情见RocketMQ源码解析之Broker消息存储(刷盘机制);CommitLog#submitReplicaRequest(后续介绍HA时再详细分析)
MappedFile#appendMessagesInner 在MappedFile追加消息
/**
 * Appends a message (single or batch) to this mapped file through the supplied callback.
 *
 * @param messageExt the message to append; handled when it is a MessageExtBrokerInner or a
 *                   MessageExtBatch
 * @param cb         callback that performs the actual write into the buffer
 * @return the callback's append result, or an UNKNOWN_ERROR result when the file has no
 *         room left or the message type is not supported
 */
public AppendMessageResult appendMessagesInner(final MessageExt messageExt, final AppendMessageCallback cb) {
    assert messageExt != null;
    assert cb != null;

    // Current write position inside this mapped file.
    final int writePos = this.wrotePosition.get();

    // Guard: the write position has reached the file size, so nothing more fits.
    if (writePos >= this.fileSize) {
        log.error("MappedFile.appendMessage return null, wrotePosition: {} fileSize: {}", writePos, this.fileSize);
        return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
    }

    // Use writeBuffer when it is present, otherwise the mapped buffer. Per the original
    // notes, writeBuffer is non-null only when transientStorePoolEnable is true, flushing
    // is asynchronous and the broker is a master; and both buffers keep position 0 with
    // limit == capacity, so slice() yields a view over the whole buffer.
    final ByteBuffer backing = (writeBuffer != null) ? writeBuffer : this.mappedByteBuffer;
    final ByteBuffer target = backing.slice();
    // Start writing at the current write position.
    target.position(writePos);

    final int freeBytes = this.fileSize - writePos;
    final AppendMessageResult appended;
    // Dispatch on the concrete message type; doAppend writes the message into memory.
    if (messageExt instanceof MessageExtBrokerInner) {
        appended = cb.doAppend(this.getFileFromOffset(), target, freeBytes, (MessageExtBrokerInner) messageExt);
    } else if (messageExt instanceof MessageExtBatch) {
        // Batch of messages.
        appended = cb.doAppend(this.getFileFromOffset(), target, freeBytes, (MessageExtBatch) messageExt);
    } else {
        return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
    }

    // Advance the write position and remember the timestamp of the latest store.
    this.wrotePosition.addAndGet(appended.getWroteBytes());
    this.storeTimestamp = appended.getStoreTimestamp();
    return appended;
}
- 第15行,获取当前MappedFile的写入位置
- 第19行,判断当前写入位置是否超过文件的最大值,小于1G,文件还有剩余空间就继续写入
- 第29行,Buffer定位到当前位置,设置为写的起始位置
- 第35行—第41行,根据消息的类型是批量消息还是单个消息,进行相应的处理,以单个消息为例,详细解析见 DefaultAppendMessageCallback#doAppend
- 第45行和第46行分别是更新MappedFile的写入位置以及最后一次存储时间戳
DefaultAppendMessageCallback#doAppend 将消息写入MappedFile
/**
 * Serializes a single message and writes it into the given buffer of a commit log file.
 *
 * <p>When the message plus the minimum tail reserve does not fit into {@code maxBlank}, the
 * remaining space is filled with an end-of-file marker and END_OF_FILE is returned instead.
 *
 * @param fileFromOffset starting offset of this commit log file (one MappedFile) within the
 *                       whole file series; per the original notes the file name encodes it
 * @param byteBuffer     NIO buffer to write into, positioned at the current write position
 * @param maxBlank       maximum number of bytes that can still be written to this file
 * @param msgInner       broker-internal message to append
 * @return the append result: PUT_OK on success, END_OF_FILE when the file cannot hold the
 *         message, PROPERTIES_SIZE_EXCEEDED or MESSAGE_SIZE_EXCEEDED when limits are exceeded
 */
public AppendMessageResult doAppend(final long fileFromOffset, final ByteBuffer byteBuffer, final int maxBlank,
final MessageExtBrokerInner msgInner) {
// STORETIMESTAMP + STOREHOSTADDRESS + OFFSET <br>
// PHY OFFSET: absolute offset of this message within the whole commit log
long wroteOffset = fileFromOffset + byteBuffer.position();
int sysflag = msgInner.getSysFlag();
// Encoded host length: 4 + 4 bytes when the V6 flag is clear, 16 + 4 bytes when it is set
int bornHostLength = (sysflag & MessageSysFlag.BORNHOST_V6_FLAG) == 0 ? 4 + 4 : 16 + 4;
int storeHostLength = (sysflag & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0 ? 4 + 4 : 16 + 4;
// Scratch buffers for the encoded born-host and store-host addresses
ByteBuffer bornHostHolder = ByteBuffer.allocate(bornHostLength);
ByteBuffer storeHostHolder = ByteBuffer.allocate(storeHostLength);
// Reset storeHostHolder before it is filled
this.resetByteBuffer(storeHostHolder, storeHostLength);
String msgId; // message id
/**
 * Build the msgId from the store host address and the absolute physical offset
 */
if ((sysflag & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0) {
msgId = MessageDecoder.createMessageId(this.msgIdMemory, msgInner.getStoreHostBytes(storeHostHolder), wroteOffset);
} else {
msgId = MessageDecoder.createMessageId(this.msgIdV6Memory, msgInner.getStoreHostBytes(storeHostHolder), wroteOffset);
}
// Record ConsumeQueue information
/**
 * Look up the queue's next offset under the key "topic-queueId"; when the key is absent,
 * insert it with an initial offset of 0
 */
keyBuilder.setLength(0);
keyBuilder.append(msgInner.getTopic());
keyBuilder.append('-');
keyBuilder.append(msgInner.getQueueId());
String key = keyBuilder.toString();
// Offset of this message within its queue
Long queueOffset = CommitLog.this.topicQueueTable.get(key);
if (null == queueOffset) { // no entry yet: start a new one at 0
queueOffset = 0L;
CommitLog.this.topicQueueTable.put(key, queueOffset);
}
// Transaction messages that require special handling
/**
 * @4 start
 * Prepared and rollback transaction messages are stored with queue offset 0 and do not
 * enter the consume queue
 */
final int tranType = MessageSysFlag.getTransactionValue(msgInner.getSysFlag());
switch (tranType) {
// Prepared and Rollback message is not consumed, will not enter the
// consumer queue
case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
queueOffset = 0L;
break;
case MessageSysFlag.TRANSACTION_NOT_TYPE:
case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
default:
break;
}// @4 end
/**
 * Serialize message
 */
final byte[] propertiesData =
msgInner.getPropertiesString() == null ? null : msgInner.getPropertiesString().getBytes(MessageDecoder.CHARSET_UTF8);
final int propertiesLength = propertiesData == null ? 0 : propertiesData.length;
// The properties length is later written as a short, so it must not exceed 32767
if (propertiesLength > Short.MAX_VALUE) {
log.warn("putMessage message properties length too long. length={}", propertiesData.length);
return new AppendMessageResult(AppendMessageStatus.PROPERTIES_SIZE_EXCEEDED);
}
final byte[] topicData = msgInner.getTopic().getBytes(MessageDecoder.CHARSET_UTF8);
final int topicLength = topicData.length;
final int bodyLength = msgInner.getBody() == null ? 0 : msgInner.getBody().length;
/**
 * Total stored length: the message itself plus the system fields written below
 */
final int msgLen = calMsgLength(msgInner.getSysFlag(), bodyLength, topicLength, propertiesLength);
// Exceeds the maximum message: return MESSAGE_SIZE_EXCEEDED.
// NOTE(review): the original author guesses the limit is 4 MB and is unsure whether it
// also applies to batches - confirm.
if (msgLen > this.maxMessageSize) {
CommitLog.log.warn("message size exceeded, msg total size: " + msgLen + ", msg body size: " + bodyLength
+ ", maxMessageSize: " + this.maxMessageSize);
return new AppendMessageResult(AppendMessageStatus.MESSAGE_SIZE_EXCEEDED);
}
// Determines whether there is sufficient free space
/**
 * When the space left in this MappedFile is smaller than the message plus the tail
 * reserve, END_OF_FILE is returned and the tail of the file is filled with a marker:
 * (1) first 4 bytes: the remaining space of the file (maxBlank),
 * (2) next 4 bytes: the magic code CommitLog.BLANK_MAGIC_CODE.
 * Per the original notes the reserve is those 8 bytes, and the broker then stores the
 * message in a newly created commit log file.
 */
if ((msgLen + END_FILE_MIN_BLANK_LENGTH) > maxBlank) { // not enough free space in this file
// Prepare the scratch buffer for the end-of-file marker
this.resetByteBuffer(this.msgStoreItemMemory, maxBlank);
// 1 TOTALSIZE: remaining space of the current file
this.msgStoreItemMemory.putInt(maxBlank);
// 2 MAGICCODE: blank magic code
this.msgStoreItemMemory.putInt(CommitLog.BLANK_MAGIC_CODE);
// 3 The remaining space may be any value
// Here the length of the specially set maxBlank
final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
// Write maxBlank bytes of the scratch buffer into the file buffer, then build the result
byteBuffer.put(this.msgStoreItemMemory.array(), 0, maxBlank);
return new AppendMessageResult(AppendMessageStatus.END_OF_FILE, wroteOffset, maxBlank, msgId, msgInner.getStoreTimestamp(),
queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);
}
// Initialization of storage space; the fields below are written in on-disk order
this.resetByteBuffer(msgStoreItemMemory, msgLen);
// 1 TOTALSIZE: total length of the stored message
this.msgStoreItemMemory.putInt(msgLen);
// 2 MAGICCODE
this.msgStoreItemMemory.putInt(CommitLog.MESSAGE_MAGIC_CODE);
// 3 BODYCRC: CRC of the body
this.msgStoreItemMemory.putInt(msgInner.getBodyCRC());
// 4 QUEUEID
this.msgStoreItemMemory.putInt(msgInner.getQueueId());
// 5 FLAG
this.msgStoreItemMemory.putInt(msgInner.getFlag());
// 6 QUEUEOFFSET: offset within the queue
this.msgStoreItemMemory.putLong(queueOffset);
// 7 PHYSICALOFFSET: position within the whole commit log
this.msgStoreItemMemory.putLong(fileFromOffset + byteBuffer.position());
// 8 SYSFLAG: system flag
this.msgStoreItemMemory.putInt(msgInner.getSysFlag());
// 9 BORNTIMESTAMP: creation timestamp of the message
this.msgStoreItemMemory.putLong(msgInner.getBornTimestamp());
// 10 BORNHOST
this.resetByteBuffer(bornHostHolder, bornHostLength);
this.msgStoreItemMemory.put(msgInner.getBornHostBytes(bornHostHolder));
// 11 STORETIMESTAMP
this.msgStoreItemMemory.putLong(msgInner.getStoreTimestamp());
// 12 STOREHOSTADDRESS
this.resetByteBuffer(storeHostHolder, storeHostLength);
this.msgStoreItemMemory.put(msgInner.getStoreHostBytes(storeHostHolder));
// 13 RECONSUMETIMES
this.msgStoreItemMemory.putInt(msgInner.getReconsumeTimes());
// 14 Prepared Transaction Offset
this.msgStoreItemMemory.putLong(msgInner.getPreparedTransactionOffset());
// 15 BODY: 4-byte length followed by the body bytes (if any)
this.msgStoreItemMemory.putInt(bodyLength);
if (bodyLength > 0)
this.msgStoreItemMemory.put(msgInner.getBody());
// 16 TOPIC: 1-byte length followed by the topic bytes
this.msgStoreItemMemory.put((byte) topicLength);
this.msgStoreItemMemory.put(topicData);
// 17 PROPERTIES: 2-byte length followed by the properties bytes (if any)
this.msgStoreItemMemory.putShort((short) propertiesLength);
if (propertiesLength > 0)
this.msgStoreItemMemory.put(propertiesData);
final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
// Write messages to the queue buffer
byteBuffer.put(this.msgStoreItemMemory.array(), 0, msgLen);
// Build the result describing the successful append
AppendMessageResult result = new AppendMessageResult(AppendMessageStatus.PUT_OK, wroteOffset, msgLen, msgId,
msgInner.getStoreTimestamp(), queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);
switch (tranType) {
/**
 * Prepared and rollback transaction messages do not enter the consume queue, so the
 * queue offset table is left untouched for them
 */
case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
break;
case MessageSysFlag.TRANSACTION_NOT_TYPE:
case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
// The next update ConsumeQueue information: advance the queue's offset by one
CommitLog.this.topicQueueTable.put(key, ++queueOffset);
break;
default:
break;
}
return result;
}
- 第16行,获取在commitLog中的绝对位置
- 第20行至第26行,分别是broker存储的地址和消息的物理地址
- 第31行—第35行,根据broker存储地址和消息的绝对物理位置创建msgId
- 第41行—第51行,根据topic和queueId获取该队列的偏移地址:先创建一个key,再利用该key,从 topicQueueTable 中获取对应队列的偏移地址(topicQueueTable是一个HashMap变量,保存topic-queueId与偏移量之间的关系),如果该变量中没有对应的key,就将该键值对添加到Map变量中
- 第58行—第70行,是针对事务消息进行处理
- 第75行—第177行,是对消息进行序列化,并计算消息存储所需的长度,如果可用空间能容纳当前消息,就封装消息后放入到对应的队列缓存byteBuffer中,并创建AppendMessageResult对象。没有足够的空间就创建一个新的commitLog来存储该消息
- 第189行,消息提交成功的话,会更新topicQueueTable中对应的偏移量