Netty memory allocation
https://www.cnblogs.com/s686zhou/p/15714858.html
https://www.jianshu.com/p/1ce3bc2d7c5e
https://blog.csdn.net/wangwei19871103/category_9681495_2.html
Netty version 4.1.50
RSS memory-allocation call chain
PooledByteBufAllocator#ioBuffer(int initialCapacity, int maxCapacity)
=>AbstractByteBufAllocator#ioBuffer(int initialCapacity, int maxCapacity)
=>AbstractByteBufAllocator#directBuffer(initialCapacity, maxCapacity)
=>PooledByteBufAllocator#newDirectBuffer(initialCapacity, maxCapacity)
=>PoolArena<ByteBuffer>#allocate
private void allocate(PoolThreadCache cache, PooledByteBuf<T> buf, final int reqCapacity) {
    final int normCapacity = normalizeCapacity(reqCapacity);
    if (isTinyOrSmall(normCapacity)) { // capacity < pageSize
        int tableIdx;
        PoolSubpage<T>[] table;
        boolean tiny = isTiny(normCapacity);
        if (tiny) { // < 512
            if (cache.allocateTiny(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            tableIdx = tinyIdx(normCapacity); // tinyIdx = normCapacity / 16
            table = tinySubpagePools;
        } else {
            if (cache.allocateSmall(this, buf, reqCapacity, normCapacity)) {
                // was able to allocate out of the cache so move on
                return;
            }
            tableIdx = smallIdx(normCapacity);
            table = smallSubpagePools;
        }
        final PoolSubpage<T> head = table[tableIdx]; // locate the PoolSubpage head by table and index
        /**
         * Synchronize on the head. This is needed as {@link PoolChunk#allocateSubpage(int)} and
         * {@link PoolChunk#free(long)} may modify the doubly linked list as well.
         */
        synchronized (head) { // allocate from an existing PoolSubpage
            final PoolSubpage<T> s = head.next;
            if (s != head) {
                assert s.doNotDestroy && s.elemSize == normCapacity;
                long handle = s.allocate();
                assert handle >= 0;
                s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity, cache);
                incTinySmallAllocation(tiny);
                return;
            }
        }
        synchronized (this) { // no PoolSubpage hit, fall through to allocateNormal
            allocateNormal(buf, reqCapacity, normCapacity, cache);
        }
        incTinySmallAllocation(tiny);
        return;
    }
    if (normCapacity <= chunkSize) { // >= pageSize (8 KiB) and <= chunkSize (16 MiB): try the thread cache first
        if (cache.allocateNormal(this, buf, reqCapacity, normCapacity)) {
            // was able to allocate out of the cache so move on
            return;
        }
        synchronized (this) { // cache miss, fall through to allocateNormal
            allocateNormal(buf, reqCapacity, normCapacity, cache);
            ++allocationsNormal;
        }
    } else { // huge allocation
        // Huge allocations are never served via the cache so just call allocateHuge
        allocateHuge(buf, reqCapacity);
    }
}
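To make the tiering concrete, here is a hypothetical walk-through of a few request sizes under the defaults (pageSize = 8192, chunkSize = 16 MiB); the numbers follow from normalizeCapacity below:
// reqCapacity = 100    -> normCapacity = 112    -> tiny   (thread cache, then tinySubpagePools)
// reqCapacity = 514    -> normCapacity = 1024   -> small  (thread cache, then smallSubpagePools)
// reqCapacity = 9000   -> normCapacity = 16384  -> normal (thread cache, then allocateNormal)
// reqCapacity = 20 MiB -> no normalization      -> huge   (allocateHuge, never cached)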
-
cache is the calling worker thread's PoolThreadCache;
-
buf is a PooledUnsafeDirectByteBuf, created as shown below. maxCapacity is the requested maxCapacity; RECYCLER.get() fetches a PooledUnsafeDirectByteBuf from the object pool, and reuse resets the buffer's reference count (see the Recycler sketch after this list).
static PooledUnsafeDirectByteBuf newInstance(int maxCapacity) {
    PooledUnsafeDirectByteBuf buf = RECYCLER.get();
    buf.reuse(maxCapacity);
    return buf;
}
-
reqCapacity is the size actually requested.
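As background on the object pool, a minimal sketch of how io.netty.util.Recycler is typically used (the class and field names here are illustrative, not Netty's internals):

import io.netty.util.Recycler;

public final class PooledThing {
    // one Recycler per pooled type; newObject is only called when the pool has nothing to hand out
    private static final Recycler<PooledThing> RECYCLER = new Recycler<PooledThing>() {
        @Override
        protected PooledThing newObject(Handle<PooledThing> handle) {
            return new PooledThing(handle);
        }
    };

    private final Recycler.Handle<PooledThing> handle;

    private PooledThing(Recycler.Handle<PooledThing> handle) {
        this.handle = handle;
    }

    public static PooledThing newInstance() {
        return RECYCLER.get(); // reuses a recycled instance when one is available
    }

    public void recycle() {
        handle.recycle(this); // hands this instance back to the pool
    }
}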
normalizeCapacity:
int normalizeCapacity(int reqCapacity) {
    checkPositiveOrZero(reqCapacity, "reqCapacity");
    if (reqCapacity >= chunkSize) {
        return directMemoryCacheAlignment == 0 ? reqCapacity : alignCapacity(reqCapacity);
    }
    if (!isTiny(reqCapacity)) { // >= 512
        // Doubled
        int normalizedCapacity = reqCapacity;
        normalizedCapacity --;
        normalizedCapacity |= normalizedCapacity >>> 1;
        normalizedCapacity |= normalizedCapacity >>> 2;
        normalizedCapacity |= normalizedCapacity >>> 4;
        normalizedCapacity |= normalizedCapacity >>> 8;
        normalizedCapacity |= normalizedCapacity >>> 16;
        normalizedCapacity ++;
        if (normalizedCapacity < 0) {
            normalizedCapacity >>>= 1;
        }
        assert directMemoryCacheAlignment == 0 || (normalizedCapacity & directMemoryCacheAlignmentMask) == 0;
        return normalizedCapacity;
    }
    if (directMemoryCacheAlignment > 0) {
        return alignCapacity(reqCapacity);
    }
    // Quantum-spaced
    if ((reqCapacity & 15) == 0) {
        return reqCapacity;
    }
    return (reqCapacity & ~15) + 16;
}
-
>= chunkSize (16 MiB by default): return reqCapacity directly (aligned if directMemoryCacheAlignment is set)
-
>= 512 bytes: round up to the next power of two
-
>= 16 bytes and a multiple of 16: return reqCapacity unchanged
-
> 16 bytes and not a multiple of 16: return (reqCapacity / 16 + 1) * 16; e.g. reqCapacity = 37 yields 3 * 16 = 48
-
< 16 bytes: return 16
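The rules above are easy to reproduce outside Netty; a standalone sketch for experimenting (my own code, assuming no directMemoryCacheAlignment and the default 16 MiB chunkSize):

public final class NormalizeDemo {
    static final int CHUNK_SIZE = 16 * 1024 * 1024; // 8192 << 11

    static int normalize(int reqCapacity) {
        if (reqCapacity >= CHUNK_SIZE) {
            return reqCapacity;                                 // huge: no rounding
        }
        if (reqCapacity >= 512) {
            return Integer.highestOneBit(reqCapacity - 1) << 1; // next power of two
        }
        if ((reqCapacity & 15) == 0) {
            return reqCapacity;                                 // already 16-byte spaced
        }
        return (reqCapacity & ~15) + 16;                        // round up to a multiple of 16
    }

    public static void main(String[] args) {
        System.out.println(normalize(37));   // 48
        System.out.println(normalize(514));  // 1024
        System.out.println(normalize(9000)); // 16384
    }
}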
RSS configuration
DEFAULT_PAGE_SIZE = SystemPropertyUtil.getInt("io.netty.allocator.pageSize", 8192);
DEFAULT_MAX_ORDER = SystemPropertyUtil.getInt("io.netty.allocator.maxOrder", 11);
DEFAULT_CHUNK_SIZE = DEFAULT_PAGE_SIZE << DEFAULT_MAX_ORDER; // 8192 << 11 = 16 MiB
DIRECT_MEMORY_CACHE_ALIGNMENT =
        SystemPropertyUtil.getInt("io.netty.allocator.directMemoryCacheAlignment", 0); // directMemoryCacheAlignment
DIRECT_MEMORY_CACHE_ALIGNMENT_MASK = DIRECT_MEMORY_CACHE_ALIGNMENT - 1; // directMemoryCacheAlignmentMask
tinySubpagePools: length 32, with each index stepping by 16 bytes, from 16 B up to 496 B; tinySubpagePools[0] holds nothing.
smallSubpagePools: length pageShifts - 9. With pageSize = 8k, pageShifts = validateAndCalculatePageShifts(8192) = 32 - 1 - 18 = 13, so smallSubpagePools has length 4; the four indexes correspond to 512, 1024, 2048 and 4096.
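The two index functions are simple bit tricks (paraphrased from PoolArena in 4.1.50):

// tiny sizes are multiples of 16, so the index is just normCapacity / 16
static int tinyIdx(int normCapacity) {
    return normCapacity >>> 4; // 16 -> 1, ..., 496 -> 31
}

// small sizes are 512, 1024, 2048, 4096: count the doublings above 512
static int smallIdx(int normCapacity) {
    int tableIdx = 0;
    int i = normCapacity >>> 10;
    while (i != 0) {
        i >>>= 1;
        tableIdx++;
    }
    return tableIdx; // 512 -> 0, 1024 -> 1, 2048 -> 2, 4096 -> 3
}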
PoolArena#allocateNormal
private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
    if (q050.allocate(buf, reqCapacity, normCapacity, threadCache) ||
        q025.allocate(buf, reqCapacity, normCapacity, threadCache) ||
        q000.allocate(buf, reqCapacity, normCapacity, threadCache) ||
        qInit.allocate(buf, reqCapacity, normCapacity, threadCache) ||
        q075.allocate(buf, reqCapacity, normCapacity, threadCache)) {
        return;
    }
    // Add a new chunk.
    PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize);
    boolean success = c.allocate(buf, reqCapacity, normCapacity, threadCache);
    assert success;
    qInit.add(c);
}
Allocation first tries the existing chunk lists and only creates a new chunk when none of them can serve the request. The probe order q050 → q025 → q000 → qInit → q075 is a deliberate trade-off: starting with moderately used chunks keeps overall chunk utilization high (letting lightly used chunks drain and eventually be freed), while q075 is tried last because nearly full chunks are least likely to fit the request.
Other allocation paths
Allocating from PoolThreadCache
PoolThreadCache https://www.jianshu.com/p/9177b7dabd37
// Hold the caches for the different size classes, which are tiny, small and normal.
private final MemoryRegionCache<byte[]>[] tinySubPageHeapCaches;
private final MemoryRegionCache<byte[]>[] smallSubPageHeapCaches;
private final MemoryRegionCache<ByteBuffer>[] tinySubPageDirectCaches;
private final MemoryRegionCache<ByteBuffer>[] smallSubPageDirectCaches;
private final MemoryRegionCache<byte[]>[] normalHeapCaches;
private final MemoryRegionCache<ByteBuffer>[] normalDirectCaches;
A look at the allocateTiny method:
boolean allocateTiny(PoolArena<?> area, PooledByteBuf<?> buf, int reqCapacity, int normCapacity) {
    // reqCapacity is the original request, normCapacity the normalized size
    return allocate(cacheForTiny(area, normCapacity), buf, reqCapacity);
}

private MemoryRegionCache<?> cacheForTiny(PoolArena<?> area, int normCapacity) {
    int idx = PoolArena.tinyIdx(normCapacity); // map the size to the cache index
    if (area.isDirect()) {
        return cache(tinySubPageDirectCaches, idx); // uses MemoryRegionCache<ByteBuffer>[] tinySubPageDirectCaches
    }
    return cache(tinySubPageHeapCaches, idx);
}

private static <T> MemoryRegionCache<T> cache(MemoryRegionCache<T>[] cache, int idx) {
    if (cache == null || idx > cache.length - 1) {
        return null;
    }
    return cache[idx]; // the MemoryRegionCache for this size class
}

private boolean allocate(MemoryRegionCache<?> cache, PooledByteBuf buf, int reqCapacity) {
    if (cache == null) {
        // no cache found so just return false here
        return false;
    }
    boolean allocated = cache.allocate(buf, reqCapacity, this); // allocate from the located cache
    if (++ allocations >= freeSweepAllocationThreshold) {
        allocations = 0;
        trim();
    }
    return allocated;
}
======================
MemoryRegionCache
public final boolean allocate(PooledByteBuf<T> buf, int reqCapacity, PoolThreadCache threadCache) {
    Entry<T> entry = queue.poll();
    if (entry == null) {
        return false;
    }
    // initializes buf (the PooledUnsafeDirectByteBuf already prepared with maxCapacity)
    // on top of the cached chunk/handle
    initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity, threadCache);
    entry.recycle();
    // allocations is not thread-safe which is fine as this is only called from the same thread all time.
    ++ allocations;
    return true;
}
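Seen from the outside, the cache path is easy to trigger; a minimal usage sketch against the public allocator API (the cache hit on the second allocation is expected rather than guaranteed, since it depends on staying on one thread and in one size class):

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;

public class ThreadCacheDemo {
    public static void main(String[] args) {
        PooledByteBufAllocator alloc = PooledByteBufAllocator.DEFAULT;

        ByteBuf first = alloc.directBuffer(64); // tiny size class
        first.release();                        // the freed handle lands in this thread's PoolThreadCache

        ByteBuf second = alloc.directBuffer(64); // same thread, same size class: served from the cache
        second.release();
    }
}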
Object pooling
Every worker thread has its own PoolThreadCache.
Creation of the six internal cache arrays:
private static <T> MemoryRegionCache<T>[] createSubPageCaches(
        int cacheSize, int numCaches, SizeClass sizeClass) {
    if (cacheSize > 0 && numCaches > 0) {
        @SuppressWarnings("unchecked")
        MemoryRegionCache<T>[] cache = new MemoryRegionCache[numCaches];
        for (int i = 0; i < cache.length; i++) {
            // TODO: maybe use cacheSize / cache.length
            cache[i] = new SubPageMemoryRegionCache<T>(cacheSize, sizeClass);
        }
        return cache;
    } else {
        return null;
    }
}
cacheSize: capacity of each cache queue
numCaches: length of the array
When a buffer is released, its memory is added to the cache via MemoryRegionCache.add:
MemoryRegionCache(int size, SizeClass sizeClass) {
    this.size = MathUtil.safeFindNextPositivePowerOfTwo(size);
    queue = PlatformDependent.newFixedMpscQueue(this.size);
    this.sizeClass = sizeClass;
}

@SuppressWarnings("unchecked")
public final boolean add(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle) {
    Entry<T> entry = newEntry(chunk, nioBuffer, handle);
    boolean queued = queue.offer(entry);
    if (!queued) {
        // If it was not possible to cache the chunk, immediately recycle the entry
        entry.recycle();
    }
    return queued;
}
Tracing back up the call path:
tinySubPageDirectCaches = createSubPageCaches(
        tinyCacheSize, PoolArena.numTinySubpagePools, SizeClass.Tiny);
DEFAULT_TINY_CACHE_SIZE = SystemPropertyUtil.getInt("io.netty.allocator.tinyCacheSize", 512);
So each MemoryRegionCache queue holds at most 512 entries; once it is full, further offers fail and the entry is recycled immediately. Note that what is cached is the Entry (a chunk reference plus a handle), not raw memory: with 16 MiB chunks and, say, 80 worker threads, these caches can pin a substantial amount of chunk memory.
Allocating from PoolSubpage
PoolSubpage(PoolSubpage<T> head, PoolChunk<T> chunk, int memoryMapIdx, int runOffset, int pageSize, int elemSize) {
    this.chunk = chunk;
    this.memoryMapIdx = memoryMapIdx;
    this.runOffset = runOffset;
    this.pageSize = pageSize;
    bitmap = new long[pageSize >>> 10]; // pageSize / 16 / 64 = 8; 16 is the smallest element size, 64 the bit width of a long
    init(head, elemSize);
}

void init(PoolSubpage<T> head, int elemSize) {
    doNotDestroy = true;
    this.elemSize = elemSize;
    if (elemSize != 0) {
        maxNumElems = numAvail = pageSize / elemSize; // at most 8192 / 16 = 512 (elemSize does not always divide pageSize evenly)
        nextAvail = 0;
        bitmapLength = maxNumElems >>> 6; // at most 512 / 64 = 8
        if ((maxNumElems & 63) != 0) {
            bitmapLength ++; // one extra long if maxNumElems is not a multiple of 64
        }
        for (int i = 0; i < bitmapLength; i ++) {
            bitmap[i] = 0;
        }
    }
    addToPool(head); // link in after the head for this normCapacity
}
How should the bitmap be understood? It tracks the usage of every element in the page.
Suppose elemSize = 256 B; a page is then split into 32 elements. Since 32 >>> 6 = 0 and (32 & 63) != 0, bitmapLength = 1, so a single long can represent the state of all 32 elements: the lowest bit stands for the first element and the higher bits for the later ones.
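The bit arithmetic in a nutshell (an illustrative sketch, mirroring what allocate below does):

long[] bitmap = new long[1];   // one long covers up to 64 elements

int bitmapIdx = 5;             // mark element 5 as used
int q = bitmapIdx >>> 6;       // which long in the array    -> 0
int r = bitmapIdx & 63;        // which bit inside that long -> 5
bitmap[q] |= 1L << r;

boolean free = (bitmap[q] >>> r & 1) == 0; // false after the mark above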
long allocate() {
    if (elemSize == 0) {
        return toHandle(0);
    }
    // No element available, allocation fails. Normally a fully allocated PoolSubpage is removed
    // from PoolArena#smallSubpagePools and no longer used for allocation, so this rarely happens.
    if (numAvail == 0 || !doNotDestroy) {
        return -1;
    }
    // Find the bit index of the next free element; if the page is split into 32 elements,
    // this returns a value in [0, 31].
    final int bitmapIdx = getNextAvail();
    int q = bitmapIdx >>> 6; // index of the long within the bitmap array
    int r = bitmapIdx & 63;  // bit position inside that long
    assert (bitmap[q] >>> r & 1) == 0; // the bit must still be 0 (free)
    bitmap[q] |= 1L << r;    // set bit r of bitmap[q] to 1: the element is now in use
    if (-- numAvail == 0) {
        removeFromPool();
    }
    return toHandle(bitmapIdx);
}
private long toHandle(int bitmapIdx) {
    return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx;
}
toHandle:
https://blog.csdn.net/wangwei19871103/article/details/104356566
memoryMapIdx is the node id in the chunk's binary tree, so toHandle returns
[bitmap index (high 32 bits), node id (low 32 bits)]. ORing with 0x4000000000000000L keeps the high bits from being all zero; it is effectively a flag bit that distinguishes a run allocation from a subpage allocation: when that flag bit of the handle is 1, the allocation is a subpage.
Allocation therefore boils down to finding a bitmapIdx, encoding it into a long handle, and finally initializing the buf.
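Encode and decode side by side (a sketch; the decode mirrors PoolChunk's memoryMapIdx/bitmapIdx helpers, and the & 0x3FFFFFFF seen later in initBufWithSubpage strips the flag bit):

// encode: flag | bitmap index in the high 32 bits | tree node id in the low 32 bits
static long toHandle(int bitmapIdx, int memoryMapIdx) {
    return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx;
}

static int memoryMapIdx(long handle) {
    return (int) handle;          // low 32 bits: tree node id
}

static int bitmapIdx(long handle) {
    return (int) (handle >>> 32); // high 32 bits: flag + bitmap index, nonzero for subpages
}

// int realIdx = bitmapIdx(handle) & 0x3FFFFFFF; // drop the flag to recover the raw index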
private int getNextAvail() {
    int nextAvail = this.nextAvail;
    if (nextAvail >= 0) {
        this.nextAvail = -1;
        return nextAvail;
    }
    return findNextAvail();
}
private int findNextAvail() {
    final long[] bitmap = this.bitmap;
    final int bitmapLength = this.bitmapLength;
    for (int i = 0; i < bitmapLength; i ++) {
        long bits = bitmap[i];
        if (~bits != 0) { // a 1 bit means the element is taken; if all 64 bits are 1 the complement is 0, so a nonzero complement means a free bit may exist
            return findNextAvail0(i, bits);
        }
    }
    return -1;
}
private int findNextAvail0(int i, long bits) { // i is the index into the bitmap array, bits the corresponding long
    final int maxNumElems = this.maxNumElems;
    final int baseVal = i << 6;
    for (int j = 0; j < 64; j ++) {
        if ((bits & 1) == 0) { // scan from the first element (low bit) upwards, looking for a 0 (unallocated) bit
            int val = baseVal | j;
            if (val < maxNumElems) { // never return an element index beyond maxNumElems
                return val;
            } else {
                break;
            }
        }
        bits >>>= 1; // move on to the next element's bit
    }
    return -1;
}
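The linear scan can also be expressed with one intrinsic; an equivalent sketch (my own, not Netty's code):

// find the lowest 0 bit in bits, i.e. the first free element covered by this long
static int firstFree(long bits, int baseVal, int maxNumElems) {
    int j = Long.numberOfTrailingZeros(~bits); // position of the lowest set bit of the complement
    if (j == 64) {
        return -1;                             // all 64 bits are 1: no free element here
    }
    int val = baseVal | j;
    return val < maxNumElems ? val : -1;       // respect the element count limit
}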
private void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer,
                                long handle, int bitmapIdx, int reqCapacity, PoolThreadCache threadCache) {
    assert bitmapIdx != 0;
    int memoryMapIdx = memoryMapIdx(handle); // recover the tree node id from the handle
    PoolSubpage<T> subpage = subpages[subpageIdx(memoryMapIdx)];
    assert subpage.doNotDestroy;
    assert reqCapacity <= subpage.elemSize;
    buf.init(
        this, nioBuffer, handle,
        // page offset + element offset within the page (+ the chunk's base offset)
        runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,
        reqCapacity, subpage.elemSize, threadCache);
}
void init(PoolChunk<T> chunk, ByteBuffer nioBuffer,
          long handle, int offset, int length, int maxLength, PoolThreadCache cache) {
    init0(chunk, nioBuffer, handle, offset, length, maxLength, cache);
}

void initUnpooled(PoolChunk<T> chunk, int length) {
    init0(chunk, null, 0, chunk.offset, length, length, null);
}

private void init0(PoolChunk<T> chunk, ByteBuffer nioBuffer,
                   long handle, int offset, int length, int maxLength, PoolThreadCache cache) {
    assert handle >= 0;
    assert chunk != null;
    this.chunk = chunk;
    memory = chunk.memory;
    tmpNioBuf = nioBuffer;
    allocator = chunk.arena.parent;
    this.cache = cache;
    this.handle = handle;
    this.offset = offset;
    this.length = length;
    this.maxLength = maxLength;
}
Allocating from PoolChunkList
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
    if (normCapacity > maxCapacity) {
        // Either this PoolChunkList is empty or the requested capacity is larger than the capacity which can
        // be handled by the PoolChunks that are contained in this PoolChunkList.
        return false;
    }
    for (PoolChunk<T> cur = head; cur != null; cur = cur.next) {
        if (cur.allocate(buf, reqCapacity, normCapacity, threadCache)) {
            if (cur.freeBytes <= freeMinThreshold) {
                remove(cur);
                nextList.add(cur);
            }
            return true;
        }
    }
    return false;
}
The method walks the chunks starting from head; after a successful allocation, a chunk whose free bytes fall below the list's threshold is moved on to the next, higher-usage list. For reference, the lists cover (overlapping) usage ranges of roughly: qInit [0, 25%), q000 [1%, 50%), q025 [25%, 75%), q050 [50%, 100%), q075 [75%, 100%), q100 [100%].
Allocating from PoolChunk
https://www.cnblogs.com/binecy/p/14191601.html
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
    final long handle;
    if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
        handle = allocateRun(normCapacity);
    } else {
        handle = allocateSubpage(normCapacity);
    }
    if (handle < 0) {
        return false;
    }
    ByteBuffer nioBuffer = cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null;
    initBuf(buf, nioBuffer, handle, reqCapacity, threadCache);
    return true;
}
private long allocateSubpage(int normCapacity) {
    // Obtain the head of the PoolSubPage pool that is owned by the PoolArena and synchronize on it.
    // This is need as we may add it back and so alter the linked-list structure.
    PoolSubpage<T> head = arena.findSubpagePoolHead(normCapacity); // head in tinySubpagePools/smallSubpagePools for this size
    int d = maxOrder; // subpages are only be allocated from pages i.e., leaves
    synchronized (head) {
        int id = allocateNode(d);
        if (id < 0) {
            return id;
        }
        final PoolSubpage<T>[] subpages = this.subpages;
        final int pageSize = this.pageSize;
        freeBytes -= pageSize;
        int subpageIdx = subpageIdx(id);
        PoolSubpage<T> subpage = subpages[subpageIdx];
        if (subpage == null) {
            subpage = new PoolSubpage<T>(head, this, id, runOffset(id), pageSize, normCapacity); // create a new PoolSubpage
            subpages[subpageIdx] = subpage;
        } else {
            subpage.init(head, normCapacity); // reinitialize the existing subpage for the new element size
        }
        return subpage.allocate(); // allocate from the newly (re)initialized subpage
    }
}
void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity,
             PoolThreadCache threadCache) {
    int memoryMapIdx = memoryMapIdx(handle);
    int bitmapIdx = bitmapIdx(handle);
    if (bitmapIdx == 0) {
        byte val = value(memoryMapIdx);
        assert val == unusable : String.valueOf(val);
        buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset,
                 reqCapacity, runLength(memoryMapIdx), threadCache);
    } else {
        initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity, threadCache);
    }
}
-
requests of 8 KiB (a page) or more use allocateRun
-
smaller requests use allocateSubpage; in initBuf the two cases are told apart by bitmapIdx(handle), which is nonzero for subpage handles because the 0x40000000 flag bit survives the 32-bit shift
Every PoolChunk holds a subpages array: subpages = newSubpageArray(maxSubpageAllocs), with maxSubpageAllocs = 2^maxOrder = 2048 (16 MiB / 8 KiB = 2048). subpageIdx(id) = id ^ maxSubpageAllocs just strips the high bit, since leaf node ids run from 2048 to 4095.
// Generate the memory map.
memoryMap = new byte[maxSubpageAllocs << 1]; // 4096; the allocation binary tree, each node initialized to its depth d
depthMap = new byte[memoryMap.length];       // 4096; records each node's depth so it can be looked up by index;
                                             // unlike memoryMap, depthMap never changes after initialization
int memoryMapIndex = 1;
for (int d = 0; d <= maxOrder; ++ d) { // move down the tree one level at a time
    int depth = 1 << d;
    for (int p = 0; p < depth; ++ p) {
        // in each level traverse left to right and set value to the depth of subtree
        memoryMap[memoryMapIndex] = (byte) d;
        depthMap[memoryMapIndex] = (byte) d;
        memoryMapIndex ++;
    }
}
memoryMap and depthMap are laid out in binary-heap order: memoryMap[0] = 0 (unused), memoryMap[1] = 0 (the root), memoryMap[2] = 1, memoryMap[3] = 1, memoryMap[4] = 2, and so on.
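Two helpers map a node id back to a byte range inside the chunk; a sketch with the default geometry (pageSize = 8 KiB, maxOrder = 11, so log2ChunkSize = 24), following PoolChunk's runLength/runOffset:

static final int LOG2_CHUNK_SIZE = 24; // chunkSize = 16 MiB = 2^24

// depth of node id in the heap-ordered tree (what depthMap stores)
static int depth(int id) {
    return 31 - Integer.numberOfLeadingZeros(id);
}

// bytes covered by node id: the root covers the whole chunk, halving per level
static int runLength(int id) {
    return 1 << (LOG2_CHUNK_SIZE - depth(id));
}

// byte offset of node id inside the chunk: its left-to-right position times its length
static int runOffset(int id) {
    int shift = id ^ (1 << depth(id)); // clear the top bit -> position within the level
    return shift * runLength(id);
}

// e.g. leaf id 2049 (depth 11): runLength(2049) = 8192, runOffset(2049) = 8192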
The allocateNode method:
private int allocateNode(int d) {
    int id = 1;
    int initial = - (1 << d); // has last d bits = 0 and rest all = 1
    byte val = value(id);
    if (val > d) { // unusable
        // memoryMap[1] > d: even the root cannot provide a block of depth d,
        // so this PoolChunk cannot satisfy the request and allocation fails.
        return -1;
    }
    while (val < d || (id & initial) == 0) { // id & initial == 1 << d for all ids at depth d, for < d it is 0
        // val < d: this node still has enough free memory for the request.
        // id & initial == 0 means id < 1 << d, i.e. depth d has not been reached yet, so keep descending.
        // val > d never occurs here, but val == d can: e.g. if the chunk's largest free block is 2 MiB
        // (memoryMap[1] = 3) and 2 MiB is requested, val == d holds on levels 0 through 2.
        id <<= 1; // descend one level; a node's left child index is twice its own
        val = value(id);
        if (val > d) {
            // the left child cannot satisfy the request, so try its sibling;
            // id is even at this point, so id ^= 1 is the same as id += 1
            id ^= 1;
            val = value(id);
        }
    }
    byte value = value(id);
    assert value == d && (id & initial) == 1 << d : String.format("val = %d, id & initial = %d, d = %d",
            value, id & initial, d);
    setValue(id, unusable); // mark as unusable; unusable = maxOrder + 1 = 12
    updateParentsAlloc(id); // walk up the parents, setting each parent's value to min(left, right)
    return id;
}
Why use the depth d as the node value?
The value of a node encodes, in effect, how large a contiguous block is still allocatable beneath it. Take allocateNode(1), an 8 MiB request. If memoryMap[1] = 0, the full 16 MiB is untouched and we can safely descend. Once 2 MiB has been allocated, the root is updated to the minimum of its children, so memoryMap[1] = 1: some memory is gone, but at least one complete 8 MiB half remains. If another 1 MiB is then taken from the right subtree, memoryMap[1] rises above 1: neither 8 MiB half is intact any more, and this chunk can no longer satisfy an 8 MiB request.
The actual search order for a block is top to bottom, left before right.
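A runnable miniature of the algorithm (my own sketch: a 4-page chunk with maxOrder = 2, mirroring allocateNode and updateParentsAlloc):

public class BuddyTreeDemo {
    static final int MAX_ORDER = 2;            // tree of depth 2 -> 4 "pages"
    static final int UNUSABLE = MAX_ORDER + 1; // 3
    static byte[] memoryMap = new byte[1 << (MAX_ORDER + 1)]; // heap-ordered, index 0 unused

    public static void main(String[] args) {
        // initialize every node to its depth
        int idx = 1;
        for (int d = 0; d <= MAX_ORDER; d++) {
            for (int p = 0; p < (1 << d); p++) {
                memoryMap[idx++] = (byte) d;
            }
        }
        System.out.println(allocate(MAX_ORDER)); // 4  (first leaf)
        System.out.println(allocate(MAX_ORDER)); // 5  (its buddy)
        System.out.println(allocate(1));         // 3  (right half; the left half is no longer intact)
        System.out.println(allocate(1));         // -1 (no free half remains)
    }

    static int allocate(int d) {
        int id = 1;
        if (memoryMap[id] > d) {
            return -1; // not even the root can serve depth d
        }
        int initial = -(1 << d);
        byte val = memoryMap[id];
        while (val < d || (id & initial) == 0) {
            id <<= 1;                 // descend to the left child
            val = memoryMap[id];
            if (val > d) {            // left child can't serve -> take the sibling
                id ^= 1;
                val = memoryMap[id];
            }
        }
        memoryMap[id] = UNUSABLE;     // mark allocated
        for (int p = id >>> 1; p >= 1; p >>>= 1) { // update parents to min(children)
            memoryMap[p] = (byte) Math.min(memoryMap[2 * p], memoryMap[2 * p + 1]);
        }
        return id;
    }
}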
Allocation-size differences between 4.1.50 and 4.1.69:
import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;

public class NettyTest {
    public static void main(String[] args) {
        // direct buffers preferred, 1 heap arena, 1 direct arena, pageSize 8192, maxOrder 11
        PooledByteBufAllocator alloc = new PooledByteBufAllocator(true, 1, 1, 8192, 11);
        ByteBuf b = alloc.ioBuffer(514, 514);
        b.retain();
        System.out.println(alloc.dumpStats());
    }
}
4.1.50
1024 bytes (514 is normalized up to the next power of two)
4.1.69
640 bytes (the jemalloc4-style SizeClasses used by later 4.1.x releases space the classes between 512 and 1024 in steps of 128, so 514 rounds up to 640)
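The underlying reason, sketched as a comparison (the 4.1.69 classes come from the jemalloc4-style SizeClasses table; treat the exact spacing as approximate):

// 4.1.50 (power-of-two normalization): every request in 513..1024 rounds up to 1024
//   normalizeCapacity(514) -> 1024
//
// 4.1.69 (SizeClasses): between 512 and 1024 there are 4 classes spaced by 128:
//   512, 640, 768, 896, 1024
//   so a 514-byte request rounds up to 640
//
// the same request wastes 510 bytes on 4.1.50 but only 126 bytes on 4.1.69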