堆内存double free问题分析报告

【NE现场】

pid: 1044, tid: 1073, name: Binder_1  >>> com.android.systemui <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x543dc400
    r0 543dc3f4  r1 20ed7351  r2 00011ff8  r3 aaaaaaab
    r4 b5008680  r5 b0600000  r6 000000d9  r7 56752275
    r8 b6f526b4  r9 b6f52fbc  sl 00000000  fp b5008760
    ip 6becebf1  sp a5eb52f8  lr 035f675f  pc b6f1f546  cpsr 60030030
backtrace:
    #00 pc 0003d546  /system/lib/libc.so (arena_run_dalloc+97)
    #01 pc 0003e5e9  /system/lib/libc.so (je_arena_dalloc_large+24)
    #02 pc 000463d9  /system/lib/libc.so (ifree+700)
    #03 pc 0000fa13  /system/lib/libc.so (free+10)
    #04 pc 00018f15  /system/lib/libandroidfw.so (android::StreamingZipInflater::~StreamingZipInflater()+44)
    #05 pc 0000dc3f  /system/lib/libandroidfw.so (android::_CompressedAsset::getBuffer(bool)+62)
    #06 pc 000177c3  /system/lib/libandroidfw.so (android::ResTable::add(android::Asset*, android::Asset*, int, bool)+22)
    #07 pc 00010a33  /system/lib/libandroidfw.so (android::AssetManager::appendPathToResTable(android::AssetManager::asset_path const&, unsigned int*) const+270)
    #08 pc 00010e5f  /system/lib/libandroidfw.so (android::AssetManager::getResTable(bool) const+102)
    #09 pc 00080935  /system/lib/libandroid_runtime.so
    #10 pc 0001a963  /data/dalvik-cache/arm/system@framework@boot.oat

 

【分析问题】

凡是调用栈里有malloc和free的,基本上都是堆内存问题。对于这类问题我们必须有core文件才能分析。

(gdb) bt
#0  0xb6f6f546 in arena_run_dalloc (arena=arena@entry=0xb5127040, run=<optimized out>, dirty=dirty@entry=true, cleaned=cleaned@entry=false) at external/jemalloc/src/arena.c:1270
#1  0xb6f705c2 in je_arena_dalloc_large_locked (arena=arena@entry=0xb5127040, chunk=chunk@entry=0x8a900000, ptr=ptr@entry=0x8a90a000) at external/jemalloc/src/arena.c:2063
#2  0xb6f705ec in je_arena_dalloc_large (arena=0xb5127040, chunk=chunk@entry=0x8a900000, ptr=ptr@entry=0x8a90a000) at external/jemalloc/src/arena.c:2071
#3  0xb6f783dc in je_arena_dalloc (try_tcache=true, ptr=0x8a90a000, chunk=0x8a900000) at external/jemalloc/include/jemalloc/internal/arena.h:1168
#4  je_idalloct (try_tcache=true, ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:774
#5  je_iqalloct (try_tcache=true, ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:793
#6  je_iqalloc (ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:800
#7  ifree (ptr=0x8a90a000) at external/jemalloc/src/jemalloc.c:1228
#8  0xb6f41a14 in free (mem=<optimized out>) at bionic/libc/bionic/malloc_debug_common.cpp:251
#9  0xb6c74f06 in android::StreamingZipInflater::~StreamingZipInflater (this=0x8eb24280, __in_chrg=<optimized out>) at frameworks/base/libs/androidfw/StreamingZipInflater.cpp:93
#10 0xb6c69c42 in android::_CompressedAsset::getBuffer (this=0x8eb32330) at frameworks/base/libs/androidfw/Asset.cpp:887
#11 0xb6c737c4 in android::ResTable::add (this=0x8e870c00, asset=asset@entry=0x8eb32330, idmapAsset=idmapAsset@entry=0x0, cookie=cookie@entry=5, copyData=copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3371
#12 0xb6c6ca36 in android::AssetManager::appendPathToResTable (this=this@entry=0x8e838390, ap=..., entryIdx=entryIdx@entry=0x89de3444) at frameworks/base/libs/androidfw/AssetManager.cpp:678
#13 0xb6c6ce62 in android::AssetManager::getResTable (this=0x8e838390, required=<optimized out>) at frameworks/base/libs/androidfw/AssetManager.cpp:727
#14 0xb6c6cebc in android::AssetManager::getResources (this=<optimized out>, required=required@entry=true) at frameworks/base/libs/androidfw/AssetManager.cpp:813
#15 0xb6ea5b20 in android::android_content_AssetManager_getStringBlockCount (env=<optimized out>, clazz=<optimized out>) at frameworks/base/core/jni/android_util_AssetManager.cpp:886 

从#9,#10层可以看到,是android::_CompressedAsset::getBuffer()中释放StreamingZipInflater对象时挂掉的。对应代码为:

 

const void* _CompressedAsset::getBuffer(bool)
{
    unsigned char* buf = NULL;
 
    if (mBuf != NULL)
        return mBuf;
 
    /*
     * Allocate a buffer and read the file into it.
     */
    buf = new unsigned char[mUncompressedLen];
    if (buf == NULL) {
        ALOGW("alloc %ld bytes failed\n", (long) mUncompressedLen);
        goto bail;
    }
 
    if (mMap != NULL) {
        if (!ZipUtils::inflateToBuffer(mMap->getDataPtr(), buf,
                mUncompressedLen, mCompressedLen))
            goto bail;
    } else {
        assert(mFd >= 0);
 
        /*
         * Seek to the start of the compressed data.
         */
        if (lseek(mFd, mStart, SEEK_SET) != mStart)
            goto bail;
 
        /*
         * Expand the data into it.
         */
        if (!ZipUtils::inflateToBuffer(mFd, buf, mUncompressedLen,
                mCompressedLen))
            goto bail;
    }
 
    /*
     * Success - now that we have the full asset in RAM we
     * no longer need the streaming inflater
     */
    delete mZipInflater;        // <<<<<<<这里挂掉的
    mZipInflater = NULL;
    
    mBuf = buf;
    buf = NULL;
 
 bail:
    delete[] buf;
    return mBuf;
}

 

 #0层对应的代码如下:

 

@external/jemalloc/src/arena.c

static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
{
        arena_chunk_t *chunk;
        size_t size, run_ind, run_pages, flag_dirty;
 
        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
        run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
        assert(run_ind >= map_bias);
        assert(run_ind < chunk_npages);
        if (arena_mapbits_large_get(chunk, run_ind) != 0) {
                size = arena_mapbits_large_size_get(chunk, run_ind);
                assert(size == PAGE ||
                    arena_mapbits_large_size_get(chunk,
                    run_ind+(size>>LG_PAGE)-1) == 0);
        } else {
                size_t binind = arena_bin_index(arena, run->bin);
                arena_bin_info_t *bin_info = &arena_bin_info[binind]; 
                size = bin_info->run_size;     // <<<<<<<<<<<<<<<<<<<<#0层
        }

 

NE的点是size = bin_info->run_size

arena_mapbits_large_get(chunk, run_ind)的判断表示当前内存快是否是large块。这里走了else,表示当前块是small块。

而调用栈里#2中je_arena_dalloc_large表示这里是large块。其对应的判断在#3层:

 

@external/jemalloc/include/jemalloc/internal/arena.h

JEMALLOC_ALWAYS_INLINE void
arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
{
        size_t pageind, mapbits;
        tcache_t *tcache;
 
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapbits = arena_mapbits_get(chunk, pageind);
        assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
        if ((mapbits & CHUNK_MAP_LARGE) == 0) {
                /* Small allocation. */
                if (try_tcache && (tcache = tcache_get(false)) != NULL) {
                        size_t binind;
 
                        binind = arena_ptr_small_binind_get(ptr, mapbits);
                        tcache_dalloc_small(tcache, ptr, binind);
                } else
                        arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
        } else {
                size_t size = arena_mapbits_large_size_get(chunk, pageind);
 
                assert(((uintptr_t)ptr & PAGE_MASK) == 0);
 
                if (try_tcache && size <= tcache_maxclass && (tcache =
                    tcache_get(false)) != NULL) {
                        tcache_dalloc_large(tcache, ptr, size);
                } else
                        arena_dalloc_large(chunk->arena, chunk, ptr);    // <<<<<<<<<<<<<<<<<<<<#3层
        }
}

 

arena.h中的(mapbits & CHUNK_MAP_LARGE) 和

arena.c中的arena_mapbits_large_get(chunk, run_ind) 是同样的判断。

 

 

 

如下面代码:

JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
        size_t mapbits;
 
        mapbits = arena_mapbits_get(chunk, pageind);
        return (mapbits & CHUNK_MAP_LARGE);
}

为什么同样的判断在时刻得到的值不一样呢?很可能是同一时刻有别的执行流在操作这个值!

查找其他线程的当前调用栈,发现确实存可疑线程:

(gdb) t 24
[Switching to thread 24 (LWP 4234)]
#0  syscall () at bionic/libc/arch-arm/bionic/syscall.S:44

(gdb) bt
#0  syscall () at bionic/libc/arch-arm/bionic/syscall.S:44
#1  0xb6f46016 in __futex (timeout=0x0, value=2, op=128, ftx=0xb5127048) at bionic/libc/private/bionic_futex.h:45
#2  __futex_wait_ex (ftx=ftx@entry=0xb5127048, shared=shared@entry=false, value=value@entry=2, timeout=0x0) at bionic/libc/private/bionic_futex.h:66
#3  0xb6f463ac in _normal_lock (shared=0, mutex=0xb5127048) at bionic/libc/bionic/pthread_mutex.cpp:337
#4  pthread_mutex_lock (mutex=mutex@entry=0xb5127048) at bionic/libc/bionic/pthread_mutex.cpp:457
#5  0xb6f6fb78 in je_malloc_mutex_lock (mutex=0xb5127048) at external/jemalloc/include/jemalloc/internal/mutex.h:77
#6  arena_bin_nonfull_run_get (bin=0xb5127300, arena=0xb5127040) at external/jemalloc/src/arena.c:1468
#7  arena_bin_malloc_hard (arena=arena@entry=0xb5127040, bin=bin@entry=0xb5127300) at external/jemalloc/src/arena.c:1515
#8  0xb6f6fdf6 in je_arena_tcache_fill_small (arena=0xb5127040, tbin=tbin@entry=0x8eb0e098, binind=binind@entry=5, prof_accumbytes=prof_accumbytes@entry=0) at external/jemalloc/src/arena.c:1573
#9  0xb6f7d97a in je_tcache_alloc_small_hard (tcache=tcache@entry=0x8eb0e000, tbin=tbin@entry=0x8eb0e098, binind=binind@entry=5) at external/jemalloc/src/tcache.c:72
#10 0xb6f79552 in je_tcache_alloc_small (zero=false, size=<optimized out>, tcache=0x8eb0e000) at external/jemalloc/include/jemalloc/internal/tcache.h:272
#11 je_arena_malloc (try_tcache=true, zero=false, size=<optimized out>, arena=0x0) at external/jemalloc/include/jemalloc/internal/arena.h:1074
#12 je_imalloct (arena=0x0, try_tcache=true, size=<optimized out>) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:647
#13 je_imalloc (size=<optimized out>) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:656
#14 imalloc_body (usize=<synthetic pointer>, size=<optimized out>) at external/jemalloc/src/jemalloc.c:920
#15 je_malloc (size=<optimized out>) at external/jemalloc/src/jemalloc.c:932
#16 0xb6f41a40 in malloc (bytes=<optimized out>) at bionic/libc/bionic/malloc_debug_common.cpp:259
#17 0xb6f0eb0c in operator new (size=size@entry=44) at bionic/libc/bionic/new.cpp:26
#18 0xb6c73230 in android::ResTable::parsePackage (this=this@entry=0x8eb85180, pkg=pkg@entry=0x8a923b88, header=0x8eb58040) at frameworks/base/libs/androidfw/ResourceTypes.cpp:5806
#19 0xb6c73600 in android::ResTable::addInternal (this=this@entry=0x8eb85180, data=<optimized out>, data@entry=0x8a91a000, dataSize=<optimized out>, idmapData=idmapData@entry=0x0,
    idmapDataSize=idmapDataSize@entry=0, cookie=cookie@entry=5, copyData=copyData@entry=false) at frameworks/base/libs/androidfw/ResourceTypes.cpp:3539
#20 0xb6c73824 in android::ResTable::add (this=0x8eb85180, asset=asset@entry=0x8eb32330, idmapAsset=idmapAsset@entry=0x0, cookie=cookie@entry=5, copyData=copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3389
#21 0xb6c6ca36 in android::AssetManager::appendPathToResTable (this=this@entry=0x8eb2c160, ap=..., entryIdx=entryIdx@entry=0x8b8d4444) at frameworks/base/libs/androidfw/AssetManager.cpp:678
#22 0xb6c6ce62 in android::AssetManager::getResTable (this=0x8eb2c160, required=<optimized out>) at frameworks/base/libs/androidfw/AssetManager.cpp:727
#23 0xb6c6cebc in android::AssetManager::getResources (this=<optimized out>, required=required@entry=true) at frameworks/base/libs/androidfw/AssetManager.cpp:813
#24 0xb6ea5b20 in android::android_content_AssetManager_getStringBlockCount (env=<optimized out>, clazz=<optimized out>) at frameworks/base/core/jni/android_util_AssetManager.cpp:886

对比两个线程的调用栈:

 

1073线程:

 

#11 0xb6c737c4 in android::ResTable::add (this=0x8e870c00, asset=asset@entry=0x8eb32330, idmapAsset=idmapAsset@entry=0x0, cookie=cookie@entry=5, copyData=copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3371 

 

4234线程:

 

#20 0xb6c73824 in android::ResTable::add (this=0x8eb85180, asset=asset@entry=0x8eb32330 idmapAsset=idmapAsset@entry=0x0, cookie=cookie@entry=5, copyData=copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3389 

 

就是说两个线程都在执行相同的代码,只是4234线程比1073线程快一点。

 

 

 

@frameworks/base/libs/androidfw/ResourceTypes.cpp

status_t ResTable::add(Asset* asset, Asset* idmapAsset, const int32_t cookie, bool copyData) {
    const void* data = asset->getBuffer(true);
    if (data == NULL) {
        ALOGW("Unable to get buffer of resource asset file");
        return UNKNOWN_ERROR;
    }
 
    size_t idmapSize = 0;
    const void* idmapData = NULL;
    if (idmapAsset != NULL) {
        idmapData = idmapAsset->getBuffer(true);
        if (idmapData == NULL) {
            ALOGW("Unable to get buffer of idmap asset file");
            return UNKNOWN_ERROR;
        }
        idmapSize = static_cast<size_t>(idmapAsset->getLength());
    }
 
    return addInternal(data, static_cast<size_t>(asset->getLength()),
            idmapData, idmapSize, cookie, copyData);
}

asset->getBuffer(true)前是没有任何判断的,

所以4234必定调用过 asset->getBuffer(),且两个执行流操作的都是同一个asset对象(地址都是0x8eb32330)。

至此确定为重复释放。

 

【解决方案】

 
所以解决方案是:
_CompressedAsset::getBuffer()中做加锁保护。
posted @ 2017-05-10 21:02  YYPapa  阅读(2091)  评论(0编辑  收藏  举报