用crash工具分析内核死锁问题

拿到一个定屏现场后先ramdump一下,再用crash工具加载dump文件和vmlinux。

进入crash工具环境后,首先查看一下所有D状态的线程:

crash_ARM64> ps |grep UN
     59      2   1  ffffffd774998000  UN   0.0       0      0  [kworker/u16:1]
    161      2   0  ffffffd7738e6400  UN   0.0       0      0  [mdss_dsi_event]
    428      2   3  ffffffd772fb8000  UN   0.0       0      0  [mmc-cmdqd/0]
    494      2   1  ffffffd772943e80  UN   0.0       0      0  [msm-core:sampli]
   1119   1473   1  ffffffd6ae10f080  UN   2.7 2089184 222932  Binder:1651_5
   1643      1   6  ffffffd757d65780  UN   0.2  133072  15584  Binder:1474_2
   1651   1473   2  ffffffd728b8b200  UN   2.7 2089184 222932  tencent.karaoke
   1659   1473   0  ffffffd728b89900  UN   2.7 2089184 222932  JDWP
   1663   1473   2  ffffffd64894b200  UN   2.7 2089184 222932  HeapTaskDaemon
   1711   1473   2  ffffffd627cff080  UN   2.7 2089184 222932  ccess.Collector
   1749   1473   5  ffffffd705b4f080  UN   2.7 2089184 222932  TcmReceiver
   1805   1473   1  ffffffd728b8e400  UN   2.7 2089184 222932  ReporterMachine
   1818   1473   2  ffffffd64fbe3e80  UN   2.7 2089184 222932  pool-2-thread-1
   1862   1473   2  ffffffd71ba99900  UN   2.7 2089184 222932  pool-4-thread-1
   1864   1473   0  ffffffd7071de400  UN   2.7 2089184 222932  pool-4-thread-3
   1869   1473   1  ffffffd6c3f6a580  UN   2.7 2089184 222932  TcmReceiver
   1872   1472   4  ffffffd75a8abe80  UN   4.2 2678340 350496  android.bg
   1873   1472   6  ffffffd75a8acb00  UN   4.2 2678340 350496  ActivityManager
   1909   1472   6  ffffffd7587e1900  UN   4.2 2678340 350496  android.display
   2107   1473   1  ffffffd75ef54b00  UN   2.7 2089184 222932  PackageProcesso
   2650   1472   0  ffffffd758db2580  UN   4.2 2678340 350496  ConnectivitySer
   2767    601   2  ffffffd6e2180c80  UN   0.0   19556   3888  sdcard
   2980   1472   0  ffffffd63ad2e400  UN   4.2 2678340 350496  watchdog
   3337   1472   0  ffffffd65bb72580  UN   4.2 2678340 350496  Binder:1817_18
  18694      2   3  ffffffd60443a580  UN   0.0       0      0  [mdss_fb0]
  28387      1   6  ffffffd616f6cb00  UN   0.0    9656   1936  ps
  29239      1   2  ffffffd6156f8c80  UN   0.0    9656   1944  ps
  30079      1   6  ffffffd659b72580  UN   0.0    9656   1936  ps
  30271      1   6  ffffffd626a1be80  UN   0.0    9656   1940  ps

一般界面定屏跟system_server相关,所以先看system_server的几个关键线程:

crash_ARM64> bt 1909
PID: 1909   TASK: ffffffd7587e1900  CPU: 6   COMMAND: "android.display"
 #0 [ffffffd7587ef8d0] __switch_to at ffffff99a6685560
 #1 [ffffffd7587ef900] __schedule at ffffff99a74df9f0
 #2 [ffffffd7587ef960] schedule at ffffff99a74dfd54
 #3 [ffffffd7587ef980] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd7587ef9a0] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd7587efa00] mutex_lock at ffffff99a74e1664
 #6 [ffffffd7587efa20] binder_alloc_new_buf at ffffff99a71b9f9c
 #7 [ffffffd7587efab0] binder_transaction at ffffff99a71b5400
 #8 [ffffffd7587efbe0] binder_thread_write at ffffff99a71b7500
 #9 [ffffffd7587efcd0] binder_ioctl_write_read at ffffff99a71b7f38
#10 [ffffffd7587efd50] binder_ioctl at ffffff99a71b834c
#11 [ffffffd7587efdf0] do_vfs_ioctl at ffffff99a67c4820
#12 [ffffffd7587efe80] sys_ioctl at ffffff99a67c4958

crash_ARM64> bt 1873
PID: 1873   TASK: ffffffd75a8acb00  CPU: 6   COMMAND: "ActivityManager"
 #0 [ffffffd75ca378d0] __switch_to at ffffff99a6685560
 #1 [ffffffd75ca37900] __schedule at ffffff99a74df9f0
 #2 [ffffffd75ca37960] schedule at ffffff99a74dfd54
 #3 [ffffffd75ca37980] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd75ca379a0] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd75ca37a00] mutex_lock at ffffff99a74e1664
 #6 [ffffffd75ca37a20] binder_alloc_new_buf at ffffff99a71b9f9c
 #7 [ffffffd75ca37ab0] binder_transaction at ffffff99a71b5400
 #8 [ffffffd75ca37be0] binder_thread_write at ffffff99a71b7500
 #9 [ffffffd75ca37cd0] binder_ioctl_write_read at ffffff99a71b7f38
#10 [ffffffd75ca37d50] binder_ioctl at ffffff99a71b834c
#11 [ffffffd75ca37df0] do_vfs_ioctl at ffffff99a67c4820
#12 [ffffffd75ca37e80] sys_ioctl at ffffff99a67c4958

看到都是在binder_alloc_new_buf时候被挂起的,显然是同一个锁,我们得先找出这个锁的地址。

以下是找锁的方法:

首先从mutex_lock函数的汇编代码入手:

crash_ARM64> p mutex_lock
mutex_lock = $26 = 
 {void (struct mutex *)} 0xffffff99a74e163c

crash_ARM64> dis mutex_lock
0xffffff99a74e163c <mutex_lock>:        stp     x29, x30, [sp,#-32]!
0xffffff99a74e1640 <mutex_lock+4>:      mov     x29, sp
0xffffff99a74e1644 <mutex_lock+8>:      str     x19, [sp,#16]
0xffffff99a74e1648 <mutex_lock+12>:     mov     x19, x0
0xffffff99a74e164c <mutex_lock+16>:     prfm    pstl1strm, [x0]
0xffffff99a74e1650 <mutex_lock+20>:     ldaxr   w1, [x0]
0xffffff99a74e1654 <mutex_lock+24>:     sub     w1, w1, #0x1
0xffffff99a74e1658 <mutex_lock+28>:     stxr    w2, w1, [x0]
0xffffff99a74e165c <mutex_lock+32>:     cbnz    w2, 0xffffff99a74e1650
0xffffff99a74e1660 <mutex_lock+36>:     tbz     w1, #31, 0xffffff99a74e1668
0xffffff99a74e1664 <mutex_lock+40>:     bl      0xffffff99a74e14ec
0xffffff99a74e1668 <mutex_lock+44>:     mrs     x0, sp_el0
0xffffff99a74e166c <mutex_lock+48>:     ldr     x0, [x0,#16]
0xffffff99a74e1670 <mutex_lock+52>:     str     x0, [x19,#24]
0xffffff99a74e1674 <mutex_lock+56>:     ldr     x19, [sp,#16]
0xffffff99a74e1678 <mutex_lock+60>:     ldp     x29, x30, [sp],#32
0xffffff99a74e167c <mutex_lock+64>:     ret

mutex_lock的第一个参数就是我们要找的struct mutex,在函数入口处被保存在x19寄存器中,

0xffffff99a74e1648 <mutex_lock+12>:     mov     x19, x0

下一步会调用__mutex_lock_slowpath:

0xffffff99a74e1664 <mutex_lock+40>:     bl      0xffffff99a74e14ec

一般x19寄存器会保存在下一级函数的栈帧中,比如

crash_ARM64> dis __mutex_lock_slowpath
0xffffff99a74e14ec <__mutex_lock_slowpath>:     stp     x29, x30, [sp,#-96]!
0xffffff99a74e14f0 <__mutex_lock_slowpath+4>:   mov     x29, sp
0xffffff99a74e14f4 <__mutex_lock_slowpath+8>:   stp     x19, x20, [sp,#16]
0xffffff99a74e14f8 <__mutex_lock_slowpath+12>:  stp     x21, x22, [sp,#32]
0xffffff99a74e14fc <__mutex_lock_slowpath+16>:  stp     x23, x24, [sp,#48]
0xffffff99a74e1500 <__mutex_lock_slowpath+20>:  mrs     x20, sp_el0
0xffffff99a74e1504 <__mutex_lock_slowpath+24>:  ldr     w1, [x20,#24]
0xffffff99a74e1508 <__mutex_lock_slowpath+28>:  mov     x19, x0
...

因此,我们只需要在__mutex_lock_slowpath的栈帧中找到x19,它就是我们要找的struct mutex了。

通过bt的内容可知,__mutex_lock_slowpath的栈帧范围是ffffffd75ca379a0~ffffffd75ca37a00:

crash_ARM64> bt 1873
PID: 1873   TASK: ffffffd75a8acb00  CPU: 6   COMMAND: "ActivityManager"
 #0 [ffffffd75ca378d0] __switch_to at ffffff99a6685560
 #1 [ffffffd75ca37900] __schedule at ffffff99a74df9f0
 #2 [ffffffd75ca37960] schedule at ffffff99a74dfd54
 #3 [ffffffd75ca37980] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd75ca379a0] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd75ca37a00] mutex_lock at ffffff99a74e1664
 #6 [ffffffd75ca37a20] binder_alloc_new_buf at ffffff99a71b9f9c
 #7 [ffffffd75ca37ab0] binder_transaction at ffffff99a71b5400
 #8 [ffffffd75ca37be0] binder_thread_write at ffffff99a71b7500
 #9 [ffffffd75ca37cd0] binder_ioctl_write_read at ffffff99a71b7f38
#10 [ffffffd75ca37d50] binder_ioctl at ffffff99a71b834c
#11 [ffffffd75ca37df0] do_vfs_ioctl at ffffff99a67c4820
#12 [ffffffd75ca37e80] sys_ioctl at ffffff99a67c4958

通过rd命令查看这个地址:

crash_ARM64> rd ffffffd75ca379a0 -e ffffffd75ca37a00
ffffffd75ca379a0:  ffffffd75ca37a00 ffffff99a74e1668   .z.\....h.N.....
ffffffd75ca379b0:  ffffffd6dfa02200 ffffffd6dfa02200   ."......."......
ffffffd75ca379c0:  0000000000000000 ffffffd75ca37c80   .........|.\....
ffffffd75ca379d0:  ffffff99a9109000 ffffffd75c1cbc00   ...........\....
ffffffd75ca379e0:  ffffffd700000000 ffffffd7587ef9e8   ..........~X....
ffffffd75ca379f0:  ffffffd758dc39e8 ffffffd75a8acb00   .9.X.......Z....

再对应__mutex_lock_slowpath的汇编代码:

crash_ARM64> dis __mutex_lock_slowpath
0xffffff99a74e14ec <__mutex_lock_slowpath>:     stp     x29, x30, [sp,#-96]!
0xffffff99a74e14f0 <__mutex_lock_slowpath+4>:   mov     x29, sp
0xffffff99a74e14f4 <__mutex_lock_slowpath+8>:   stp     x19, x20, [sp,#16]
0xffffff99a74e14f8 <__mutex_lock_slowpath+12>:  stp     x21, x22, [sp,#32]
0xffffff99a74e14fc <__mutex_lock_slowpath+16>:  stp     x23, x24, [sp,#48]
...

栈帧中的各个寄存器的对应位置如下:

ffffffd75ca379a0:  ffffffd75ca37a00 ffffff99a74e1668   .z.\....h.N.....
            x29              x30
ffffffd75ca379b0:  ffffffd6dfa02200 ffffffd6dfa02200   ."......."......
                   x19              x20
ffffffd75ca379c0:  0000000000000000 ffffffd75ca37c80   .........|.\....
                   x21              x22
ffffffd75ca379d0:  ffffff99a9109000 ffffffd75c1cbc00   ...........\....
                   x23              x24
ffffffd75ca379e0:  ffffffd700000000 ffffffd7587ef9e8   ..........~X....
ffffffd75ca379f0:  ffffffd758dc39e8 ffffffd75a8acb00   .9.X.......Z....

我们要找的struct mutex就是x19,也就是ffffffd6dfa02200

不仅仅是mutex,通过上面的方式任何一个参数都有可能在栈里面找到,这也是栈推导的最基本的方式。

接下来再用crash查看这个值:

crash_ARM64> struct mutex ffffffd6dfa02200
struct mutex {
  count = {
    counter = -4
  }, 
  ...
  owner = 0xffffffd65bb72580, 
  ...
}

其中owner就是持有该锁的线程的task_struct指针。它的pid为:

crash_ARM64> task_struct.pid 0xffffffd65bb72580
  pid = 3337

查看这个线程的调用栈:

crash_ARM64> bt 3337
PID: 3337   TASK: ffffffd65bb72580  CPU: 0   COMMAND: "Binder:1817_18"
 #0 [ffffffd66ccf7870] __switch_to at ffffff99a6685560
 #1 [ffffffd66ccf78a0] __schedule at ffffff99a74df9f0
 #2 [ffffffd66ccf7900] schedule at ffffff99a74dfd54
 #3 [ffffffd66ccf7920] rwsem_down_write_failed at ffffff99a74e2388
 #4 [ffffffd66ccf7990] down_write at ffffff99a74e1a20
 #5 [ffffffd66ccf79b0] binder_update_page_range at ffffff99a71b9938
 #6 [ffffffd66ccf7a20] binder_alloc_new_buf at ffffff99a71ba238
 #7 [ffffffd66ccf7ab0] binder_transaction at ffffff99a71b5400
 #8 [ffffffd66ccf7be0] binder_thread_write at ffffff99a71b7500
 #9 [ffffffd66ccf7cd0] binder_ioctl_write_read at ffffff99a71b7f38
#10 [ffffffd66ccf7d50] binder_ioctl at ffffff99a71b834c
#11 [ffffffd66ccf7df0] do_vfs_ioctl at ffffff99a67c4820
#12 [ffffffd66ccf7e80] sys_ioctl at ffffff99a67c4958

原来这个pid为3337的Binder:1817_18,它持有了mutex锁以后,又被semaphore锁给挂住了。

下一步就是要看这个semaphore锁是谁持有的。

同样的方法,先看down_write函数:

crash_ARM64> p down_write
down_write = $27 = 
 {void (struct rw_semaphore *)} 0xffffff99a74e19f0

crash_ARM64> dis down_write
0xffffff99a74e19f0 <down_write>:        stp     x29, x30, [sp,#-32]!
0xffffff99a74e19f4 <down_write+4>:      mov     x1, #0xffffffff00000001         // #-4294967295
0xffffff99a74e19f8 <down_write+8>:      mov     x29, sp
0xffffff99a74e19fc <down_write+12>:     str     x19, [sp,#16]
0xffffff99a74e1a00 <down_write+16>:     mov     x19, x0
0xffffff99a74e1a04 <down_write+20>:     prfm    pstl1strm, [x0]
0xffffff99a74e1a08 <down_write+24>:     ldaxr   x2, [x0]
0xffffff99a74e1a0c <down_write+28>:     add     x2, x2, x1
0xffffff99a74e1a10 <down_write+32>:     stxr    w3, x2, [x0]
0xffffff99a74e1a14 <down_write+36>:     cbnz    w3, 0xffffff99a74e1a08
0xffffff99a74e1a18 <down_write+40>:     cmp     x2, x1
0xffffff99a74e1a1c <down_write+44>:     b.eq    0xffffff99a74e1a24
0xffffff99a74e1a20 <down_write+48>:     bl      0xffffff99a74e20b4
0xffffff99a74e1a24 <down_write+52>:     mrs     x0, sp_el0
0xffffff99a74e1a28 <down_write+56>:     ldr     x0, [x0,#16]
0xffffff99a74e1a2c <down_write+60>:     str     x0, [x19,#32]
0xffffff99a74e1a30 <down_write+64>:     ldr     x19, [sp,#16]
0xffffff99a74e1a34 <down_write+68>:     ldp     x29, x30, [sp],#32
0xffffff99a74e1a38 <down_write+72>:     ret

它的参数被保存在x19,因此我们可以去下一级函数rwsem_down_write_failed中查找这个x19的值。

crash_ARM64> dis 0xffffff99a74e20b4
0xffffff99a74e20b4 <rwsem_down_write_failed>:   stp     x29, x30, [sp,#-112]!
0xffffff99a74e20b8 <rwsem_down_write_failed+4>: mov     x29, sp
0xffffff99a74e20bc <rwsem_down_write_failed+8>: stp     x19, x20, [sp,#16]
0xffffff99a74e20c0 <rwsem_down_write_failed+12>:        stp     x21, x22, [sp,#32]
0xffffff99a74e20c4 <rwsem_down_write_failed+16>:        mov     x19, x0
0xffffff99a74e20c8 <rwsem_down_write_failed+20>:        str     x25, [sp,#64]
0xffffff99a74e20cc <rwsem_down_write_failed+24>:        stp     x23, x24, [sp,#48]
0xffffff99a74e20d0 <rwsem_down_write_failed+28>:        mov     x0, #0xffffffff                 // #4294967295
0xffffff99a74e20d4 <rwsem_down_write_failed+32>:        prfm    pstl1strm, [x19]
...

x19保存在rwsem_down_write_failed栈帧偏移16的位置。rwsem_down_write_failed栈帧的地址是ffffffd66ccf7920。

 #3 [ffffffd66ccf7920] rwsem_down_write_failed at ffffff99a74e2388
 #4 [ffffffd66ccf7990] down_write at ffffff99a74e1a20

因此可以直接得到x19的值为ffffffd76e349a68:

crash_ARM64> rd ffffffd66ccf7920 -e ffffffd66ccf7990
ffffffd66ccf7920:  ffffffd66ccf7990 ffffff99a74e1a24   .y.l....$.N.....
                   x29              x30
ffffffd66ccf7930:  ffffffd76e349a68 ffffffd6dfa02200   h.4n....."......
                   x19              x20
ffffffd66ccf7940:  ffffffd76e349a00 0000000000000000   ..4n............
ffffffd66ccf7950:  ffffff801250dee8 0000000000000000   ..P.............
ffffffd66ccf7960:  ffffff801250e000 ffffff99a68e2340   ..P.....@#......
ffffffd66ccf7970:  ffffffd6a3937d70 ffffffd70e1abd70   p}......p.......
ffffffd66ccf7980:  ffffffd65bb72580 ffffff9900000000   .%.[............

参数struct rw_semaphore的指针为ffffffd76e349a68:

crash_ARM64> struct -x rw_semaphore ffffffd76e349a68
struct rw_semaphore {
  count = 0xffffffff00000001, 
  ...
  owner = 0x0
}

由于没有打开CONFIG_RWSEM_SPIN_ON_OWNER宏,所以owner为0。

其实mutex也是需要打开指定的宏(CONFIG_DEBUG_MUTEXES或CONFIG_MUTEX_SPIN_ON_OWNER),才会记录owner的。

没有记录owner,再要查找哪个线程持有锁就比较难了。

先从代码中看看被锁住的是哪个锁:

static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
                    void *start, void *end,
                    struct vm_area_struct *vma)
{
    ...
    if (vma)
        mm = NULL;
    else
        mm = get_task_mm(alloc->tsk);

    if (mm) {
        down_write(&mm->mmap_sem);
        vma = alloc->vma;
        ...

原来是mm->mmap_sem,这个是个大锁,很多地方都会申请这个锁。

我们可以通过脚本遍历每个task的栈,来找出哪个线程持有了这个semaphore,具体思路后面会讲到,这里我们先换一种方法。

我们先查看所有D状态的线程,他们都是被哪些锁给block了。

通过bt命令一个个查找调用栈后,发现绝大部分都是mm->mmap_sem锁和前面的mutex锁给block的,如:

crash_ARM64> bt 1749
PID: 1749   TASK: ffffffd705b4f080  CPU: 5   COMMAND: "TcmReceiver"
 #0 [ffffffd6db78fc90] __switch_to at ffffff99a6685560
 #1 [ffffffd6db78fcc0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6db78fd20] schedule at ffffff99a74dfd54
 #3 [ffffffd6db78fd40] rwsem_down_read_failed at ffffff99a74e2094
 #4 [ffffffd6db78fd90] down_read at ffffff99a74e19e4
 #5 [ffffffd6db78fda0] do_page_fault at ffffff99a6696f00
 #6 [ffffffd6db78fe10] do_mem_abort at ffffff99a6680ad8

只有一个sdcard线程是被其他锁给block的:

crash_ARM64> bt 2767
PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
 #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
 #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
 #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
 #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
 #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
 #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
 #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
#10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
#11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

通过前面的方法可知,这个mutex锁的指针是存放在__mutex_lock_slowpath的栈帧地址+16的位置,这里就是ffffffd6e218fba0

crash_ARM64> rd ffffffd6e218fba0
ffffffd6e218fba0:  ffffffd6948f4090

因此,这里的mutex的指针为ffffffd6948f4090,由于mutex已经打开了DEBUG宏,所以很容易得到owner:

crash_ARM64> struct mutex ffffffd6948f4090
struct mutex {
  count = {
    counter = -1
  }, 
  ...
  owner = 0xffffffd771be3e80, 
  ...
}

前面提到owner就是持有这个mutex锁的线程的task_struct指针,其pid为:

crash_ARM64> struct task_struct.pid 0xffffffd771be3e80
  pid = 2124

其调用栈为:

crash_ARM64> bt 2124
PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
 #0 [ffffffd6d396b4b0] __switch_to at ffffff99a6685560
 #1 [ffffffd6d396b4e0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6d396b540] schedule at ffffff99a74dfd54
 #3 [ffffffd6d396b560] request_wait_answer at ffffff99a68c0350
 #4 [ffffffd6d396b5d0] __fuse_request_send at ffffff99a68c2f60
 #5 [ffffffd6d396b600] fuse_request_send at ffffff99a68c2fb8
 #6 [ffffffd6d396b610] fuse_send_readpages at ffffff99a68c8ef8
 #7 [ffffffd6d396b640] fuse_readpages at ffffff99a68c8fe8
 #8 [ffffffd6d396b6c0] __do_page_cache_readahead at ffffff99a676cfc0
 #9 [ffffffd6d396b780] filemap_fault at ffffff99a6762fc4
#10 [ffffffd6d396b7f0] __do_fault at ffffff99a6789c38
#11 [ffffffd6d396b860] handle_mm_fault at ffffff99a678d540
#12 [ffffffd6d396b920] do_page_fault at ffffff99a6696f4c
#13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
#14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
     PC: ffffffd7017056c0  [unknown or invalid address]
     LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
     SP: 0000000080000145  PSTATE: 00000000
    X29: ffffffd6d396bb70  X28: ffffff99a67619a0  X27: ffffffd6d396bb80
    X26: 0000000000001000  X25: ffffff99a7604000  X24: 0000000000000000
    X23: ffffff99a761b580  X22: ffffffd623366500  X21: 0000000000001000
    X20: ffffffd6948f4140  X19: 0000000000000000  X18: 0000000000000000
    X17: ffffffd6d396be08  X16: 0000000000000000  X15: 0000000000000000
    X14: ffffff99a668f4a0  X13: 0000000000000000  X12: 00000000ee04c905
    X11: 00000000d0bf8f50  X10: 0000000000000018   X9: 0101010101010101
     X8: 7f7f7f7f7f7f7f7f   X7: 722433ffff787368   X6: 7f7f7f7f7f7f7fff
     X5: 0000000000002800   X4: 0000008080000000   X3: ffffffd6d396bdd0
     X2: 0000000000002800   X1: 00000000d5d3e000   X0: 0000000000000000
#15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
#16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4
#17 [ffffffd6d396bd00] fuse_passthrough_write_iter at ffffff99a68cd4d0
#18 [ffffffd6d396bd50] fuse_file_write_iter at ffffff99a68ca1f4
#19 [ffffffd6d396bdb0] __vfs_write at ffffff99a67b49a0
#20 [ffffffd6d396be30] vfs_write at ffffff99a67b515c
#21 [ffffffd6d396be70] sys_pwrite64 at ffffff99a67b5a1c

注意这里的do_page_fault,从前面1749线程的调用栈可知do_page_fault是需要持有mm->mmap_sem锁的,

但这里却没有被block,很可能mm->mmap_sem锁的owner就是这个2124线程,查看代码可确认这一点:

static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                   struct pt_regs *regs)
{
    struct task_struct *tsk;
    struct mm_struct *mm;
    ...
    if (!down_read_trylock(&mm->mmap_sem)) {
       if (!user_mode(regs) && !search_exception_tables(regs->pc))
            goto no_context;
retry:
        down_read(&mm->mmap_sem);
   ...
   fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
   ...


static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
               unsigned int mm_flags, unsigned long vm_flags,
               struct task_struct *tsk)
{
    ...
    return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);

当前函数已经执行到handle_mm_fault以下了,就能确定已经持有mm->mmap_sem锁了。

因此前面所有的线程其实都是被2124线程给block的,现在只需要解决2124线程为什么被block就可以了。

crash_ARM64> ps 2124
   PID    PPID  CPU       TASK        ST  %MEM     VSZ    RSS  COMM
   2124   1473   0  ffffffd771be3e80  IN   2.7 2089184 222932  wnloader.impl.c

2124线程是处于IN状态,也就是说它并不是被锁住的。

从调用栈中可以看到2124线程其实是在等待fuse的处理结果,我们先通过栈推导,算出fuse_req的地址为ffffffd6db690d00

crash_ARM64> struct -x fuse_req ffffffd6db690d00
struct fuse_req {
  list = {
    next = 0xffffffd7063d8000, 
    prev = 0xffffffd76f868680
  }, 
  intr_entry = {
    next = 0xffffffd6db690d10, 
    prev = 0xffffffd6db690d10
  }, 
  count = {
    counter = 0x2
  }, 
  intr_unique = 0x0, 
  flags = 0x89, 
  ...

其中flags = 0x89 = 0b10001001

而代码中:

enum fuse_req_flag {
	FR_ISREPLY,		1	
	FR_FORCE,		0
	FR_BACKGROUND,	        0
	FR_WAITING,		1
	FR_ABORTED,		0
	FR_INTERRUPTED,	        0
	FR_LOCKED,		0
	FR_PENDING,		1
	FR_SENT,		
	FR_FINISHED,
	FR_PRIVATE,
};

因此当前请求的状态为FR_ISREPLY|FR_WAITING|FR_PENDING,注意当前请求处于pending状态,

这表示fuse的daemon进程在忙碌,还没取走当前请求,也就是说daemon进程还在忙着处理前一个请求。

fuse的daemon进程就是sdcard进程,fuse的请求一般都是由sdcard进程来处理的。

我们看一下sdcard进程的状态:

crash_ARM64> ps -g 2767
PID: 2755   TASK: ffffffd75b29e400  CPU: 5   COMMAND: "sdcard"
  PID: 2764   TASK: ffffffd75b29f080  CPU: 0   COMMAND: "sdcard"
  PID: 2765   TASK: ffffffd75b29be80  CPU: 2   COMMAND: "sdcard"
  PID: 2766   TASK: ffffffd6e2180000  CPU: 4   COMMAND: "sdcard"
  PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
crash_ARM64> bt 2765
PID: 2765   TASK: ffffffd75b29be80  CPU: 2   COMMAND: "sdcard"
 #0 [ffffffd6e217fbf0] __switch_to at ffffff99a6685560
 #1 [ffffffd6e217fc20] __schedule at ffffff99a74df9f0
 #2 [ffffffd6e217fc80] schedule at ffffff99a74dfd54
 #3 [ffffffd6e217fca0] fuse_dev_do_read at ffffff99a68c1f50
 #4 [ffffffd6e217fd50] fuse_dev_read at ffffff99a68c2568
 #5 [ffffffd6e217fdb0] __vfs_read at ffffff99a67b48d8
 #6 [ffffffd6e217fe30] vfs_read at ffffff99a67b5038
 #7 [ffffffd6e217fe70] sys_read at ffffff99a67b5888
 #8 [ffffffd6e217fed0] el0_svc_naked at ffffff99a668462c

crash_ARM64> bt 2766
PID: 2766   TASK: ffffffd6e2180000  CPU: 4   COMMAND: "sdcard"
 #0 [ffffffd6e218bbf0] __switch_to at ffffff99a6685560
 #1 [ffffffd6e218bc20] __schedule at ffffff99a74df9f0
 #2 [ffffffd6e218bc80] schedule at ffffff99a74dfd54
 #3 [ffffffd6e218bca0] fuse_dev_do_read at ffffff99a68c1f50
 #4 [ffffffd6e218bd50] fuse_dev_read at ffffff99a68c2568
 #5 [ffffffd6e218bdb0] __vfs_read at ffffff99a67b48d8
 #6 [ffffffd6e218be30] vfs_read at ffffff99a67b5038
 #7 [ffffffd6e218be70] sys_read at ffffff99a67b5888
 #8 [ffffffd6e218bed0] el0_svc_naked at ffffff99a668462c

crash_ARM64> bt 2767
PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
 #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
 #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
 #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
 #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
 #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
 #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
 #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
#10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
#11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

它的3个工作线程2765 2766 2767中,前两个是在等待状态,而2767线程是处在被2124锁住的状态。

2124线程在等着2767线程,2767线程又被2124线程给锁住,也就是死锁状态了!

 

先看看2767的状态:

crash_ARM64> bt 2767
PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
 #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
 #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
 #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
 #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
 #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
 #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
 #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
 #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
 #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
#10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
#11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

从调用栈中可以看到等锁的地方是do_truncate(),其源码为:

int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp)
{
	...
	mutex_lock(&dentry->d_inode->i_mutex);

这里我们再来一次栈推导,算出dentry的值:

首先,查看do_truncate的汇编代码:

crash_ARM64> dis do_truncate
0xffffff99a67b3554 <do_truncate>:       stp     x29, x30, [sp,#-112]!
0xffffff99a67b3558 <do_truncate+4>:     mov     w4, #0xffffffea                 // #-22
0xffffff99a67b355c <do_truncate+8>:     mov     x29, sp
0xffffff99a67b3560 <do_truncate+12>:    stp     x19, x20, [sp,#16]
0xffffff99a67b3564 <do_truncate+16>:    mov     x19, x0
0xffffff99a67b3568 <do_truncate+20>:    tbnz    x1, #63, 0xffffff99a67b35e4
0xffffff99a67b356c <do_truncate+24>:    orr     w0, w2, #0x8
0xffffff99a67b3570 <do_truncate+28>:    str     x1, [x29,#48]
0xffffff99a67b3574 <do_truncate+32>:    str     w0, [x29,#32]
0xffffff99a67b3578 <do_truncate+36>:    cbz     x3, 0xffffff99a67b358c
0xffffff99a67b357c <do_truncate+40>:    mov     w0, #0x2008                     // #8200
0xffffff99a67b3580 <do_truncate+44>:    str     x3, [x29,#104]
0xffffff99a67b3584 <do_truncate+48>:    orr     w2, w2, w0
0xffffff99a67b3588 <do_truncate+52>:    str     w2, [x29,#32]
0xffffff99a67b358c <do_truncate+56>:    mov     x0, x19
0xffffff99a67b3590 <do_truncate+60>:    bl      0xffffff99a67cb704
0xffffff99a67b3594 <do_truncate+64>:    cmp     w0, wzr
0xffffff99a67b3598 <do_truncate+68>:    mov     w4, w0
0xffffff99a67b359c <do_truncate+72>:    b.lt    0xffffff99a67b35e4
0xffffff99a67b35a0 <do_truncate+76>:    b.eq    0xffffff99a67b35b4
0xffffff99a67b35a4 <do_truncate+80>:    ldr     w1, [x29,#32]
0xffffff99a67b35a8 <do_truncate+84>:    orr     w1, w1, #0x200
0xffffff99a67b35ac <do_truncate+88>:    orr     w0, w1, w0
0xffffff99a67b35b0 <do_truncate+92>:    str     w0, [x29,#32]
0xffffff99a67b35b4 <do_truncate+96>:    ldr     x0, [x19,#48]
0xffffff99a67b35b8 <do_truncate+100>:   add     x0, x0, #0xa8
0xffffff99a67b35bc <do_truncate+104>:   bl      0xffffff99a74e163c
...

第一个参数struct dentry的值x0,保存在x19寄存器中。

在0xffffff99a67b35bc处调用mutex_lock函数之前,x19的值一直没变过。

因此我们可以在mutex_lock的栈帧中找到这个x19。

crash_ARM64> dis mutex_lock
0xffffff99a74e163c <mutex_lock>:        stp     x29, x30, [sp,#-32]!
0xffffff99a74e1640 <mutex_lock+4>:      mov     x29, sp
0xffffff99a74e1644 <mutex_lock+8>:      str     x19, [sp,#16]
...

mutex_lock栈帧地址是0xffffffd6e218fbf0,因此x19是保存在0xffffffd6e218fbf0+0x10=0xffffffd6e218fc00这个地址上。

crash_ARM64> rd 0xffffffd6e218fc00
ffffffd6e218fc00:  ffffffd7017056c0

crash_ARM64> struct dentry ffffffd7017056c0
struct dentry {
  d_flags = 4718784, 
  ...
  d_parent = 0xffffffd6aac46e40, 
  d_name = {
    name = 0xffffffd7017056f8 "test1001"
  }, 
  d_inode = 0xffffffd6948f3fe8, 
  ...

也就是说2767线程在open并truncate名为test1001的文件时,被该文件的inode的mutex给锁住了。

通过parent可以查找上一级的dentry以及文件名,也能推出全路径:

crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac46e40
  d_parent = 0xffffffd6aac46540
  d_name.name = 0xffffffd6aac46e78 "speedMeasure"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac46540
  d_parent = 0xffffffd6aac466c0
  d_name.name = 0xffffffd6aac46578 "files"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac466c0
  d_parent = 0xffffffd75d5a0000
  d_name.name = 0xffffffd6aac466f8 "com.tencent.karaoke"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d5a0000
  d_parent = 0xffffffd6473103c0
  d_name.name = 0xffffffd75d5a0038 "data"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6473103c0
  d_parent = 0xffffffd771750cc0
  d_name.name = 0xffffffd6473103f8 "Android"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd771750cc0
  d_parent = 0xffffffd7715c8a80
  d_name.name = 0xffffffd771750cf8 "0"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd7715c8a80
  d_parent = 0xffffffd6743d2180
  d_name.name = 0xffffffd7715c8ab8 "media"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6743d2180
  d_parent = 0xffffffd6743d2180
  d_name.name = 0xffffffd6743d21b8 "/"

完整的路径为:“/media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001”

其实也可以通过调用栈中do_sys_open函数和do_filp_open函数的栈推导也能推出这个文件名:

 #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
 #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4

其中do_filp_open的定义如下:

crash_ARM64> p do_filp_open
do_filp_open = $31 = 
 {struct file *(int, struct filename *, const struct open_flags *)} 0xffffff99a67c26c0

我们需要知道do_filp_open的第二个参数struct filename指针,也就是说x1的值。

为此我们查找它上一级函数do_sys_open在调用do_filp_open前的寄存器,特别是x1的赋值的状态:

crash_ARM64> dis ffffff99a67b44b4 5
0xffffff99a67b44b4 <do_sys_open+336>:   tbnz    w21, #31, 0xffffff99a67b453c
0xffffff99a67b44b8 <do_sys_open+340>:   mov     w0, w20
0xffffff99a67b44bc <do_sys_open+344>:   mov     x1, x19
0xffffff99a67b44c0 <do_sys_open+348>:   add     x2, x29, #0x48
0xffffff99a67b44c4 <do_sys_open+352>:   bl      0xffffff99a67c26c0

x1是从x19赋值的,因此我们在do_filp_open的栈帧中查找x19的值就可以了。

首先看下do_filp_open的汇编代码,看它是将x19保存在栈帧的哪个位置:

crash_ARM64> dis do_filp_open
0xffffff99a67c26c0 <do_filp_open>:      stp     x29, x30, [sp,#-256]!
0xffffff99a67c26c4 <do_filp_open+4>:    mov     w4, w0
0xffffff99a67c26c8 <do_filp_open+8>:    mov     x3, x1
0xffffff99a67c26cc <do_filp_open+12>:   mov     x29, sp
0xffffff99a67c26d0 <do_filp_open+16>:   stp     x19, x20, [sp,#16]
...

也是保存在栈帧起始位置+16的地方,也就是0xffffffd6e218fd50+0x10=0xffffffd6e218fd60

crash_ARM64> rd 0xffffffd6e218fd60
ffffffd6e218fd60:  ffffffd680d36000

因此,x19也就是struct filename指针的值为ffffffd680d36000:

crash_ARM64> struct filename ffffffd680d36000
struct filename {
  name = 0xffffffd680d3601c "/data/media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001", 
  uptr = 0x7f81401380 <Address 0x7f81401380 out of bounds>, 
  aname = 0x0, 
  refcnt = 1, 
  iname = 0xffffffd680d3601c "/data/media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001"
}

跟前面的结论差不多,唯一不同的是这里多了/data目录,这里就不深究了(我也不清楚为什么,可能是命名空间或者进程工作目录之类的原因吧)。

反正可以确定的是,当前sdcard的工作线程2767是在open sdcard上的com.tencent.karaoke这个应用的某个文件。

而跟它产生死锁的com.tencent.karaoke应用的2124线程,却是在write文件,而且当前的fuse_req是pending状态。

因此可以推测com.tencent.karaoke应用进程中还有一个线程在打开test1001这个文件,并且用了O_TRUNC这个flag(要不然open不会调用do_truncate)。

为了证明这个,先看一下com.tencent.karaoke应用的所有线程的状态:

crash_ARM64> ps -g 2124
PID: 1651   TASK: ffffffd728b8b200  CPU: 2   COMMAND: "tencent.karaoke"
  PID: 1119   TASK: ffffffd6ae10f080  CPU: 1   COMMAND: "Binder:1651_5"
  PID: 1657   TASK: ffffffd6e942f080  CPU: 0   COMMAND: "Jit thread pool"
  PID: 1658   TASK: ffffffd728b88000  CPU: 4   COMMAND: "Signal Catcher"
  PID: 1659   TASK: ffffffd728b89900  CPU: 0   COMMAND: "JDWP"
  PID: 1660   TASK: ffffffd728b8be80  CPU: 0   COMMAND: "ReferenceQueueD"
  PID: 1661   TASK: ffffffd64894a580  CPU: 0   COMMAND: "FinalizerDaemon"
  PID: 1662   TASK: ffffffd64fbe1900  CPU: 0   COMMAND: "FinalizerWatchd"
  PID: 1663   TASK: ffffffd64894b200  CPU: 2   COMMAND: "HeapTaskDaemon"
  PID: 1664   TASK: ffffffd64894f080  CPU: 4   COMMAND: "Binder:1651_1"
  PID: 1665   TASK: ffffffd627caa580  CPU: 4   COMMAND: "Binder:1651_2"
  PID: 1667   TASK: ffffffd64894e400  CPU: 4   COMMAND: "FileObserver"
  PID: 1673   TASK: ffffffd627ca9900  CPU: 4   COMMAND: "Profile Saver"
  PID: 1680   TASK: ffffffd64fbe6400  CPU: 4   COMMAND: "Binder:1651_3"
  PID: 1690   TASK: ffffffd66d819900  CPU: 1   COMMAND: "ent.File.Tracer"
  PID: 1693   TASK: ffffffd66d818000  CPU: 0   COMMAND: "Binder:1651_4"
  PID: 1707   TASK: ffffffd6e9428000  CPU: 1   COMMAND: "pool-1-thread-1"
  PID: 1708   TASK: ffffffd6e942be80  CPU: 0   COMMAND: ".Event.Notifier"
  PID: 1709   TASK: ffffffd627caf080  CPU: 0   COMMAND: "Service.Invoker"
  PID: 1710   TASK: ffffffd627cfcb00  CPU: 0   COMMAND: "Timeout.Monitor"
  PID: 1711   TASK: ffffffd627cff080  CPU: 2   COMMAND: "ccess.Collector"
  PID: 1712   TASK: ffffffd627cfd780  CPU: 1   COMMAND: "pool-1-thread-2"
  PID: 1713   TASK: ffffffd6b6e45780  CPU: 5   COMMAND: "priority-thread"
  PID: 1730   TASK: ffffffd70ea54b00  CPU: 5   COMMAND: "le.common.b$1$1"
  PID: 1731   TASK: ffffffd632759900  CPU: 6   COMMAND: "le.common.b$1$1"
  PID: 1732   TASK: ffffffd63275cb00  CPU: 0   COMMAND: "TbsHandlerThrea"
  PID: 1734   TASK: ffffffd64fbe2580  CPU: 0   COMMAND: "SDKStarter"
  PID: 1735   TASK: ffffffd63275e400  CPU: 0   COMMAND: "NetworkWatcher"
  PID: 1745   TASK: ffffffd6d7fbf080  CPU: 6   COMMAND: "priority-thread"
  PID: 1747   TASK: ffffffd705b4d780  CPU: 0   COMMAND: "priority-thread"
  PID: 1749   TASK: ffffffd705b4f080  CPU: 5   COMMAND: "TcmReceiver"
  PID: 1794   TASK: ffffffd6e20aa580  CPU: 0   COMMAND: "Okio Watchdog"
  PID: 1804   TASK: ffffffd648948c80  CPU: 0   COMMAND: "YunYingReporter"
  PID: 1805   TASK: ffffffd728b8e400  CPU: 1   COMMAND: "ReporterMachine"
  PID: 1810   TASK: ffffffd6d7fbe400  CPU: 1   COMMAND: "business-defaul"
  PID: 1811   TASK: ffffffd6cd39be80  CPU: 4   COMMAND: "priority-thread"
  PID: 1812   TASK: ffffffd683c81900  CPU: 4   COMMAND: "tencent.karaoke"
  PID: 1818   TASK: ffffffd64fbe3e80  CPU: 2   COMMAND: "pool-2-thread-1"
  PID: 1830   TASK: ffffffd648948000  CPU: 5   COMMAND: "RenderThread"
  PID: 1831   TASK: ffffffd6d20f8c80  CPU: 0   COMMAND: "ConnectivityThr"
  PID: 1833   TASK: ffffffd61bfba580  CPU: 5   COMMAND: "resolver_thread"
  PID: 1842   TASK: ffffffd6d7fb8c80  CPU: 0   COMMAND: "CheckDropFrame"
  PID: 1850   TASK: ffffffd6c6bdcb00  CPU: 7   COMMAND: "hwuiTask1"
  PID: 1856   TASK: ffffffd6b6d6d780  CPU: 7   COMMAND: "business-defaul"
  PID: 1858   TASK: ffffffd633b20000  CPU: 7   COMMAND: "pool-3-thread-1"
  PID: 1862   TASK: ffffffd71ba99900  CPU: 2   COMMAND: "pool-4-thread-1"
  PID: 1863   TASK: ffffffd729b89900  CPU: 2   COMMAND: "pool-4-thread-2"
  PID: 1864   TASK: ffffffd7071de400  CPU: 0   COMMAND: "pool-4-thread-3"
  PID: 1869   TASK: ffffffd6c3f6a580  CPU: 1   COMMAND: "TcmReceiver"
  PID: 1878   TASK: ffffffd6d1c48000  CPU: 0   COMMAND: "pool-1-thread-3"
  PID: 1903   TASK: ffffffd6cfba6400  CPU: 0   COMMAND: "ent.File.Tracer"
  PID: 1914   TASK: ffffffd66a407080  CPU: 5   COMMAND: "pool-5-thread-1"
  PID: 1915   TASK: ffffffd66a405780  CPU: 0   COMMAND: "MidService"
  PID: 1920   TASK: ffffffd6d1c49900  CPU: 0   COMMAND: "t.handlerthread"
  PID: 1921   TASK: ffffffd67358f080  CPU: 1   COMMAND: "SDK_SUB"
  PID: 1923   TASK: ffffffd62cbe3200  CPU: 7   COMMAND: "pool-8-thread-1"
  PID: 1925   TASK: ffffffd6373ab200  CPU: 7   COMMAND: "pool-6-thread-1"
  PID: 1969   TASK: ffffffd648f03200  CPU: 0   COMMAND: "HttpManager"
  PID: 1988   TASK: ffffffd71619be80  CPU: 7   COMMAND: "pool-9-thread-1"
  PID: 2102   TASK: ffffffd690635780  CPU: 1   COMMAND: "SenderManager"
  PID: 2107   TASK: ffffffd75ef54b00  CPU: 1   COMMAND: "PackageProcesso"
  PID: 2120   TASK: ffffffd75ef50c80  CPU: 2   COMMAND: "wnloader.impl.c"
  PID: 2122   TASK: ffffffd690630c80  CPU: 0   COMMAND: "resolver_thread"
  PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
  PID: 2125   TASK: ffffffd690636400  CPU: 1   COMMAND: "resolver_thread"
  PID: 2126   TASK: ffffffd68ac03200  CPU: 2   COMMAND: "pool-2-thread-2"
  PID: 2128   TASK: ffffffd6becd5780  CPU: 4   COMMAND: "le.common.b$1$1"
  PID: 2134   TASK: ffffffd6cfacb200  CPU: 0   COMMAND: "karaoke_image_d"
  PID: 2135   TASK: ffffffd737c43e80  CPU: 2   COMMAND: "hwuiTask2"
  PID: 2247   TASK: ffffffd75b30be80  CPU: 4   COMMAND: "database-thread"
  PID: 2282   TASK: ffffffd6e5e05780  CPU: 2   COMMAND: "database-thread"
  PID: 2395   TASK: ffffffd7059a0c80  CPU: 0   COMMAND: "service_process"
  PID: 2684   TASK: ffffffd6a39fbe80  CPU: 2   COMMAND: "Wns.File.Tracer"
  PID: 2719   TASK: ffffffd6a39f8c80  CPU: 3   COMMAND: "pool-11-thread-"
  PID: 2725   TASK: ffffffd66a403200  CPU: 6   COMMAND: "RenderThread"
  PID: 2726   TASK: ffffffd66a404b00  CPU: 6   COMMAND: "RenderThread"
  PID: 2727   TASK: ffffffd66a402580  CPU: 7   COMMAND: "RenderThread"
  PID: 2728   TASK: ffffffd6ba9ccb00  CPU: 6   COMMAND: "RenderThread"
  PID: 2732   TASK: ffffffd6ca225780  CPU: 6   COMMAND: "RenderThread"
  PID: 2733   TASK: ffffffd76a711900  CPU: 6   COMMAND: "RenderThread"
  PID: 2743   TASK: ffffffd65bb73e80  CPU: 6   COMMAND: "RenderThread"
  PID: 2744   TASK: ffffffd6dff60000  CPU: 3   COMMAND: "RenderThread"
  PID: 4224   TASK: ffffffd7035dcb00  CPU: 0   COMMAND: "NanoHttpd Main "
  PID: 4228   TASK: ffffffd637fae400  CPU: 1   COMMAND: "delete-song-cac"
  PID: 4328   TASK: ffffffd6e52ebe80  CPU: 1   COMMAND: "business-extra-"
  PID: 6490   TASK: ffffffd6d86dcb00  CPU: 4   COMMAND: "TaskSchedulerSe"
  PID: 6491   TASK: ffffffd6d86df080  CPU: 2   COMMAND: "TaskSchedulerBa"
  PID: 6492   TASK: ffffffd6d86dbe80  CPU: 2   COMMAND: "TaskSchedulerBa"
  PID: 6493   TASK: ffffffd6d86dd780  CPU: 2   COMMAND: "TaskSchedulerFo"
  PID: 6494   TASK: ffffffd63df65780  CPU: 2   COMMAND: "TaskSchedulerFo"
  PID: 6495   TASK: ffffffd6ca224b00  CPU: 0   COMMAND: "Chrome_DBThread"
  PID: 6496   TASK: ffffffd6734a5780  CPU: 0   COMMAND: "Chrome_FileThre"
  PID: 6497   TASK: ffffffd6a7b96400  CPU: 1   COMMAND: "Chrome_FileUser"
  PID: 6498   TASK: ffffffd6a7b90000  CPU: 0   COMMAND: "Chrome_ProcessL"
  PID: 6499   TASK: ffffffd6a7b92580  CPU: 0   COMMAND: "Chrome_CacheThr"
  PID: 6500   TASK: ffffffd6a7b93200  CPU: 2   COMMAND: "Chrome_IOThread"
  PID: 6503   TASK: ffffffd6a7b94b00  CPU: 5   COMMAND: "IndexedDB"
  PID: 6504   TASK: ffffffd71b2b7080  CPU: 2   COMMAND: "Thread-45"
  PID: 6505   TASK: ffffffd72bd84b00  CPU: 1   COMMAND: "Thread-46"
  PID: 6508   TASK: ffffffd71b2b3e80  CPU: 6   COMMAND: "CleanupReferenc"
  PID: 6510   TASK: ffffffd71b2b1900  CPU: 2   COMMAND: "ConnectivityMan"
  PID: 6511   TASK: ffffffd71b2b6400  CPU: 0   COMMAND: "CookieMonsterCl"
  PID: 6512   TASK: ffffffd71b2b5780  CPU: 5   COMMAND: "CookieMonsterBa"
  PID: 6513   TASK: ffffffd72bd83e80  CPU: 2   COMMAND: "Chrome_InProcRe"
  PID: 6514   TASK: ffffffd681bf5780  CPU: 5   COMMAND: "Chrome_ChildIOT"
  PID: 6515   TASK: ffffffd6e2e25780  CPU: 7   COMMAND: "GpuMemoryThread"
  PID: 6520   TASK: ffffffd61b65e400  CPU: 1   COMMAND: "Compositor"
  PID: 6521   TASK: ffffffd61b65be80  CPU: 6   COMMAND: "Renderer::FILE"
  PID: 6522   TASK: ffffffd61b65f080  CPU: 6   COMMAND: "CompositorTileW"
  PID: 6523   TASK: ffffffd72bd81900  CPU: 5   COMMAND: "CompositorTileW"
  PID: 6524   TASK: ffffffd70e808c80  CPU: 1   COMMAND: "Chrome_InProcGp"
  PID: 6525   TASK: ffffffd6ae108000  CPU: 3   COMMAND: "SimpleCacheWork"
  PID: 6526   TASK: ffffffd71b2b3200  CPU: 3   COMMAND: "Chrome_ChildIOT"
  PID: 6529   TASK: ffffffd6ae109900  CPU: 3   COMMAND: "SimpleCacheWork"
  PID: 6546   TASK: ffffffd758750c80  CPU: 6   COMMAND: "HTMLParserThrea"
  PID: 6556   TASK: ffffffd6ae7f8000  CPU: 1   COMMAND: "ScriptStreamerT"
  PID: 8502   TASK: ffffffd76ed58000  CPU: 1   COMMAND: "KaraProxyPlayer"
  PID: 15621  TASK: ffffffd68116cb00  CPU: 5   COMMAND: "le.common.b$1$1"
  PID: 15689  TASK: ffffffd687cea580  CPU: 2   COMMAND: "resolver_thread"
  PID: 15822  TASK: ffffffd68ac11900  CPU: 2   COMMAND: "resolver_thread"
  PID: 28112  TASK: ffffffd603048c80  CPU: 7   COMMAND: "resolver_thread"

(注:还有另外一种遍历方式crash_ARM64> list task_struct.thread_group -s task_struct.pid -h ffffffd728b8b200)

挨个用bt命令查看,最终定位到:

crash_ARM64> bt 2120
PID: 2120 TASK: ffffffd75ef50c80 CPU: 2 COMMAND: "wnloader.impl.c"
#0 [ffffffd6e08478f0] __switch_to at ffffff99a6685560
#1 [ffffffd6e0847920] __schedule at ffffff99a74df9f0
#2 [ffffffd6e0847980] schedule at ffffff99a74dfd54
#3 [ffffffd6e08479a0] request_wait_answer at ffffff99a68c0350
#4 [ffffffd6e0847a10] __fuse_request_send at ffffff99a68c2f60
#5 [ffffffd6e0847a40] fuse_request_send at ffffff99a68c2fb8
#6 [ffffffd6e0847a50] fuse_simple_request at ffffff99a68c3604
#7 [ffffffd6e0847a80] fuse_send_open at ffffff99a68c64d8
#8 [ffffffd6e0847b40] fuse_do_open at ffffff99a68c7d54
#9 [ffffffd6e0847bb0] fuse_open_common at ffffff99a68c8010
#10 [ffffffd6e0847bf0] fuse_open at ffffff99a68c8058
#11 [ffffffd6e0847c00] do_dentry_open at ffffff99a67b309c
#12 [ffffffd6e0847c50] vfs_open at ffffff99a67b4144
#13 [ffffffd6e0847c80] path_openat at ffffff99a67c1870
#14 [ffffffd6e0847d50] do_filp_open at ffffff99a67c26f8
#15 [ffffffd6e0847e50] do_sys_open at ffffff99a67b44c4
#16 [ffffffd6e0847eb0] compat_sys_openat at ffffff99a67fbb68

通过前面的栈推导方式,我们可以得到这个文件名:

crash_ARM64> struct filename ffffffd680d33000
struct filename {
  name = 0xffffffd680d3301c "/storage/emulated/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001", 
  uptr = 0xd1dbe960 <Address 0xd1dbe960 out of bounds>, 
  aname = 0x0, 
  refcnt = 1, 
  iname = 0xffffffd680d3301c "/storage/emulated/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001"
}

可以看到,app和sdcard访问的路径前缀有点不同,原因是app是向fuse发请求,而sdcard是向ext4发请求,不过这两个请求都是同一个文件。

再看一下这个open请求的flag。首先从do_filp_open的定义可以看到它的第三个参数就是我们要的struct open_flags指针。

crash_ARM64> p do_filp_open
do_filp_open = $33 = 
 {struct file *(int, struct filename *, const struct open_flags *)} 0xffffff99a67c26c0

通过前面的栈推导方式可推导出这个值为ffffffd6e0847e98:

crash_ARM64> struct -x open_flags ffffffd6e0847e98
struct open_flags {
  open_flag = 0x20241, 
  mode = 0x81b6, 
  acc_mode = 0x22, 
  intent = 0x300, 
  lookup_flags = 0x1
}

将open_flag转换成8进制数:

crash_ARM64> p /o 0x20241
$9 = 0401101

通过flag的宏定义:

#define O_TRUNC 00001000
#define O_CREAT 00000100
#define O_WRONLY 00000001

确定sdcard的工作线程2767正在处理的就是2120线程的open("test1001",O_WRONLY|O_CREAT|O_TRUNC)请求。

而这个open请求在做do_truncate的时候被test1001这个文件的inode锁给锁住了,持这个锁的就是com.tencent.karaoke的2124线程。

 

接下来看一下2124线程的状态。

首先当然要查看是在哪个代码中持了test1001这个文件的inode锁,前面分析到这个mutex锁的地址为ffffffd6948f4090。

这里有个通用的方法:从2124线程的栈里查找这个mutex的值ffffffd6948f4090。

先通过bt命令查找2124的栈范围为ffffffd6d396b4b0~ffffffd6d396be70:

crash_ARM64> bt 2124
PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
 #0 [ffffffd6d396b4b0] __switch_to at ffffff99a6685560
 #1 [ffffffd6d396b4e0] __schedule at ffffff99a74df9f0
 #2 [ffffffd6d396b540] schedule at ffffff99a74dfd54
 #3 [ffffffd6d396b560] request_wait_answer at ffffff99a68c0350
 #4 [ffffffd6d396b5d0] __fuse_request_send at ffffff99a68c2f60
 #5 [ffffffd6d396b600] fuse_request_send at ffffff99a68c2fb8
 #6 [ffffffd6d396b610] fuse_send_readpages at ffffff99a68c8ef8
 #7 [ffffffd6d396b640] fuse_readpages at ffffff99a68c8fe8
 #8 [ffffffd6d396b6c0] __do_page_cache_readahead at ffffff99a676cfc0
 #9 [ffffffd6d396b780] filemap_fault at ffffff99a6762fc4
#10 [ffffffd6d396b7f0] __do_fault at ffffff99a6789c38
#11 [ffffffd6d396b860] handle_mm_fault at ffffff99a678d540
#12 [ffffffd6d396b920] do_page_fault at ffffff99a6696f4c
#13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
#14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
     PC: ffffffd7017056c0  [unknown or invalid address]
     LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
     SP: 0000000080000145  PSTATE: 00000000
    X29: ffffffd6d396bb70  X28: ffffff99a67619a0  X27: ffffffd6d396bb80
    X26: 0000000000001000  X25: ffffff99a7604000  X24: 0000000000000000
    X23: ffffff99a761b580  X22: ffffffd623366500  X21: 0000000000001000
    X20: ffffffd6948f4140  X19: 0000000000000000  X18: 0000000000000000
    X17: ffffffd6d396be08  X16: 0000000000000000  X15: 0000000000000000
    X14: ffffff99a668f4a0  X13: 0000000000000000  X12: 00000000ee04c905
    X11: 00000000d0bf8f50  X10: 0000000000000018   X9: 0101010101010101
     X8: 7f7f7f7f7f7f7f7f   X7: 722433ffff787368   X6: 7f7f7f7f7f7f7fff
     X5: 0000000000002800   X4: 0000008080000000   X3: ffffffd6d396bdd0
     X2: 0000000000002800   X1: 00000000d5d3e000   X0: 0000000000000000
#15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
#16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4
#17 [ffffffd6d396bd00] fuse_passthrough_write_iter at ffffff99a68cd4d0
#18 [ffffffd6d396bd50] fuse_file_write_iter at ffffff99a68ca1f4
#19 [ffffffd6d396bdb0] __vfs_write at ffffff99a67b49a0
#20 [ffffffd6d396be30] vfs_write at ffffff99a67b515c
#21 [ffffffd6d396be70] sys_pwrite64 at ffffff99a67b5a1c

再将栈里面的数据都打印出来:

crash_ARM64> rd ffffffd6d396b4b0 -e ffffffd6d396be70
ffffffd6d396b4b0:  ffffffd6d396b4e0 ffffff99a74df9f4   ..........M.....
ffffffd6d396b4c0:  ffffffd771be3e80 ffffff99a74dfc3c   .>.q....<.M.....
ffffffd6d396b4d0:  ffffffd771be3e80 ffffff99a8a15740   .>.q....@W......
ffffffd6d396b4e0:  ffffffd6d396b540 ffffff99a74dfd58   @.......X.M.....
ffffffd6d396b4f0:  ffffffd6d3968000 ffffffd66d943800   .........8.m....
ffffffd6d396b500:  0000000000000000 ffffffd6db690dd0   ..........i.....
...
ffffffd6d396bc00:  ffffffd6d396bc50 ffffff99a68201f8   P...............
ffffffd6d396bc10:  ffffffd6948f3fe8 0000000000002800   .?.......(......
ffffffd6d396bc20:  0000000000000000 ffffffd6d396bde0   ................
ffffffd6d396bc30:  ffffffd6d396be08 ffffffd623366500   .........e6#....
ffffffd6d396bc40:  ffffffd6948f4090 0000000000000000   .@..............
ffffffd6d396bc50:  ffffffd6d396bd00 ffffff99a68cd4d4   ................
ffffffd6d396bc60:  ffffffd623366500 fffffffffffffffb   .e6#............
ffffffd6d396bc70:  ffffffd6891b9340 ffffffd6948f3fe8   @........?......
ffffffd6d396bc80:  ffffffd7060c7100 ffffffd6d396bde0   .q..............
ffffffd6d396bc90:  ffffffd66d943800 ffffffd6891b93e8   .8.m............
ffffffd6d396bca0:  ffffff99a7604000 0000000000000000   .@`.............
ffffffd6d396bcb0:  00000000d396bde0 ffffffd6891b9340   ........@.......
...

mutex值在ffffffd6d396bc40这个地址上找到了,它是在__generic_file_write_iter的栈帧里。

#15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
#16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4

那可以肯定是在__generic_file_write_iter之前就持锁了,并且很可能是ext4_file_write_iter中,查看其源码:

static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    struct file *file = iocb->ki_filp;
    struct inode *inode = file_inode(iocb->ki_filp);
    ...
    mutex_lock(&inode->i_mutex);
    ...
    ret = __generic_file_write_iter(iocb, from);
    mutex_unlock(&inode->i_mutex);

通过栈推导,可以获得struct kiocb的值为0xffffffd6d396bde0:

crash_ARM64> struct kiocb ffffffd6d396bde0
struct kiocb {
  ki_filp = 0xffffffd623366500, 
  ki_pos = 0, 
  ki_complete = 0x0, 
  private = 0xffffffd6d396bcb4, 
  ki_flags = 0
}

其中,struct file为0xffffffd623366500:

crash_ARM64> struct file 0xffffffd623366500
struct file {
  ...
  f_path = {
    mnt = 0xffffffd776661b20, 
    dentry = 0xffffffd7017056c0
  }, 
  f_inode = 0xffffffd6948f3fe8,
}

对应的struct dentry为0xffffffd7017056c0:

crash_ARM64> struct dentry ffffffd7017056c0
struct dentry {
  d_parent = 0xffffffd6aac46e40, 
  d_name = {
    name = 0xffffffd7017056f8 "test1001"
  }, 
  d_inode = 0xffffffd6948f3fe8, 
  ...

这个和sdcard工作线程2767操作的是同一个inode。因此可以确定就是ext4_file_write_iter中持了test1001文件的inode锁。

这里有个疑问,app的write请求应该通过fuse系统给fuse发请求,但这里却直接拿着ext4对应inode操作了ext4。

原因是fuse中的一个叫passthrough的一个优化,思路是在应用进程中获取文件的ext4对应的inode后直接对其做读写操作。

static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb,
                        struct iov_iter *iter, int do_write)
{
    ssize_t ret_val;
    struct fuse_file *ff;
    struct file *fuse_file, *passthrough_filp;
    struct inode *fuse_inode, *passthrough_inode;
    struct fuse_conn *fc;

    ff = iocb->ki_filp->private_data;         /*从fused对应的file中获取private_data*/
    fuse_file = iocb->ki_filp;                /*这里保存原先的fuse对应的file*/
    passthrough_filp = ff->passthrough_filp;  /*这个是open时fuse系统返回的ext4对应的file*/
    fc = ff->fc;

    get_file(passthrough_filp);
    iocb->ki_filp = passthrough_filp;         /*这里将原有的fuse对应的file替换成ext4对应的file*/
    fuse_inode = fuse_file->f_path.dentry->d_inode;
    passthrough_inode = file_inode(passthrough_filp);

    if (do_write) {
        if (!passthrough_filp->f_op->write_iter)
            return -EIO;
        ret_val = passthrough_filp->f_op->write_iter(iocb, iter);   /*这里调用ext4的file_opration*/

open时的具体代码如下:

int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir)
{
    struct fuse_file *ff;
    struct file *passthrough_filp = NULL;
    int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

    ff = fuse_file_alloc(fc);

    ff->fh = 0;
    ff->open_flags = FOPEN_KEEP_CACHE;
    if (!fc->no_open || isdir) {
        struct fuse_open_out outarg;
        int err;

        err = fuse_send_open(fc, nodeid, file, opcode, &outarg, &(passthrough_filp)); /*fuse返回ext4的file*/
        if (!err) {
            ff->fh = outarg.fh;
            ff->open_flags = outarg.open_flags;
            ff->passthrough_filp = passthrough_filp;   /*将ext4的file保存在ff->passthrough_filp中*/
    ...

    file->private_data = fuse_file_get(ff);   /*将ff保存在fuse对应的file的pritvate_data中*/
    return 0;
}

这样可以跳过fuse daemon的中转,从而提高读写效率,但这种优化导致app与sdcard产生竞争。

一般来说这种竞争是允许的,大不了在mutex锁中等待就是了。

而本例的特点是ext4_file_write_iter执行时触发了pagefault,接着触发了fuse的read请求。

此时因为sdcard的工作线程2767已经被ext4对应的inode的锁给block了,所以请求被block。

 

这里有两个疑问:

1、sdcard工作线程有3个,其他两个工作线程处于待命状态,为什么不会处理这个read请求。

2、为什么会触发pagefault,而且这个pagefault又会触发fuse的read请求。

第一个疑问:sdcard有三个工作线程:

2001            || pthread_create(&thread_default, NULL, start_handler, &handler_default)
2002            || pthread_create(&thread_read, NULL, start_handler, &handler_read)
2003            || pthread_create(&thread_write, NULL, start_handler, &handler_write)) {

 这几个工作线程名字起的不好,很容易让人产生误解。

我之前以为thread_default线程处理open、close这种请求,thread_read线程处理read请求,thread_write线程处理write请求。

所以对2124的调用栈非常困惑。为什么刚开始是write,之后又变成read、之后又再等待thread_write线程。

后来跟同事沟通才发现这三个线程可以处理所有类型的请求,只不过每一个app只会对应一个特定的工作线程。

这个是在app启动时,根据media和sdcard的访问权限的设定去匹配对应的工作线程的。

        mountServiceInternal.addExternalStoragePolicy(
                new MountServiceInternal.ExternalStorageMountPolicy() {
            @Override
            public int getMountMode(int uid, String packageName) {
                if (Process.isIsolated(uid)) {
                    return Zygote.MOUNT_EXTERNAL_NONE;
                }
                if (checkUidPermission(WRITE_MEDIA_STORAGE, uid) == PERMISSION_GRANTED) {
                    return Zygote.MOUNT_EXTERNAL_DEFAULT;
                }
                if (checkUidPermission(READ_EXTERNAL_STORAGE, uid) == PERMISSION_DENIED) {
                    return Zygote.MOUNT_EXTERNAL_DEFAULT;
                }
                if (checkUidPermission(WRITE_EXTERNAL_STORAGE, uid) == PERMISSION_DENIED) {
                    return Zygote.MOUNT_EXTERNAL_READ;
                }
                return Zygote.MOUNT_EXTERNAL_WRITE;
            }

所以com.tencent.karaoke进程的2120线程的open请求被block,同进程的2124线程被pending也是正常的。

 

第二个疑问:

为什么write操作会触发pagefault,且这个pagefault引发read请求。

从调用栈中可以看到:

#13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
#14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
     PC: ffffffd7017056c0  [unknown or invalid address]
     LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
     ...
#15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
#16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4

这里__generic_file_write_iter函数在调用iov_iter_fault_in_readable函数时触发的pagefault。

查看源码:

ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    struct file *file = iocb->ki_filp;
    struct address_space * mapping = file->f_mapping;
    struct inode     *inode = mapping->host;
    ...
    if (iocb->ki_flags & IOCB_DIRECT) {
        ...
        status = generic_perform_write(file, from, pos = iocb->ki_pos);


ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos)
{
    struct address_space *mapping = file->f_mapping;
    const struct address_space_operations *a_ops = mapping->a_ops;
    long status = 0;
    ssize_t written = 0;
    unsigned int flags = 0;
    ...
    do {
        struct page *page;
        unsigned long offset;    /* Offset into pagecache page */
        unsigned long bytes;    /* Bytes to write to page */
        size_t copied;        /* Bytes copied from user */
        void *fsdata;

        offset = (pos & (PAGE_CACHE_SIZE - 1));
        bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_count(i));

again:
        ...
        if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
            status = -EFAULT;
            break;
        }

        if (fatal_signal_pending(current)) {
            status = -EINTR;
            break;
        }

        status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata);

        if (mapping_writably_mapped(mapping))
            flush_dcache_page(page);

        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        flush_dcache_page(page);

        status = a_ops->write_end(file, mapping, pos, bytes, copied,page, fsdata);
        if (unlikely(status < 0))
            break;
        copied = status;

        cond_resched();

        iov_iter_advance(i, copied);
        ...
        }
        pos += copied;
        written += copied;

        balance_dirty_pages_ratelimited(mapping);
    } while (iov_iter_count(i));

    return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);

这里产生pagefault的是iov_iter_fault_in_readable调用,它的参数是i,而这个参数就是__generic_file_write_iter函数的from参数。

也就是说这里产生pagefault的是from指向的buffer,

write的一般调用是write(fd,buf,count),from其实就是这里的buf。

而为什么这个buffer的pagefault会调用fuse的read请求呢?唯一的解释就是这个buffer其实是sdcard中的文件被mmap到当前进程中的。

如何证明呢?我们看到fuse_readpages的参数中有struct file指针:

crash_ARM64> p fuse_readpages
fuse_readpages = $36 = 
 {int (struct file *, struct address_space *, struct list_head *, unsigned int)} 0xffffff99a68c8f24

因此通过栈推导可以找出这个struct file指针为ffffffd7060c6800:

crash_ARM64> struct file ffffffd7060c6800
struct file {
  ...
  f_path = {
    mnt = 0xffffffd648f13820, 
    dentry = 0xffffffd7017a1900
  }, 
  ...

通过前面的方法可以得到完整路径:

crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd7017a1900
d_parent = 0xffffffd75d695600
d_name.name = 0xffffffd7017a1938 "-111111870"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d695600
d_parent = 0xffffffd77179ec00
d_name.name = 0xffffffd75d695638 "tmp"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd77179ec00
d_parent = 0xffffffd6470033c0
d_name.name = 0xffffffd77179ec38 "file"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6470033c0
d_parent = 0xffffffd6aac460c0
d_name.name = 0xffffffd6470033f8 "cache"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac460c0
d_parent = 0xffffffd759bc1780
d_name.name = 0xffffffd6aac460f8 "com.tencent.karaoke"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd759bc1780
d_parent = 0xffffffd759bc1600
d_name.name = 0xffffffd759bc17b8 "data"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd759bc1600
d_parent = 0xffffffd75995b780
d_name.name = 0xffffffd759bc1638 "Android"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75995b780
d_parent = 0xffffffd75d6869c0
d_name.name = 0xffffffd75995b7b8 "0"
crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d6869c0
d_parent = 0xffffffd75d6869c0
d_name.name = 0xffffffd75d6869f8 "/"

完整路径为/0/Android/data/com.tencent.karaoke/cache/file/tmp/-111111870

 

问题清楚了,com.tencent.karaoke应用的2120线程在

open("test1001",O_WRONLY|O_CREAT|O_TRUNC)

同时2124线程在

fd = open("test1001")
addr = mmap("-111111870")
write(fd,addr)

 

原理已经清楚了,接下来按照上面的条件写测试程序:

/*
 * Repro thread: repeatedly re-open test2.txt with O_TRUNC so the open()
 * path keeps entering do_truncate() under the file's inode mutex,
 * racing with the passthrough writer in main().
 *
 * data: unused pthread argument; always returns NULL (loop never exits).
 */
static void* fuse_test1(void* data)
{
    int fd = -1;

    (void)data; /* unused */
    while (1) {
        /* O_CREAT requires a mode argument; omitting it is undefined
         * behavior (the created file gets garbage permissions). */
        fd = open("/sdcard/test2.txt", O_WRONLY | O_CREAT | O_TRUNC, 0666);
        if (fd >= 0)
            close(fd); /* only close a successfully opened descriptor */
    }
    return NULL;
}

/*
 * Repro driver: thread1 keeps truncating test2.txt via open(O_TRUNC),
 * while this thread writes test2.txt from a buffer that is an mmap of
 * test1.txt — the write path faults on the mapping while holding the
 * ext4 inode mutex, reproducing the fuse passthrough deadlock.
 */
int main(int argc, char** argv)
{
    ssize_t ret = 0;
    int fd1 = -1;
    int fd2 = -1;
    pthread_t thread1;
    unsigned char *base = NULL;
    const size_t map_len = 4096UL * 50000; /* ~195 MiB: large enough to fault */

    fd1 = open("/sdcard/test1.txt", O_RDWR);
    fd2 = open("/sdcard/test2.txt", O_RDWR);
    if (fd1 < 0 || fd2 < 0) {
        perror("open");
        return 1;
    }

    if (pthread_create(&thread1, NULL, fuse_test1, NULL) != 0) {
        perror("pthread_create");
        return 1;
    }

    while (1) {
        base = mmap(0, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
        if (base == MAP_FAILED) {
            /* writing from a failed mapping would just SIGSEGV instead of
             * exercising the fault-under-inode-lock path we want */
            perror("mmap");
            return 1;
        }
        ret = write(fd2, base, map_len);
        if (ret < 0)
            perror("write");
        munmap(base, map_len);
        usleep(1000);
    }

    return 0;
}

测试程序push到手机后必现问题,调用栈如下:

# ps -t 4225
USER      PID   PPID  VSIZE  RSS   WCHAN              PC  NAME
root      4225  4169  210128 1676  request_wa 7f9d038358 S fusetest
root      4226  4225  210128 1676  request_wa 7f9d037338 S fusetest

# cat proc/4225/stack
[<0000000000000000>] __switch_to+0x98/0xa4
[<0000000000000000>] request_wait_answer+0x64/0x248
[<0000000000000000>] __fuse_request_send+0x84/0x98
[<0000000000000000>] fuse_request_send+0x44/0x4c
[<0000000000000000>] fuse_send_readpages+0x90/0xb8
[<0000000000000000>] fuse_readpages+0xc8/0xf0
[<0000000000000000>] __do_page_cache_readahead+0x144/0x210
[<0000000000000000>] ondemand_readahead+0x214/0x230
[<0000000000000000>] page_cache_async_readahead+0xc8/0xdc
[<0000000000000000>] filemap_fault+0xc4/0x418
[<0000000000000000>] __do_fault+0x3c/0x8c
[<0000000000000000>] handle_mm_fault+0x648/0x10b4
[<0000000000000000>] do_page_fault+0x168/0x30c
[<0000000000000000>] do_mem_abort+0x40/0x9c
[<0000000000000000>] el1_da+0x18/0x78
[<0000000000000000>] __generic_file_write_iter+0xcc/0x16c
[<0000000000000000>] ext4_file_write_iter+0x230/0x2f4
[<0000000000000000>] fuse_passthrough_write_iter+0x68/0xec
[<0000000000000000>] fuse_file_write_iter+0x11c/0x210
[<0000000000000000>] __vfs_write+0xa0/0xd0
[<0000000000000000>] vfs_write+0xac/0x144
[<0000000000000000>] SyS_write+0x48/0x84
[<0000000000000000>] el0_svc_naked+0x24/0x28
[<0000000000000000>] 0xffffffffffffffff

# cat /proc/4226/stack
[<0000000000000000>] __switch_to+0x98/0xa4
[<0000000000000000>] request_wait_answer+0x64/0x248
[<0000000000000000>] __fuse_request_send+0x84/0x98
[<0000000000000000>] fuse_request_send+0x44/0x4c
[<0000000000000000>] fuse_simple_request+0x120/0x170
[<0000000000000000>] fuse_send_open+0x9c/0xb8
[<0000000000000000>] fuse_do_open+0x80/0x100
[<0000000000000000>] fuse_open_common+0x7c/0xb4
[<0000000000000000>] fuse_open+0x10/0x18
[<0000000000000000>] do_dentry_open+0x1d0/0x2ec
[<0000000000000000>] vfs_open+0x6c/0x78
[<0000000000000000>] path_openat+0x948/0xbd4
[<0000000000000000>] do_filp_open+0x3c/0x84
[<0000000000000000>] do_sys_open+0x164/0x1fc
[<0000000000000000>] SyS_openat+0x10/0x18
[<0000000000000000>] el0_svc_naked+0x24/0x28
[<0000000000000000>] 0xffffffffffffffff

# ps -t 2086
USER      PID   PPID  VSIZE  RSS   WCHAN              PC  NAME
media_rw  2086  569   15460  2652  wait_woken 7fb3d1bd70 S /system/bin/sdcard
media_rw  2099  2086  15460  2652   pipe_wait 7fb3d1bd70 S sdcard
media_rw  2100  2086  15460  2652  do_truncat 7fb3d1b338 D sdcard
media_rw  2101  2086  15460  2652  fuse_dev_d 7fb3d1bd70 S sdcard
media_rw  2102  2086  15460  2652  fuse_dev_d 7fb3d1bd70 S sdcard
                                                                                                                                                                           
# cat /proc/2100/stack
[<0000000000000000>] __switch_to+0x98/0xa4
[<0000000000000000>] do_truncate+0x6c/0xa0
[<0000000000000000>] path_openat+0xa10/0xbd4
[<0000000000000000>] do_filp_open+0x3c/0x84
[<0000000000000000>] do_sys_open+0x164/0x1fc
[<0000000000000000>] SyS_openat+0x10/0x18
[<0000000000000000>] el0_svc_naked+0x24/0x28
[<0000000000000000>] 0xffffffffffffffff

相关流程如下:

 

解决方案:

出问题原因是fuse的passthrough流程中先持inode锁再去访问了buf地址产生pagefault导致了死锁。

如果在持锁前先访问buf就能避免出现死锁,也就能避免这种死锁:

static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    ...

    if (ff && ff->passthrough_enabled && ff->passthrough_filp) {
+        size_t bytes = iov_iter_count(from);
+        /*
+         * Fault in the user buffer before fuse_passthrough_write_iter().
+         * Otherwise a page fault taken while holding inode->i_mutex may
+         * deadlock when it sends a fuse read request to the fuse daemon.
+         */
+        iov_iter_fault_in_multipages_readable(from, bytes);
        written = fuse_passthrough_write_iter(iocb, from);
        goto out;
    }

 

posted @ 2017-07-12 14:01  YYPapa  阅读(3480)  评论(1编辑  收藏  举报