linux 2.6.32.220的一个crash记录
有同事分析一个crash,我参与了分析,记录如下,供遇到相同crash的兄弟参考:
crash> bt PID: 24632 TASK: ffff881723ce8080 CPU: 14 COMMAND: "30-00-SS" #0 [ffff881723cef6d0] machine_kexec at ffffffff8103244b #1 [ffff881723cef730] crash_kexec at ffffffff810bb0e2 #2 [ffff881723cef800] oops_end at ffffffff814fe120 #3 [ffff881723cef830] no_context at ffffffff810425db #4 [ffff881723cef880] __bad_area_nosemaphore at ffffffff81042865 #5 [ffff881723cef8d0] bad_area at ffffffff8104298e #6 [ffff881723cef900] __do_page_fault at ffffffff810430c0 #7 [ffff881723cefa20] do_page_fault at ffffffff8150014e #8 [ffff881723cefa50] page_fault at ffffffff814fd485 [exception RIP: udp_send_skb+744] RIP: ffffffff81497358 RSP: ffff881723cefb08 RFLAGS: 00010202 RAX: 000000008cfa0d02 RBX: ffff881439a092c0 RCX: 0000000000000000 RDX: 000000000000005c RSI: 0000000000000014 RDI: ffff881061d4b800 RBP: ffff881723cefb48 R8: 000000004a00300a R9: 0000000039a092c0 R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000005c R13: ffff8811f8e77100 R14: 0000000000000000 R15: ffff881061d4b824 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff881723cefb50] udp_sendmsg at ffffffff8149781d #10 [ffff881723cefc50] inet_sendmsg at ffffffff8149fc5a #11 [ffff881723cefc90] sock_sendmsg at ffffffff814261ca #12 [ffff881723cefe40] sys_sendto at ffffffff81426ae9 #13 [ffff881723ceff80] system_call_fastpath at ffffffff8100b0d2 RIP: 0000003e1580ee83 RSP: 00007ffb0c9339b8 RFLAGS: 00000202 RAX: 000000000000002c RBX: ffffffff8100b0d2 RCX: 0000000000000002 RDX: 0000000000000054 RSI: 00007ffb0c934380 RDI: 0000000000000b0d RBP: 00007ffaf4175c80 R8: 00007ffb0c933fb0 R9: 000000000000001c R10: 0000000000000000 R11: 0000000000000293 R12: 00007ffb0c934258 R13: 00007ffb0c93432c R14: 000000000000e2ab R15: 00007ffaf4d3a878 ORIG_RAX: 000000000000002c CS: 0033 SS: 002b crash>
BUG: unable to handle kernel paging request at 000000008cfa0d76 IP: [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0 PGD be47ab067 PUD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/cpu/online CPU 14 Modules linked in: ***,省略 Pid: 24632, comm: 30-00-SS Tainted: G W ---------------- 2.6.32-220.el6.x86_64 #1 To be filled by O.E.M. To be filled by O.E.M./To be filled by O.E.M. RIP: 0010:[<ffffffff81497358>] [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0 RSP: 0018:ffff881723cefb08 EFLAGS: 00010202 RAX: 000000008cfa0d02 RBX: ffff881439a092c0 RCX: 0000000000000000 RDX: 000000000000005c RSI: 0000000000000014 RDI: ffff881061d4b800 RBP: ffff881723cefb48 R08: 000000004a00300a R09: 0000000039a092c0 R10: 0000000000000000 R11: 0000000000000000 R12: 000000000000005c R13: ffff8811f8e77100 R14: 0000000000000000 R15: ffff881061d4b824 FS: 00007ffb0c935700(0000) GS:ffff880c5a780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000008cfa0d76 CR3: 0000000c10733000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process 30-00-SS (pid: 24632, threadinfo ffff881723cee000, task ffff881723ce8080) Stack: 0000000000000000 5e2d320a2654dacc ffff880c94c6acc0 ffff8811f8e77100 <0> 0000000000000000 000000004a00300a ffff881723cefe58 ffffffff81471f40 <0> ffff881723cefc48 ffffffff8149781d ffff881723cefbc0 ffffffff00000040 Call Trace: [<ffffffff81471f40>] ? ip_generic_getfrag+0x0/0xb0 [<ffffffff8149781d>] udp_sendmsg+0x2ed/0x8f0 [<ffffffff8149fc5a>] inet_sendmsg+0x4a/0xb0 [<ffffffff814261ca>] sock_sendmsg+0x11a/0x150 [<ffffffff810925e0>] ? autoremove_wake_function+0x0/0x40 [<ffffffff810a4b2e>] ? futex_wake+0x10e/0x120 [<ffffffff8100bdee>] ? reschedule_interrupt+0xe/0x20 [<ffffffff8117f90a>] ? fget_light+0x7a/0x90 [<ffffffff81426ae9>] sys_sendto+0x139/0x190 [<ffffffff810d6a12>] ? 
audit_syscall_entry+0xc2/0x2b0 [<ffffffff8100b0d2>] system_call_fastpath+0x16/0x1b Code: 30 01 00 00 4e 8d 3c 3f 48 8b 44 07 18 44 89 fe 2b b3 d8 00 00 00 41 29 f4 48 85 c0 44 89 e2 0f 84 9e 00 00 00 66 0f 1f 44 00 00 <03> 48 74 40 0f 92 c7 2b 50 68 48 8b 00 40 0f b6 ff 8d 0c 0f 48 RIP [<ffffffff81497358>] udp_send_skb+0x2e8/0x3d0 RSP <ffff881723cefb08> CR2: 000000008cfa0d76
从堆栈看,用户通过sendto系统调用进入内核,在内核态调用 udp_send_skb 时出现异常,执行的代码为 :udp_send_skb+0x2e8/0x3d0
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/net/checksum.h: 64 0xffffffff81497358 <udp_send_skb+744>: add 0x74(%rax),%ecx----------------------------------------异常
根据代码行,确定 checksum.h:64所在的函数是:csum_add,根据调用链
0xffffffff814977eb <udp_sendmsg+699>: callq 0xffffffff81474820 <ip_make_skb> /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 764 0xffffffff814977f0 <udp_sendmsg+704>: test %rax,%rax /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 763 0xffffffff814977f3 <udp_sendmsg+707>: mov %eax,%r9d /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 764 0xffffffff814977f6 <udp_sendmsg+710>: je 0xffffffff81497a6d <udp_sendmsg+1341> 0xffffffff814977fc <udp_sendmsg+716>: cmp $0xfffffffffffff000,%rax 0xffffffff81497802 <udp_sendmsg+722>: ja 0xffffffff81497a6d <udp_sendmsg+1341> /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 765 0xffffffff81497808 <udp_sendmsg+728>: movzwl -0xb2(%rbp),%edx 0xffffffff8149780f <udp_sendmsg+735>: mov -0xb0(%rbp),%esi 0xffffffff81497815 <udp_sendmsg+741>: mov %rax,%rdi 0xffffffff81497818 <udp_sendmsg+744>: callq 0xffffffff81497070 <udp_send_skb>
rax就是skb,没有使用栈来保存,在调用udp_send_skb之前,又赋值给了rdi,所以rdi就是skb,进入udp_send_skb的堆栈之后:
crash> dis -l udp_send_skb /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 519 0xffffffff81497070 <udp_send_skb>: push %rbp 0xffffffff81497071 <udp_send_skb+1>: mov %rsp,%rbp 0xffffffff81497074 <udp_send_skb+4>: push %r15 0xffffffff81497076 <udp_send_skb+6>: push %r14 0xffffffff81497078 <udp_send_skb+8>: push %r13 0xffffffff8149707a <udp_send_skb+10>: push %r12 0xffffffff8149707c <udp_send_skb+12>: push %rbx 0xffffffff8149707d <udp_send_skb+13>: sub $0x18,%rsp 0xffffffff81497081 <udp_send_skb+17>: nopl 0x0(%rax,%rax,1) 0xffffffff81497086 <udp_send_skb+22>: mov %esi,-0x34(%rbp) /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/skbuff.h: 1227 0xffffffff81497089 <udp_send_skb+25>: mov 0xbc(%rdi),%r12d-------------------------------------此时rdi就是skb,r12d 就是skb->transport_header /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 519 0xffffffff81497090 <udp_send_skb+32>: mov %rdi,%rbx
我们可以看到,rdi赋值给了rbx,且rbx在最后遇到异常指令之前,都没有被修改过,所以rbx中就是skb指针。也就是:ffff881439a092c0 ,有了skb,我们就好分析udp_send_skb的执行流了。
crash> struct -x sk_buff.sk ffff881439a092c0 sk = 0xffff8811f8e77100 crash> struct udp_sock.pcflag 0xffff8811f8e77100 pcflag = 0 '\000'
crash> struct sock.sk_no_check 0xffff8811f8e77100
sk_no_check = 0
crash> struct -x sk_buff.ip_summed ffff881439a092c0
ip_summed = 0x3
所以is_udplite的值为0,
if (is_udplite) /* UDP-Lite */------------------------is_udplite为0,这个不会执行 csum = udplite_csum(skb); else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */--------sk_no_check为0而UDP_CSUM_NOXMIT为1,条件不成立,所以也不会执行 skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */------------ip_summed为3即CHECKSUM_PARTIAL,走这个流程 udp4_hwcsum(skb, rt->rt_src, daddr); goto send; } else csum = udp_csum(skb);
看一下udp4_hwcsum流程:
static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { struct udphdr *uh = udp_hdr(skb); struct sk_buff *frags = skb_shinfo(skb)->frag_list; int offset = skb_transport_offset(skb); int len = skb->len - offset; int hlen = len; __wsum csum = 0; if (!frags) { /* * Only one fragment on the socket. */ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ do { csum = csum_add(csum, frags->csum); hlen -= frags->len; } while ((frags = frags->next)); csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } }
需要分析 struct sk_buff *frags = skb_shinfo(skb)->frag_list;
crash> struct -x sk_buff.head ffff881439a092c0 head = 0xffff881061d4b800 "\330,~\350\372\177" crash> struct -x sk_buff.end ffff881439a092c0 end = 0xc0 crash> px 0xffff881061d4b800+0xc0 $18 = 0xffff881061d4b8c0 crash> struct skb_shared_info.frag_list^C crash> struct skb_shared_info.frag_list 0xffff881061d4b8c0 frag_list = 0x8cfa0d02 crash> crash> struct frag_list.next 0x8cfa0d02 struct: invalid kernel virtual address: 0x8cfa0d02 crash> struct -xo frag_list.next struct: invalid data structure reference: frag_list crash> struct -xo frag_list struct: invalid data structure reference: frag_list crash> struct -xo sk_buff.next struct sk_buff { [0x0] struct sk_buff *next; } crash> struct -xo sk_buff.csum struct sk_buff { [0x74] __wsum csum; } crash> p 0x8cfa0d02+0x74 $19 = 2365197686 crash> px 0x8cfa0d02+0x74 $20 = 0x8cfa0d76--------------------这个地址不对,导致panic
frags 为 0x8cfa0d02 ,这个地址有问题,导致了panic。
这时候需要返回回去分析skb的组成了,而frags又和ip_options相关,ip_options又和ipc相关,先获取到ipc
#9 [ffff881723cefb50] udp_sendmsg at ffffffff8149781d ffff881723cefb58: ffff881723cefbc0 ffffffff00000040 ffff881723cefb68: ffff881723cefbc8 ffffffff00000000 ffff881723cefb78: ffff881723cefba8 ffff881100000000 ffff881723cefb88: ffffffff8201f300 abe2880c94c6acc0 ffff881723cefb98: 000000005e2d320a 0000000000000054 ffff881723cefba8: 000000005e2d320a ffff880c641d7f40--------ipc.opt的地址是ffff881723cefbb0,它的值是 ffff880c641d7f40 ffff881723cefbb8: ffff881723cefc00 0000000000000000 ffff881723cefbc8: 0000000000000000 5e2d320a00000000 ffff881723cefbd8: 000000004a00300a 0000000000000000 ffff881723cefbe8: 0000000000000000 0000000000000000 ffff881723cefbf8: abe22c6800000011 0000000000000000 ffff881723cefc08: ffff881723cefd18 000000002654dacc ffff881723cefc18: ffff881723cefc98 ffff881723cefc98 ffff881723cefc28: ffff881723cefe58 ffff881723cefc98 ffff881723cefc38: 0000000000000054 ffff881723cefd88 ffff881723cefc48: ffff881723cefc88 ffffffff8149fc5a #10 [ffff881723cefc50] inet_sendmsg at ffffffff8149fc5a
627 ipc.opt = NULL; 628 ipc.shtx.flags = 0; usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 627 0xffffffff81497605 <udp_sendmsg+213>: movq $0x0,-0x98(%rbp) /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/ipv4/udp.c: 628 0xffffffff81497610 <udp_sendmsg+224>: movb $0x0,-0x90(%rbp)
可以确定,ipc.opt的地址为 0xffff881723cefbb0,其值为 ffff880c641d7f40,由于opt本身是一个指针,所以可以取出堆栈 0xffff881723cefbb0 处对应的值 ffff880c641d7f40 来获取ip_options
crash> px 0xffff881723cefc48-0x98 $17 = 0xffff881723cefbb0 crash> struct ip_options ffff880c641d7f40 struct ip_options { faddr = 1346456898, optlen = 0 '\000', srr = 1 '\001', rr = 0 '\000', ts = 1 '\001', is_strictroute = 1 '\001', srr_is_hit = 1 '\001', is_changed = 1 '\001', rr_needaddr = 1 '\001', ts_needtime = 1 '\001', ts_needaddr = 1 '\001', router_alert = 255 '\377', cipso = 255 '\377', __pad2 = 255 '\377', __data = 0xffff880c641d7f4c "\377\377\377\377\377\377\377\377\377\377\377\377" }
我们看这个内容,明显是错误的,因为我们不会开启srr,这个ip选项的具体意义,建议大家参考《深入理解linux网络技术内幕》
crash> kmem ffff880c641d7f40 CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE ffff880c2fc40100 size-64 64 1099272 1195930 20270 4k SLAB MEMORY TOTAL ALLOCATED FREE ffff880c641d7000 ffff880c641d7140 59 9 50 FREE / [ALLOCATED] [ffff880c641d7f40]
说明这个ipc->opt是已经分配的slab,但不管怎么样,按照逻辑,要和inet->opt一致才对,我们看下inet->opt为多少,而要知道inet的值,就需要按照调用栈分析
从开始调用链开始分析:
我们获取一下sendto中的msg、socket指针。
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1706 0xffffffff81426a3d <sys_sendto+141>: movq $0x0,-0x120(%rbp)
根据源码:
1700 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1701 if (!sock) 1702 goto out; 1703 1704 iov.iov_base = buff; 1705 iov.iov_len = len; 1706 msg.msg_name = NULL; 1707 msg.msg_iov = &iov; 1708 msg.msg_iovlen = 1; 1709 msg.msg_control = NULL; 1710 msg.msg_controllen = 0;
根据堆栈:
#12 [ffff881723cefe40] sys_sendto at ffffffff81426ae9 ffff881723cefe48: 0000000000000054 00007ffb0c934380 ffff881723cefe58: ffff881723cefeb8 01ff880c0000001c-------------------ffff881723cefe58为struct msghdr msg的地址 ffff881723cefe68: ffff881723cefe98 0000000000000001 ffff881723cefe78: 0000000000000000 0000000000000000 ffff881723cefe88: 0000000000000040 0000000000016000 ffff881723cefe98: 00007ffb0c934380 0000000000000054 ffff881723cefea8: ffff88180f46c7c0 0000000000000001 ffff881723cefeb8: 5e2d320aabe20002 0000000000000000 ffff881723cefec8: 0000000000000000 0000000000000000 ffff881723cefed8: 0000000000006038 0000000001d4e3b0 ffff881723cefee8: ffff881723ceff78 ffff881723ce8638 ffff881723cefef8: ffff881723ceff78 ffffffff810d6a12 ffff881723ceff08: ffff881723ce8640 ffff88162c949540 ffff881723ceff18: ffff881723ce8080 ffff88180deb4e48 ffff881723ceff28: ffff88180deb4e48 ffff88180deb4e48 ffff881723ceff38: ffff881723ceff78 000000002654dacc ffff881723ceff48: 00007ffb0c934090 00007ffb0c934380 ffff881723ceff58: 00007ffaf4d3a878 000000000000e2ab ffff881723ceff68: 00007ffb0c93432c 00007ffb0c934258 ffff881723ceff78: 00007ffaf4175c80 ffffffff8100b0d2---------------ffff881723ceff78 为rbp
#13 [ffff881723ceff80] system_call_fastpath at ffffffff8100b0d2
先从栈中找到msghdr 的地址:
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1706 0xffffffff81426a3d <sys_sendto+141>: movq $0x0,-0x120(%rbp) crash> px 0xffff881723ceff78-0x120 $11 = 0xffff881723cefe58
对应1706行的代码是 msg.msg_name = NULL;,对应的汇编语句是 movq $0x0,-0x120(%rbp),所以rbp减去0x120就是msg的地址,因为msg_name是msg的第一个成员:
crash> struct msghdr ffff881723cefe58 struct msghdr { msg_name = 0xffff881723cefeb8, msg_namelen = 28, msg_iov = 0xffff881723cefe98, msg_iovlen = 1, msg_control = 0x0, msg_controllen = 0, msg_flags = 64 }
由于iov赋值给了msg_iov成员,所以也可以确定iov的地址为 0xffff881723cefe98
crash> struct msghdr ffff881723cefe58 struct msghdr { msg_name = 0xffff881723cefeb8, msg_namelen = 28, msg_iov = 0xffff881723cefe98, msg_iovlen = 1, msg_control = 0x0, msg_controllen = 0,--------------------------这个标志在udp_sendmsg中会用到 msg_flags = 64 } crash> struct iovec 0xffff881723cefe98 struct iovec { iov_base = 0x7ffb0c934380, iov_len = 84----------------用户态需要发送的buf中的字节数 }
下面分析sock指针,用户态通过fd传入到内核态来找到对应的socket指针:
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1700 0xffffffff81426a04 <sys_sendto+84>: callq 0xffffffff81424e80 <sockfd_lookup_light> /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1701 0xffffffff81426a09 <sys_sendto+89>: test %rax,%rax /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 1700 0xffffffff81426a0c <sys_sendto+92>: mov %rax,%r14 可以看到r14存放了sockfd_lookup_light的返回值,并且该寄存器没有再被修改,刚好调用sock_sendmsg,r14又压栈了,所以可以从栈中取出sock指针: crash> dis -l sock_sendmsg /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 576 0xffffffff814260b0 <sock_sendmsg>: push %rbp 0xffffffff814260b1 <sock_sendmsg+1>: mov %rsp,%rbp 0xffffffff814260b4 <sock_sendmsg+4>: sub $0x1a0,%rsp 0xffffffff814260bb <sock_sendmsg+11>: mov %rbx,-0x28(%rbp) 0xffffffff814260bf <sock_sendmsg+15>: mov %r12,-0x20(%rbp) 0xffffffff814260c3 <sock_sendmsg+19>: mov %r13,-0x18(%rbp) 0xffffffff814260c7 <sock_sendmsg+23>: mov %r14,-0x10(%rbp)
所以可以获取socket指针为:
struct socket ffff88180357c400 struct socket { state = SS_UNCONNECTED, type_begin = 0xffff88180357c404, type = 2, type_end = 0xffff88180357c408, flags = 0, fasync_list = 0x0, wait = { lock = { raw_lock = { slock = 2715525595 } }, task_list = { next = 0xffff8813bd0eb378, prev = 0xffff8813bd0eb378 } }, file = 0xffff88109b957ec0, sk = 0xffff8811f8e77100, ops = 0xffffffff81664840 <inet_dgram_ops>
很容易根据 struct sock *sk = sock->sk;获取到对应的sk为 0xffff8811f8e77100 ,而这个指针是sock,inet_sock,udp_sock等的基础,因为他们起始地址是一样的。我们可以用skb来验证这个sk是否正常:
crash> struct -x sk_buff.sk ffff881439a092c0 sk = 0xffff8811f8e77100
skb分析出来的sk和我们从sys_sendto分析的sk是一致的。
crash> struct sock 0xffff8811f8e77100 struct sock { __sk_common = { { skc_node = { next = 0xffff881727a3b100, pprev = 0xffff881236f157c0 }, skc_nulls_node = { next = 0xffff881727a3b100, pprev = 0xffff881236f157c0 } }, skc_refcnt = { counter = 2 }, skc_hash = 26668, skc_family = 2, skc_state = 1 '\001', skc_reuse = 0 '\000', skc_bound_dev_if = 0, skc_bind_node = { next = 0x0, pprev = 0x0 }, skc_prot = 0xffffffff81b175c0 <udp_prot>, skc_net = 0xffffffff8201f300 <init_net> }, flags_begin = 0xffff8811f8e77140, sk_shutdown = 0, sk_no_check = 0, sk_userlocks = 13, sk_protocol = 17, sk_type = 2, flags_end = 0xffff8811f8e77144, sk_rcvbuf = 8388608, sk_lock = { slock = { raw_lock = { slock = 1400263542 } }, owned = 0, wq = { lock = { raw_lock = { slock = 131074 } }, task_list = { next = 0xffff8811f8e77158, prev = 0xffff8811f8e77158 } } }, sk_backlog = { head = 0x0, tail = 0x0 }, sk_sleep = 0xffff88180357c418, sk_dst_cache = 0xffff8813b02e3800, sk_policy = {0x0, 0x0}, sk_dst_lock = { raw_lock = { lock = 16777216 } }, sk_rmem_alloc = { counter = 552 }, sk_wmem_alloc = { counter = 425 }, sk_omem_alloc = { counter = 0 }, sk_sndbuf = 1048568, sk_receive_queue = { next = 0xffff880c94c6acc0, prev = 0xffff880c94c6acc0, qlen = 1, lock = { raw_lock = { slock = 309990010 } } }, sk_write_queue = { next = 0xffff8811f8e771c8, prev = 0xffff8811f8e771c8, qlen = 0, lock = { raw_lock = { slock = 0 } } }, sk_async_wait_queue = { next = 0xffff8811f8e771e0, prev = 0xffff8811f8e771e0, qlen = 0, lock = { raw_lock = { slock = 0 } } }, sk_wmem_queued = 0, sk_forward_alloc = 3544, sk_allocation = 208, sk_route_caps = 0, sk_gso_type = 0, sk_gso_max_size = 0, sk_rcvlowat = 1, sk_flags = 256, sk_lingertime = 0, sk_error_queue = { next = 0xffff8811f8e77228, prev = 0xffff8811f8e77228, qlen = 0, lock = { raw_lock = { slock = 0 } } }, sk_prot_creator = 0xffffffff81b175c0 <udp_prot>, sk_callback_lock = { raw_lock = { lock = 16777216 } }, sk_err = 0, sk_err_soft = 0, sk_drops = { counter = 0 }, 
sk_ack_backlog = 0, sk_max_ack_backlog = 0, sk_priority = 0, sk_peercred = { pid = 0, uid = 4294967295, gid = 4294967295 }, sk_rcvtimeo = 9223372036854775807, sk_sndtimeo = 9223372036854775807, sk_filter = 0x0, sk_protinfo = 0x0, sk_timer = { entry = { next = 0x0, prev = 0x0 }, expires = 0, function = 0x0, data = 0, base = 0xffff881811e94000, start_site = 0x0, start_comm = "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000", start_pid = -1 }, sk_stamp = { tv64 = 0 }, sk_socket = 0xffff88180357c400, sk_user_data = 0x0, sk_sndmsg_page = 0x0, sk_send_head = 0x0, sk_sndmsg_off = 0, sk_write_pending = 0, sk_security = 0x0, sk_mark = 0, sk_classid = 0, sk_state_change = 0xffffffff81429380 <sock_def_wakeup>, sk_data_ready = 0xffffffff814298b0 <sock_def_readable>, sk_write_space = 0xffffffff81429810 <sock_def_write_space>, sk_error_report = 0xffffffff81429790 <sock_def_error_report>, sk_backlog_rcv = 0xffffffff81495fa0 <__udp_queue_rcv_skb>, sk_destruct = 0xffffffff814a13c0 <inet_sock_destruct> }
同理我们根据汇编可以分析kiocb 参数和 sock_iocb 参数:
/usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 581 0xffffffff814260ff <sock_sendmsg+79>: movq $0x0,-0x190(%rbp)-------------这个就是操作 kiocb.ki_flags = 0;
0xffffffff8142610a <sock_sendmsg+90>: movl $0x1,-0x188(%rbp) 0xffffffff81426114 <sock_sendmsg+100>: movl $0xffffffff,-0x184(%rbp) 0xffffffff8142611e <sock_sendmsg+110>: movq $0x0,-0x180(%rbp) /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/list.h: 30 0xffffffff81426129 <sock_sendmsg+121>: mov %rax,-0x130(%rbp) /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/include/linux/list.h: 31 0xffffffff81426130 <sock_sendmsg+128>: mov %rax,-0x128(%rbp) /usr/src/debug/kernel-2.6.32-220.el6/linux-2.6.32-220.el6.x86_64/net/socket.c: 582 0xffffffff81426137 <sock_sendmsg+135>: lea -0xb0(%rbp),%rax ------------这个就是取iocb.private = &siocb;
crash> struct -x kiocb 0xffff881723cefc98 struct kiocb { ki_run_list = { next = 0xffff881700000064, prev = 0xffff881723cefca0 }, ki_flags = 0x0, ki_users = 0x1, ki_key = 0xffffffff, ki_filp = 0x0, ki_ctx = 0x0, ki_cancel = 0x0, ki_retry = 0x0, ki_dtor = 0x0, ki_obj = { user = 0xffff881723ce8080, tsk = 0xffff881723ce8080 }, ki_user_data = 0x0, ki_wait = { flags = 0x0, private = 0xffff881723ce8080, func = 0xffffffff810925e0 <autoremove_wake_function>, task_list = { next = 0xffff881723cefd08, prev = 0xffff881723cefd08 } }, ki_pos = 0xffff880b74cfdea8, private = 0xffff881723cefd88,------------------这个就是&siocb ki_opcode = 0xffff, ki_nbytes = 0xffff881723cefd68, ki_buf = 0xffffffea <Address 0xffffffea out of bounds>, ki_left = 0x0, ki_inline_vec = { iov_base = 0xffff881723cefdb8, iov_len = 0xffffffff810a4b2e }, ki_iovec = 0x6038, ki_nr_segs = 0xffffffff81ecbdc0, ki_cur_seg = 0x1d4e000, ki_list = { next = 0xffff880c06021180, prev = 0x3b0 }, ki_eventfd = 0x2654dacc }
crash> px 0xffff881723cefe38-0xb0 $16 = 0xffff881723cefd88 crash> struct sock_iocb 0xffff881723cefd88 struct sock_iocb { list = { next = 0xffffffff8100bdee <reschedule_interrupt+14>, prev = 0xffff881723cefe18 }, flags = 659, size = 84, sock = 0xffff88180357c400, sk = 0x1c, scm = 0x0, msg = 0xffff881723cefe58, async_msg = { msg_name = 0x1, msg_namelen = -1684701504, msg_iov = 0xffff881723cefeb0, msg_iovlen = 18446612203643961072, msg_control = 0xffffffffffffff02, msg_controllen = 18446744071580416266, msg_flags = 28 }, kiocb = 0xffff881723cefeb8 }
既然已经有了sock指针,那么获取的inet_sock.opt 为:
crash> struct inet_sock.opt 0xffff8811f8e77100 opt = 0xffff881656d78fc0
很奇怪,我们回到本文开始的ipc.opt,在udp_sendmsg中,我们看到ipc.opt被赋值为:
689 if (!ipc.opt)//开始为NULL,满足条件,然后赋值为inet->opt 690 ipc.opt = inet->opt;
ipc.opt(位于栈上 0xffff881723cefbb0)中保存的值为 ffff880c641d7f40,而正常这个值应该等于inet->opt(即 0xffff881656d78fc0)才对,唯一的可能是,inet->opt在赋值给ipc.opt之后,被其他流程修改了。
有个老同事怀疑,可能是因为inet->opt在赋值给ipc.opt之后,并且在ipc.opt被使用之前,被其他setsockopt的流程给改了。因为setsockopt的流程中,有如下的调用链:
opt = xchg(&inet->opt, opt);
kfree(opt);
inet->opt 之前的指针返回给opt,然后被直接kfree掉了。如果ipc.opt使用的是这个已经处于use-after-free状态的旧inet->opt,并且对应的slab对象已经被别人重新分配占用,就会出现两个指针内容不一致的情况。