理解Linux内核之中断控制
乍一看下边的Linux内核代码,貌似L3389有bug,于是我就饶有兴趣地阅读了一下local_irq_save/local_irq_restore的源代码。
/* linux-4.14.12/mm/slab.c#3389 */ 3377 static __always_inline void * 3378 slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) 3379 { 3380 unsigned long save_flags; 3381 void *objp; .... 3389 local_irq_save(save_flags); 3390 objp = __do_cache_alloc(cachep, flags); 3391 local_irq_restore(save_flags); .... 3399 return objp; 3400 }
在L3380和L3389中, 如果local_irq_save()是一个函数,必然存在着bug, 因为需要把save_flags的变量地址传给local_irq_save()才对。
3380 unsigned long save_flags; .... 3389 local_irq_save(save_flags);
L3389是不是该是这样才对啊?
3389 local_irq_save(&save_flags);
但是,local_irq_save()和local_irq_restore()不是函数,而是宏,这样就没有bug了。
1. local_irq_save()和local_irq_restore()的实现
/* linux-4.14.12/include/linux/irqflags.h#139 */ 105 #ifdef CONFIG_TRACE_IRQFLAGS ... 110 #define local_irq_save(flags) \ 111 do { \ 112 raw_local_irq_save(flags); \ 113 trace_hardirqs_off(); \ 114 } while (0) 115 116 117 #define local_irq_restore(flags) \ 118 do { \ 119 if (raw_irqs_disabled_flags(flags)) { \ 120 raw_local_irq_restore(flags); \ 121 trace_hardirqs_off(); \ 122 } else { \ 123 trace_hardirqs_on(); \ 124 raw_local_irq_restore(flags); \ 125 } \ 126 } while (0) ... 135 #else /* !CONFIG_TRACE_IRQFLAGS */ ... 139 #define local_irq_save(flags) \ 140 do { \ 141 raw_local_irq_save(flags); \ 142 } while (0) 143 #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) ... 146 #endif /* CONFIG_TRACE_IRQFLAGS */
为简单起见,我们只关注!CONFIG_TRACE_IRQFLAGS分支就好了,
139 #define local_irq_save(flags) \ 140 do { \ 141 raw_local_irq_save(flags); \ 142 } while (0) 143 #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
于是, 我们可以认为, local_irq_save()/local_irq_restore()等同于:
#define local_irq_save(flags) raw_local_irq_save(flags) #define local_irq_restore(flags) raw_local_irq_restore(flags)
2. raw_local_irq_save()和raw_local_irq_restore()的实现
/* linux-4.14.12/include/linux/irqflags.h#78 */ 78 #define raw_local_irq_save(flags) \ 79 do { \ 80 typecheck(unsigned long, flags); \ 81 flags = arch_local_irq_save(); \ 82 } while (0) 83 #define raw_local_irq_restore(flags) \ 84 do { \ 85 typecheck(unsigned long, flags); \ 86 arch_local_irq_restore(flags); \ 87 } while (0)
关于宏typecheck()不做解释,因为很直观,就是保证flags的类型必须是unsigned long。于是,raw_local_irq_save()和raw_local_irq_restore()等同于:
#define raw_local_irq_save(flags) flags = arch_local_irq_save() #define raw_local_irq_restore(flags) arch_local_irq_restore(flags)
下面以x86为例说明arch_local_irq_save()和arch_local_irq_restore()这两个函数的实现。
3. arch_local_irq_save()和arch_local_irq_restore()这两个函数在x86上的实现
/* linux-4.14.12/arch/x86/include/asm/irqflags.h#70 */ 70 static inline notrace unsigned long arch_local_save_flags(void) 71 { 72 return native_save_fl(); 73 } 74 75 static inline notrace void arch_local_irq_restore(unsigned long flags) 76 { 77 native_restore_fl(flags); 78 } ... 111 static inline notrace unsigned long arch_local_irq_save(void) 112 { 113 unsigned long flags = arch_local_save_flags(); 114 arch_local_irq_disable(); 115 return flags; 116 }
函数arch_local_irq_save()在调用arch_local_save_flags()保存标志寄存器之后还做了一件事,那就是调用arch_local_irq_disable()把中断禁止掉。 接下来,我们首先看看native_save_fl()和native_restore_fl()的具体实现。
3.1 native_save_fl()的实现
/* linux-4.14.12/arch/x86/include/asm/irqflags.h#16 */ 16 static inline unsigned long native_save_fl(void) 17 { 18 unsigned long flags; 19 20 /* 21 * "=rm" is safe here, because "pop" adjusts the stack before 22 * it evaluates its effective address -- this is part of the 23 * documented behavior of the "pop" instruction. 24 */ 25 asm volatile("# __raw_save_flags\n\t" 26 "pushf ; pop %0" 27 : "=rm" (flags) 28 : /* no input */ 29 : "memory"); 30 31 return flags; 32 }
这是一段内嵌的汇编代码,后面写一个简单的demo再解释。
3.2 native_restore_fl()的实现
/* linux-4.14.12/arch/x86/include/asm/irqflags.h#34 */ 34 static inline void native_restore_fl(unsigned long flags) 35 { 36 asm volatile("push %0 ; popf" 37 : /* no output */ 38 :"g" (flags) 39 :"memory", "cc"); 40 }
同样,这也是内嵌的汇编代码,后面写一个简单的demo再解释。
3.3 反汇编理解native_save_fl()和native_restore_fl()
- foo.c
1 static inline unsigned long native_save_fl(void) 2 { 3 unsigned long flags; 4 5 /* 6 * "=rm" is safe here, because "pop" adjusts the stack before 7 * it evaluates its effective address -- this is part of the 8 * documented behavior of the "pop" instruction. 9 */ 10 asm volatile("# __raw_save_flags\n\t" 11 "pushf ; pop %0" 12 : "=rm" (flags) 13 : /* no input */ 14 : "memory"); 15 16 return flags; 17 } 18 19 static inline void native_restore_fl(unsigned long flags) 20 { 21 asm volatile("push %0 ; popf" 22 : /* no output */ 23 :"g" (flags) 24 :"memory", "cc"); 25 } 26 27 int main(int argc, char *argv[]) 28 { 29 unsigned long flags = native_save_fl(); 30 native_restore_fl(flags); 31 return 0; 32 }
- 用gcc编译并反汇编
veli@idorax:/tmp$ gcc -g -Wall -o foo foo.c veli@idorax:/tmp$ veli@idorax:/tmp$ gdb foo GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.5) 7.11.1 ...<snip>................................... (gdb) set disassembly-flavor intel (gdb) (gdb) disas /m main Dump of assembler code for function main: 28 { 0x00000000004004f5 <+0>: push rbp 0x00000000004004f6 <+1>: mov rbp,rsp 0x00000000004004f9 <+4>: sub rsp,0x20 0x00000000004004fd <+8>: mov DWORD PTR [rbp-0x14],edi 0x0000000000400500 <+11>: mov QWORD PTR [rbp-0x20],rsi 29 unsigned long flags = native_save_fl(); 0x0000000000400504 <+15>: call 0x4004d6 <native_save_fl> 0x0000000000400509 <+20>: mov QWORD PTR [rbp-0x8],rax 30 native_restore_fl(flags); 0x000000000040050d <+24>: mov rax,QWORD PTR [rbp-0x8] 0x0000000000400511 <+28>: mov rdi,rax 0x0000000000400514 <+31>: call 0x4004e6 <native_restore_fl> 31 return 0; 0x0000000000400519 <+36>: mov eax,0x0 32 } 0x000000000040051e <+41>: leave 0x000000000040051f <+42>: ret End of assembler dump. (gdb) # (gdb) disas /m native_save_fl Dump of assembler code for function native_save_fl: 2 { 0x00000000004004d6 <+0>: push rbp 0x00000000004004d7 <+1>: mov rbp,rsp 3 unsigned long flags; 4 5 /* 6 * "=rm" is safe here, because "pop" adjusts the stack before 7 * it evaluates its effective address -- this is part of the 8 * documented behavior of the "pop" instruction. 9 */ 10 asm volatile("# __raw_save_flags\n\t" 0x00000000004004da <+4>: pushf 0x00000000004004db <+5>: pop rax 0x00000000004004dc <+6>: mov QWORD PTR [rbp-0x8],rax 11 "pushf ; pop %0" 12 : "=rm" (flags) 13 : /* no input */ 14 : "memory"); 15 16 return flags; 0x00000000004004e0 <+10>: mov rax,QWORD PTR [rbp-0x8] 17 } 0x00000000004004e4 <+14>: pop rbp 0x00000000004004e5 <+15>: ret End of assembler dump. 
(gdb) # (gdb) disas /m native_restore_fl Dump of assembler code for function native_restore_fl: 20 { 0x00000000004004e6 <+0>: push rbp 0x00000000004004e7 <+1>: mov rbp,rsp 0x00000000004004ea <+4>: mov QWORD PTR [rbp-0x8],rdi 21 asm volatile("push %0 ; popf" 0x00000000004004ee <+8>: push QWORD PTR [rbp-0x8] 0x00000000004004f1 <+11>: popf 22 : /* no output */ 23 :"g" (flags) 24 :"memory", "cc"); 25 } 0x00000000004004f2 <+12>: nop 0x00000000004004f3 <+13>: pop rbp 0x00000000004004f4 <+14>: ret End of assembler dump. (gdb) q veli@idorax:/tmp$
根据上面的反汇编代码不难看出,native_save_fl()和native_restore_fl()的实现异常简单。
- native_save_fl()
; static inline unsigned long native_save_fl(void) 0x00000000004004da <+4>: pushf ; 把标志寄存器(FLAGS)压入栈(Stack)中 0x00000000004004db <+5>: pop rax ; 通过出栈操作把标志寄存器的值存入rax中 0x00000000004004dc <+6>: mov QWORD PTR [rbp-0x8],rax ; 把rax存入局部变量flags中 0x00000000004004e0 <+10>: mov rax,QWORD PTR [rbp-0x8] ; 根据ABI, 返回值总是存于rax中,这里等同于return flags
- native_restore_fl()
; static inline void native_restore_fl(unsigned long flags) 0x00000000004004ea <+4>: mov QWORD PTR [rbp-0x8],rdi ; 根据ABI, 函数的第一个参数通过寄存器rdi传递 ; 于是,等同于将第一个参数flags存入一个局部变量中 0x00000000004004ee <+8>: push QWORD PTR [rbp-0x8] ; 等同于将第一个参数flags压入栈中 0x00000000004004f1 <+11>: popf ; 通过出栈操作把flags的值恢复到标志寄存器FLAGS中
注意:标志寄存器FLAGS(16位: flags, 32位: eflags, 64位: rflags)不能直接作为push和pop指令的操作数,要整体保存或恢复它,必须通过pushf和popf这两个指令(再配合普通的pop和push与通用寄存器或内存交换数据)。
因此,我们可以得出如下结论,(在x86平台上)
- local_irq_save()就是把标志寄存器保存到一个局部变量flags中,然后禁止中断;
- local_irq_restore()则是通过局部变量flags的值恢复标志寄存器,中断状态随之恢复到保存时的状态(如果保存时中断是打开的,中断就重新打开;如果保存时中断本来就是禁止的,则仍然保持禁止)。
4. arch_local_irq_disable()和arch_local_irq_enable()在x86上的实现
/* linux-4.14.12/arch/x86/include/asm/irqflags.h#80 */ 80 static inline notrace void arch_local_irq_disable(void) 81 { 82 native_irq_disable(); 83 } 84 85 static inline notrace void arch_local_irq_enable(void) 86 { 87 native_irq_enable(); 88 } /* linux-4.14.12/arch/x86/include/asm/irqflags.h#42 */ 42 static inline void native_irq_disable(void) 43 { 44 asm volatile("cli": : :"memory"); 45 } 46 47 static inline void native_irq_enable(void) 48 { 49 asm volatile("sti": : :"memory"); 50 }
从上面的代码可以看出,在x86中,
- arch_local_irq_disable()的实质是执行汇编指令cli
- arch_local_irq_enable()的实质则是执行汇编指令sti
到此为止,我们已经搞清楚了如下4个宏的作用。
- local_irq_disable() : 禁止本地中断传递。 在x86上,本质上是调用汇编指令cli;
- local_irq_enable() : 激活本地中断传递。 在x86上,本质上是调用汇编指令sti;
- local_irq_save() : 保存本地中断传递的当前状态,然后禁止本地中断传递。在x86上,本质上是调用pushf+pop先保存标志寄存器到一个变量flags中,然后调用汇编指令cli;
- local_irq_restore() : 恢复本地中断传递到给定的状态。在x86上,本质上是调用push+popf重置标志寄存器。
更多有关中断控制的细节,请阅读源代码和《Linux Kernel Development》一书的第7章:中断和中断处理。 常用的中断控制方法,如下图所示。
If all you have is a hammer, everything looks like a nail. | 如果你拥有的东西就只有一把锤子,那么一切事物在你眼里都看起来是钉子。 (P.S. 保持Open的心态很重要啊)