ARM Linux Data Abort 异常处理流程
// 本文部分内容来自网络
// 基于内核版本3.4
发生Data Abort异常后,ARM处理器首先根据向量表找到对应异常入口,向量表位于arch/arm/kernel/entry-armv.S:
.globl __vectors_start __vectors_start: ARM( swi SYS_ERROR0 ) THUMB( svc #0 ) THUMB( nop ) W(b) vector_und + stubs_offset W(ldr) pc, .LCvswi + stubs_offset W(b) vector_pabt + stubs_offset W(b) vector_dabt + stubs_offset W(b) vector_addrexcptn + stubs_offset W(b) vector_irq + stubs_offset W(b) vector_fiq + stubs_offset .globl __vectors_end
对于data abort,对应的跳转地址是vector_dabt + stubs_offset。这个地址的指令定义也在entry-armv.S
/* * Data abort dispatcher * Enter in ABT mode, spsr = USR CPSR, lr = USR PC */ vector_stub dabt, ABT_MODE, 8 .long __dabt_usr @ 0 (USR_26 / USR_32) .long __dabt_invalid @ 1 (FIQ_26 / FIQ_32) .long __dabt_invalid @ 2 (IRQ_26 / IRQ_32) .long __dabt_svc @ 3 (SVC_26 / SVC_32) .long __dabt_invalid @ 4 .long __dabt_invalid @ 5 .long __dabt_invalid @ 6 .long __dabt_invalid @ 7 .long __dabt_invalid @ 8 .long __dabt_invalid @ 9 .long __dabt_invalid @ a .long __dabt_invalid @ b .long __dabt_invalid @ c .long __dabt_invalid @ d .long __dabt_invalid @ e .long __dabt_invalid @ f
怎么将vector_dabt + stubs_offset和上述代码对应起来呢?将vector_stub的宏定义展开就能看出:
/* * Vector stubs. * * This code is copied to 0xffff0200 so we can use branches in the * vectors, rather than ldr's. Note that this code must not * exceed 0x300 bytes. * * Common stub entry macro: * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC * * SP points to a minimal amount of processor-private memory, the address * of which is copied into r0 for the mode specific abort handler. */ .macro vector_stub, name, mode, correction=0 .align 5 vector_\name: .if \correction sub lr, lr, #\correction .endif @ @ Save r0, lr_<exception> (parent PC) and spsr_<exception> @ (parent CPSR) @ stmia sp, {r0, lr} @ save r0, lr mrs lr, spsr @保存跳转之前的CPSR到lr寄存器 str lr, [sp, #8] @ save spsr @ @ Prepare for SVC32 mode. IRQs remain disabled. @ mrs r0, cpsr eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE) msr spsr_cxsf, r0 @准备进入svc模式 @ @ the branch table must immediately follow this code @ and lr, lr, #0x0f @得到跳转前所处的模式(usr、svr等) THUMB( adr r0, 1f ) THUMB( ldr lr, [r0, lr, lsl #2] ) mov r0, sp ARM( ldr lr, [pc, lr, lsl #2] ) @根据模式跳转到相应的data abort指令,并进入svc模式 movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) .align 2 @ handler addresses follow this label 1: .endm
对于同一个异常,根据进入异常之前所处的模式,会跳转到不同的指令分支,这些指令分支紧跟在vector_stub宏定义的后面。如果进入data abort之前处于usr模式,那么跳转到__dabt_usr;如果处于svc模式,那么跳转到__dabt_svc;否则跳转到__dabt_invalid。 实际上,进入异常向量前Linux只能处于usr或者svc两种模式之一。这时因为irq等异常在跳转表中都要经过vector_stub宏,而不管之前是哪种状态,这个宏都会将CPU状态改为svc模式。 usr模式即Linux中的用户态模式,svc即内核模式。
__dabt_svc流程,调用dabt_helper继续:
__dabt_svc: svc_entry mov r2, sp dabt_helper @ @ IRQs off again before pulling preserved data off the stack @ disable_irq_notrace #ifdef CONFIG_TRACE_IRQFLAGS tst r5, #PSR_I_BIT bleq trace_hardirqs_on tst r5, #PSR_I_BIT blne trace_hardirqs_off #endif svc_exit r5 @ return from exception UNWIND(.fnend ) ENDPROC(__dabt_svc)
dabt_helper流程,调用CPU_DABORT_HANDLER继续:
.macro dabt_helper @ @ Call the processor-specific abort handler: @ @ r2 - pt_regs @ r4 - aborted context pc @ r5 - aborted context psr @ @ The abort handler must return the aborted address in r0, and @ the fault status register in r1. r9 must be preserved. @ #ifdef MULTI_DABORT ldr ip, .LCprocfns mov lr, pc ldr pc, [ip, #PROCESSOR_DABT_FUNC] #else bl CPU_DABORT_HANDLER #endif .endm #ifdef CONFIG_KPROBES .section .kprobes.text,"ax",%progbits #else .text #endif
CPU_DABORT_HANDLER是一个宏定义,以armv6架构为例,最终函数为v6_early_abort
# define CPU_DABORT_HANDLER v6_early_abort
v6_early_abort:
首先读取DFSR和DFAR两个协处理寄存器,保存在R1和R0中,然后调用do_DataAbort进入C语言环境:
ENTRY(v6_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR b do_DataAbort
DFSR:失效状态寄存器,Data Fault Status Register -> R1: fsr
DFAR:失效地址寄存器,Data Fault Address Register ->R0:addr
do_DataAbort:
asmlinkage void __exception do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); struct siginfo info; if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; info.si_addr = (void __user *)addr; arm_notify_die("", regs, &info, fsr, 0); }
const struct fsr_info *inf = fsr_info + fsr_fs(fsr):
根据FSR的状态值,查询fsr_info表,得到相应处理函数与信号值。
struct fsr_info { int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); int sig; int code; const char *name; };
static struct fsr_info fsr_info[] = { { do_bad, SIGBUS, 0, "unknown 0" }, { do_bad, SIGBUS, 0, "unknown 1" }, { do_bad, SIGBUS, 0, "unknown 2" }, { do_bad, SIGBUS, 0, "unknown 3" }, { do_bad, SIGBUS, 0, "reserved translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, ......
fsr_info数组列出了常见的地址失效处理方案,例如do_page_fault处理缺页中断,do_translation_fault处理转换错误,其他不能处理的默认为do_bad:
static int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; }
do_bad不做任何处理,返回1后再调用arm_notify_die后处理:
void arm_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, unsigned long err, unsigned long trap) { if (user_mode(regs)) { current->thread.error_code = err; current->thread.trap_no = trap; force_sig_info(info->si_signo, info, current); } else { die(str, regs, err); } }
arm_notify_die函数判断当前处于Kernel模式还是User模式,如果是Kernel模式直接die,如果是User模式,调用force_sig_info向进程强制发送fsr_info表中对应信号,强制发送信号可以忽略信号处理的SIG_IGN标记,和stask_struct的blocked域。进程收到信号后,接着进行coredump等流程。