/*
ARM Linux source code analysis (1) -- head.S

TigerZ(tigerz@yeah.net)
http://emblinux.org/

1. Analysis environment

    kernel: 2.6.10
    board: SMDK2410 1.32, 64M SDRAM, 128M SM card

2. head.S
*/

/*
 * linux/arch/arm/kernel/head.S
 *
 *  Copyright (C) 1994-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  Kernel startup code for all 32-bit CPUs
 */
#include <linux/config.h>
#include <linux/linkage.h>
#include <linux/init.h>

#include <asm/assembler.h>
#include <asm/mach-types.h>
#include <asm/procinfo.h>
#include <asm/ptrace.h>
#include <asm/constants.h>

#ifndef CONFIG_XIP_KERNEL /* annotator: this macro is not defined on the analysed platform */
/*
 * We place the page tables 16K below TEXTADDR.  Therefore, we must make sure
 * that TEXTADDR is correctly set.  Currently, we expect the least significant
 * 16 bits to be 0x8000, but we could probably relax this restriction to
 * TEXTADDR >= PAGE_OFFSET + 0x4000
 *
 * Note that swapper_pg_dir is the virtual address of the page tables, and
 * pgtbl gives us a position-independent reference to these tables.  We can
 * do this because stext == TEXTADDR
 */
#if (TEXTADDR & 0xffff) != 0x8000
    #error TEXTADDR must start at 0xXXXX8000
#endif

    /*
     * swapper_pg_dir = TEXTADDR - 0x4000.  This is a link-time (virtual)
     * address.  The annotator's example value "0x30008000 - 0x4000" is
     * presumably the corresponding run-time address on the analysed board
     * before the MMU is enabled, i.e. what pgtbl produces there, not the
     * value of this symbol.
     */
    .globl swapper_pg_dir
    .equ    swapper_pg_dir, TEXTADDR - 0x4000

    /*
     * pgtbl rd, phys
     *   rd = (address of stext as computed by adr) - 0x4000, where
     *   stext == TEXTADDR.  The phys argument is not used by this variant.
     *
     * adr forms its result by adding/subtracting the offset of the label
     * to/from pc, so the value depends only on pc and on the distance
     * between pc and the label, not on the address the code was linked at.
     * Code that runs before the MMU is turned on has to be position
     * independent, which is why adr is used so often in this file.
     */
    .macro pgtbl, rd, phys
    adr     \rd, stext
    sub     \rd, \rd, #0x4000
    .endm
#else
/*
* XIP Kernel:
*
* We place the page tables 16K below DATAADDR.  Therefore, we must make sure
* that DATAADDR is correctly set.  Currently, we expect the least significant
* 16 bits to be 0x8000, but we could probably relax this restriction to
* DATAADDR >= PAGE_OFFSET + 0x4000
*
* Note that pgtbl is meant to return the physical address of swapper_pg_dir.
* We can't make it relative to the kernel position in this case since
* the kernel can physically be anywhere.
*/
#if (DATAADDR & 0xffff) != 0x8000
#error DATAADDR must start at 0xXXXX8000
#endif

.globl swapper_pg_dir
.equ swapper_pg_dir, DATAADDR - 0x4000

/*
* pgtbl rd, phys
*   rd = ((DATAADDR - 0x4000) - VIRT_OFFSET) + phys
*   The constant is loaded from the literal pool (ldr =), then phys is added.
*/
.macro pgtbl, rd, phys
ldr \rd, =((DATAADDR - 0x4000) - VIRT_OFFSET)
add \rd, \rd, \phys
.endm
#endif

/*
 * Kernel startup entry point.
 * ---------------------------
 *
 * This is normally called from the decompressor code.  The requirements
 * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
 * r1 = machine nr.
 *
 * This code is mostly position independent, so if you link the kernel at
 * 0xc0008000, you call this at __pa(0xc0008000).
 *
 * See linux/arch/arm/tools/mach-types for the complete list of machine
 * numbers for r1.
 *
 * We're trying to keep crap to a minimum; DO NOT add any machine specific
 * crap here - that's what the boot loader (or in extreme, well justified
 * circumstances, zImage) is for.
 *
 * Sequence performed below:
 *   1. copy r0 to r12, then write PSR_F_BIT | PSR_I_BIT | MODE_SVC to cpsr_c;
 *   2. __lookup_processor_type     (r10 == 0 -> __error with 'p');
 *   3. __lookup_architecture_type  (r7  == 0 -> __error with 'a');
 *   4. __create_page_tables;
 *   5. jump to r10 + 12 with lr = __turn_mmu_on.
 */
    __INIT
    .type   stext, #function
ENTRY(stext)
    /* keep the r0 value passed by the caller; r0 is reused as scratch below */
    mov     r12, r0
    mov     r0, #PSR_F_BIT | PSR_I_BIT | MODE_SVC   @ make sure svc mode
    msr     cpsr_c, r0          @ and all irqs disabled
    bl      __lookup_processor_type     /* look up the processor type */
    teq     r10, #0             @ invalid processor?
    moveq r0, #'p'        @ yes, error 'p'
    beq     __error
    bl      __lookup_architecture_type /* look up the machine (architecture) type */
    teq     r7, #0              @ invalid architecture?
    moveq r0, #'a'        @ yes, error 'a'
    beq     __error
    bl      __create_page_tables    /* build the initial page tables */

    /*
     * The following calls CPU specific code in a position independent
     * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
     * xxx_proc_info structure selected by __lookup_processor_type
     * above.  On return, the CPU will be ready for the MMU to be
     * turned on, and r0 will hold the CPU control register value.
     *
     * Review note: the original text named __lookup_architecture_type
     * here, and the annotator's translation said "r0 = base of
     * xxx_proc_info".  The register used below is r10, which is the value
     * returned by __lookup_processor_type.
     */
    adr     lr, __turn_mmu_on       @ return (PIC) address
                                    /* the code reached through r10 + 12
                                       returns to __turn_mmu_on */
    add     pc, r10, #12            /* jump to offset 12 of the proc_info
                                       entry; per the annotator this reaches
                                       __arm920_setup on the analysed board,
                                       see arch/arm/mm/proc-arm920.S */

    /*
     * Table of link-time addresses used once the MMU is on.
     * Word 0 is loaded into lr by __turn_mmu_on; the remaining words are
     * loaded by __mmap_switched (ldmia from __switch_data + 4) into the
     * registers named on each line, so the order must not change.
     */
    .type   __switch_data, %object
__switch_data:
    .long   __mmap_switched
    .long   __data_loc              @ r2
    .long   __data_start            @ r3
    .long   __bss_start             @ r4
    .long   _end                    @ r5
    .long   processor_id            @ r6
    .long   __machine_arch_type     @ r7
    .long   cr_alignment            @ r8
    .long   init_thread_union+8192  @ sp

    /*
     * Enable the MMU.  This completely changes the structure of the visible
     * memory space.  You will not be able to trace execution through this.
     * If you have an enquiry about this, *please* check the linux-arm-kernel
     * mailing list archives BEFORE sending another post to the list.
     *
     * In:  r0 = value to be written to the CP15 control register (c1)
     * Out: continues at the address stored in word 0 of __switch_data
     *      (__mmap_switched); r3 and lr are overwritten.
     */
    .align 5
    .type   __turn_mmu_on, %function
__turn_mmu_on:
    ldr     lr, __switch_data                   /* lr = word 0 of __switch_data,
                                                   i.e. __mmap_switched */
    #ifdef CONFIG_ALIGNMENT_TRAP
    orr     r0, r0, #2                          @ ...........A.
                                                /* set the A bit so that unaligned
                                                   accesses raise a trap */
    #endif
    mcr     p15, 0, r0, c1, c0, 0               @ write control reg
    mrc     p15, 0, r3, c0, c0, 0               @ read id reg
    /*
     * Two no-op moves execute before the jump.  NOTE(review): presumably
     * they give the control-register write time to take effect - confirm
     * against the CPU manual.
     */
    mov     r3, r3
    mov     r3, r3
    mov     pc, lr

/*
 * The following fragment of code is executed with the MMU on, and uses
 * absolute addresses; this is not position independent.
 *
 *  r0  = processor control register
 *  r1  = machine ID
 *  r9  = processor ID
 *  r12 = value of r0 when kernel was called (currently always zero)
 *
 * Annotator's note: this code mainly prepares for the call to the C function
 * start_kernel.  The MMU has to be on before it runs because the kernel's
 * position-dependent code is linked to start at 0xc0008000; that is why the
 * page tables were built and the MMU was enabled beforehand.
 */
    .align 5
__mmap_switched:
    /* load r2-r8 and sp from the words that follow word 0 of __switch_data */
    adr         r2, __switch_data + 4
    ldmia       r2, {r2, r3, r4, r5, r6, r7, r8, sp}

    /*
     * If r2 (__data_loc) differs from r3 (__data_start), copy words from
     * [r2] to [r3] until r3 reaches r4 (__bss_start).  fp is the scratch
     * register for the copy.
     */
    cmp         r2, r3              @ Copy data segment if needed
1:  cmpne       r3, r4
    ldrne       fp, [r2], #4
    strne       fp, [r3], #4
    bne         1b

    /* store zero words from r4 (__bss_start) up to, not including, r5 (_end) */
    mov         fp, #0              @ Clear BSS (and zero fp)
1:  cmp         r4, r5
    strcc       fp, [r4],#4
    bcc         1b

    str         r9, [r6]   @ Save processor ID
    str         r1, [r7]   @ Save machine type
    bic         r2, r0, #2   @ Clear 'A' bit
    /* two words stored at r8 (cr_alignment): r0, then r0 with bit 1 cleared */
    stmia       r8, {r0, r2}   @ Save control register values
    b           start_kernel


/*
 * Setup the initial page tables.  We only setup the barest
 * amount which are required to get the kernel running, which
 * generally means mapping in the kernel code.
 *
 * r5 = physical address of start of RAM
 * r6 = physical IO address
 * r7 = byte offset into page tables for IO
 * r8 = page table flags
 *
 * Registers written here: r0, r2, r3, r4 (page table address), and r8
 * (bits 0x0c are cleared before the optional IO mappings).
 * The worked examples in the comments are the annotator's values for the
 * analysed board (RAM at 0x30000000, flags 0x00000c1e).
 */
__create_page_tables:
        /* r4 = page table address (0x30004000 on the analysed board) */
        pgtbl r4, r5                          @ page table address

        /*
         * Clear the 16K level 1 swapper page table
         *
         * Four words are stored per iteration until r0 reaches r4 + 0x4000.
         */
        mov     r0, r4
        mov r3, #0
        add r2, r0, #0x4000
1: str r3, [r0], #4
        str r3, [r0], #4
        str r3, [r0], #4
        str r3, [r0], #4
        teq r0, r2
        bne 1b

        /*
         * Create identity mapping for first MB of kernel to
         * cater for the MMU enable.  This identity mapping
         * will be removed by paging_init().  We use our current program
         * counter to determine corresponding section base address.
         */
        mov r2, pc, lsr #20   @ start of kernel section
        add r3, r8, r2, lsl #20  @ flags + kernel base (flags = 0x00000c1e)
        str r3, [r4, r2, lsl #2]  @ identity mapping
        /*
         * Address of the descriptor that is written:
         *
         *           r4           +  (pc >> 20) << 2
         * 31                   14 13           2 1 0
         * -------------------------------------------
         * |   Translation base   | Table index  |0|0|
         * -------------------------------------------
         * This entry makes the virtual address of the section that contains
         * pc equal to its physical address.  Example:
         * r2 = pc >> 20 = 0x300
         * r3 = 0x00000c1e + (0x300 << 20) = 0x30000c1e
         * r4 = 0x30004000
         * r2 << 2 = 0xc00
         * [0x30004000 + 0xc00] = 0x30000c1e
         */

        /*
         * Now setup the pagetables for our kernel direct
         * mapped region.  We round TEXTADDR down to the
         * nearest megabyte boundary.  It is assumed that
         * the kernel fits within 4 contiguous 1MB sections.
         */
        /* example: TEXTADDR = 0xc0008000, so (TEXTADDR & 0xff000000) >> 18 = 0x3000 */
        add r0, r4,  #(TEXTADDR & 0xff000000) >> 18 @ start of kernel
        /* r0 = 0x30004000 + 0x3000 */
        str r3, [r0, #(TEXTADDR & 0x00f00000) >> 18]!
        /* [0x30004000 + 0x3000] = 0x30000c1e */
        /*
         * The annotator asks why this is not written as
         *     add r0, r4, #(TEXTADDR & 0xfff00000) >> 18
         *     str r3, [r0]
         * which computes the same address.
         * NOTE(review): presumably because an ARM data-processing immediate
         * is an 8-bit value rotated by an even amount, so the 12-bit section
         * index shifted left by 2 cannot be encoded for every TEXTADDR.
         * Splitting it into the top 8 bits (add) and the next 4 bits (str
         * offset) always encodes - confirm against the ARM ARM.
         */
        add r3, r3, #1 << 20
        str r3, [r0, #4]!   @ KERNEL + 1MB
        /* [0x30004000 + 0x3004] = 0x30100c1e */
        add r3, r3, #1 << 20
        str r3, [r0, #4]!   @ KERNEL + 2MB
        /* [0x30004000 + 0x3008] = 0x30200c1e */
        add r3, r3, #1 << 20
        str r3, [r0, #4]   @ KERNEL + 3MB
        /* [0x30004000 + 0x300c] = 0x30300c1e */

        /*
         * Then map first 1MB of ram in case it contains our boot params.
         *
         * The entry for virtual address VIRT_OFFSET is set to r5 + r8
         * (physical start of RAM + flags).  Annotator's note: on the
         * analysed board the section holding the boot params is the same
         * section that contains pc, but that is not guaranteed in general.
         */
        add r0, r4, #VIRT_OFFSET >> 18
        add r2, r5, r8
        str r2, [r0]

/* annotator: the code below is not analysed */
#ifdef CONFIG_XIP_KERNEL
/*
* Map some ram to cover our .data and .bss areas.
* Mapping 3MB should be plenty.
*/
sub r3, r4, r5
mov r3, r3, lsr #20
add r0, r0, r3, lsl #2
add r2, r2, r3, lsl #20
str r2, [r0], #4
add r2, r2, #(1 << 20)
str r2, [r0], #4
add r2, r2, #(1 << 20)
str r2, [r0]
#endif

bic r8, r8, #0x0c   @ turn off cacheable
@ and bufferable bits
#ifdef CONFIG_DEBUG_LL
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*
* Entries are written from r4 + r7 for 0x0800 bytes of table, or up to the
* end of the 0x4000-byte table if fewer bytes remain; each entry is
* r6 | r8, advanced by 1MB per entry.
*/
add r0, r4, r7
rsb r3, r7, #0x4000   @ PTRS_PER_PGD*sizeof(long)
cmp r3, #0x0800
addge r2, r0, #0x0800
addlt r2, r0, r3
orr r3, r6, r8
1: str r3, [r0], #4
add r3, r3, #1 << 20
teq r0, r2
bne 1b
#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder, we need to map in
* the 16550-type serial port for the debug messages
*/
teq r1, #MACH_TYPE_NETWINDER
teqne r1, #MACH_TYPE_CATS
bne 1f
add r0, r4, #0x3fc0   @ ff000000
mov r3, #0x7c000000
orr r3, r3, r8
str r3, [r0], #4
add r3, r3, #1 << 20
str r3, [r0], #4
1:
#endif
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug.  This is
* only for Acorn RiscPC architectures.
*/
add r0, r4, #0x80   @ 02000000
mov r3, #0x02000000
orr r3, r3, r8
str r3, [r0]
add r0, r4, #0x3600   @ d8000000
str r3, [r0]
#endif
        /* return to the caller */
        mov pc, lr
        .ltorg

/*
 * Exception handling.  Something went wrong and we can't proceed.  We
 * ought to tell the user, but since we don't have any guarantee that
 * we're even running on the right architecture, we do virtually nothing.
 *
 * r0 = ascii error character:
 * a = invalid architecture
 * p = invalid processor
 * i = invalid calling convention
 *
 * Generally, only serious errors cause this.
 *
 * This routine never returns: it ends in an endless loop.
 */
__error:
        /* with CONFIG_DEBUG_LL: print err_str followed by the error character */
        #ifdef CONFIG_DEBUG_LL
        mov r8, r0    @ preserve r0
        adr r0, err_str
        bl printascii
        mov r0, r8
        bl printch
        #endif

#ifdef CONFIG_ARCH_RPC
/*
* Turn the screen red on a error - RiscPC only.
*
* r3 is built up to 0x11111111 and stored to four consecutive words
* starting at 0x02000000.
*/
mov r0, #0x02000000
mov r3, #0x11
orr r3, r3, r3, lsl #8
orr r3, r3, r3, lsl #16
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
#endif

/* spin forever */
1: mov r0, r0
        b 1b

#ifdef CONFIG_DEBUG_LL
err_str:
        .asciz "\nError: "
        .align
#endif

/*
 * Read processor ID register (CP#15, CR0), and look up in the linker-built
 * supported processor list.  Note that we can't use the absolute addresses
 * for the __proc_info lists since we aren't running with the MMU on
 * (and therefore, we are not in the correct address space).  We have to
 * calculate the offset.
 *
 * The offset is the run-time address of the table at label 2 (obtained with
 * adr) minus the link-time address of that same label (stored in the table
 * as ".long 2b").  Adding the offset to a link-time address gives the
 * address that is valid in the current address space.
 *
 * The end-of-list test uses the unsigned condition "lo" because the operands
 * are addresses.  A signed "lt" would end the scan early if the list ended
 * at or crossed 0x80000000.
 *
 * Returns:
 * r5, r6, r7 corrupted
 * r8  = page table flags
 * r9  = processor ID
 * r10 = pointer to processor structure (0 if no entry matched)
 */
__lookup_processor_type:
        adr     r5, 2f                  /* r5 = run-time address of the table */
        ldmia   r5, {r7, r9, r10}       /* r7  = __proc_info_end,
                                           r9  = __proc_info_begin,
                                           r10 = link-time address of label 2 */
        sub     r5, r5, r10             @ convert addresses
        add     r7, r7, r5              @ to our address space
        add     r10, r9, r5             /* r10 = first list entry, converted */
        mrc     p15, 0, r9, c0, c0      @ get processor id
1:      ldmia   r10, {r5, r6, r8}       @ value, mask, mmuflags
        and     r6, r6, r9              @ mask wanted bits
        teq     r5, r6
        moveq   pc, lr                  /* match: return with r10 = entry */
        add     r10, r10, #PROC_INFO_SZ @ sizeof(proc_info_list)
        cmp     r10, r7
        blo     1b                      /* unsigned: more entries remain */
        mov     r10, #0                 @ unknown processor
        mov     pc, lr

        /*
         * Look in include/asm-arm/procinfo.h and arch/arm/kernel/arch.[ch] for
         * more information about the __proc_info and __arch_info structures.
         *
         * This table is also read by __lookup_architecture_type through
         * "adr r4, 2b", so the label and the order of the words must be kept.
         */
2:      .long   __proc_info_end
        .long   __proc_info_begin
        .long   2b
        .long   __arch_info_begin
        .long   __arch_info_end

/*
 * Lookup machine architecture in the linker-build list of architectures.
 * Note that we can't use the absolute addresses for the __arch_info
 * lists since we aren't running with the MMU on (and therefore, we are
 * not in the correct address space).  We have to calculate the offset.
 *
 * The address conversion works as in __lookup_processor_type: the table that
 * follows that routine (label 2) is located with adr, and the difference
 * between its run-time and link-time address is added to __arch_info_begin
 * and __arch_info_end.
 *
 * The end-of-list test uses the unsigned condition "lo" because the operands
 * are addresses.  A signed "lt" would end the scan early if the list ended
 * at or crossed 0x80000000.
 *
 *  r1 = machine architecture number
 * Returns:
 *  r2, r3, r4 corrupted
 *  r5 = physical start address of RAM
 *  r6 = physical address of IO
 *  r7 = byte offset into page tables for IO (0 if no entry matched)
 */
__lookup_architecture_type:
        adr     r4, 2b                  /* r4 = run-time address of the table */
        ldmia   r4, {r2, r3, r5, r6, r7} @ throw away r2, r3
        sub     r5, r4, r5              @ convert addresses
        add     r4, r6, r5              @ to our address space
        add     r7, r7, r5
1:      ldr     r5, [r4]                @ get machine type
        teq     r5, r1                  @ matches loader number?
        beq     2f                      @ found
        add     r4, r4, #SIZEOF_MACHINE_DESC @ next machine_desc
        cmp     r4, r7
        blo     1b                      /* unsigned: more entries remain */
        mov     r7, #0                  @ unknown architecture
        mov     pc, lr
        /* the three words after the machine type become r5, r6 and r7 */
2:      ldmib   r4, {r5, r6, r7}        @ found, get results
        mov     pc, lr

/* Reposted from (zhuanzi): http://www.cnitblog.com/zouzheng/articles/14673.html */