《深入理解Linux内核3rd》学习笔记——初始化临时页表
初始化临时页表的工作由startup_32函数完成,本文要讲的startup_32函数定义在arch/i386/kernel/head.S文件中。其代码如下(解析请见注释)。
1 ENTRY(startup_32)
2
3 /*
4 * Set segments to known values.
5 */
6 cld
7 lgdt boot_gdt_descr - __PAGE_OFFSET
8 movl $(__BOOT_DS),%eax
9 movl %eax,%ds
10 movl %eax,%es
11 movl %eax,%fs
12 movl %eax,%gs
13
14 /*
15 * Clear BSS first so that there are no surprises...
16 * No need to cld as DF is already clear from cld above...
17 */
18 xorl %eax,%eax
19 movl $__bss_start - __PAGE_OFFSET,%edi
20 movl $__bss_stop - __PAGE_OFFSET,%ecx
21 subl %edi,%ecx
22 shrl $2,%ecx
23 rep ; stosl
24
25 /*
26 * Initialize page tables. This creates a PDE and a set of page
27 * tables, which are located immediately beyond _end. The variable
28 * init_pg_tables_end is set up to point to the first "safe" location.
29 * Mappings are created both at virtual address 0 (identity mapping)
30 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
31 *
32 * Warning: don't use %esi or the stack in this code. However, %esp
33 * can be used as a GPR if you really need it...
34 */
35 page_pde_offset = (__PAGE_OFFSET >> 20); /* __PAGE_OFFSET是0xc0000000,page_pde_offset = 3072 = 0xc00,是页目录中的偏移 */
36
37 movl $(pg0 - __PAGE_OFFSET), %edi /* 将pg0对应的物理地址送到edi */
38 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx /* 将swapper_pg_dir(临时页全局目录)对应的物理地址送到edx */
39 movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
40 10:
41 leal 0x007(%edi),%ecx /* Create PDE entry —— 构造一个页目录项(地址+标志位),把edi指向的物理地址加上0x007放入ecx中 */
42 movl %ecx,(%edx) /* 第一次循环时把ecx中的内容放入swapper_pg_dir的第0项里,第二次循环时把ecx中的内容放入swapper_pg_dir的第1项里 */
43 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
44 addl $4,%edx /* edx加上一个页目录项长度(4字节),指向页全局目录的下一个页目录项地址 */
45 movl $1024, %ecx /* 设置计数器:每个页表要初始化1024个页表项 */
46 11:
47 stosl /* eax -> [edi]; edi = edi + 4 */
48 addl $0x1000,%eax /* eax加上0x1000(4KB,一个页框的大小),即前进到下一个页框的物理地址(标志位0x007保持不变),为下次stosl作准备 */
49 loop 11b /* 循环操作,其实就是初始化1024个页表项 */
50 /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
51 /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
52 leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp /* ebp = edi指向的物理地址 + INIT_MAP_BEYOND_END(128K)+ 0x007 */
53 cmpl %ebp,%eax /* 比较ebp与eax */
54 jb 10b /* 如果eax < ebp,则跳到上面10的地方 */
55 movl %edi,(init_pg_tables_end - __PAGE_OFFSET) /* 循环结束后edi指向最后一个页表之后的第一个字节(若共建立了两个页表,则为pg0+0x2000),将此值存入init_pg_tables_end中,记录临时页表的结束位置 */
56
57 #ifdef CONFIG_SMP
58 xorl %ebx,%ebx /* This is the boot CPU (BSP) */
59 jmp 3f
60
61 /*
62 * Non-boot CPU entry point; entered from trampoline.S
63 * We can't lgdt here, because lgdt itself uses a data segment, but
64 * we know the trampoline has already loaded the boot_gdt_table GDT
65 * for us.
66 */
67 ENTRY(startup_32_smp)
68 cld
69 movl $(__BOOT_DS),%eax
70 movl %eax,%ds
71 movl %eax,%es
72 movl %eax,%fs
73 movl %eax,%gs
74
75 /*
76 * New page tables may be in 4Mbyte page mode and may
77 * be using the global pages.
78 *
79 * NOTE! If we are on a 486 we may have no cr4 at all!
80 * So we do not try to touch it unless we really have
81 * some bits in it to set. This won't work if the BSP
82 * implements cr4 but this AP does not -- very unlikely
83 * but be warned! The same applies to the pse feature
84 * if not equally supported. --macro
85 *
86 * NOTE! We have to correct for the fact that we're
87 * not yet offset PAGE_OFFSET..
88 */
89 #define cr4_bits mmu_cr4_features-__PAGE_OFFSET
90 movl cr4_bits,%edx
91 andl %edx,%edx
92 jz 6f
93 movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
94 orl %edx,%eax
95 movl %eax,%cr4
96
97 btl $5, %eax # check if PAE is enabled
98 jnc 6f
99
100 /* Check if extended functions are implemented */
101 movl $0x80000000, %eax
102 cpuid
103 cmpl $0x80000000, %eax
104 jbe 6f
105 mov $0x80000001, %eax
106 cpuid
107 /* Execute Disable bit supported? */
108 btl $20, %edx
109 jnc 6f
110
111 /* Setup EFER (Extended Feature Enable Register) */
112 movl $0xc0000080, %ecx
113 rdmsr
114
115 btsl $11, %eax
116 /* Make changes effective */
117 wrmsr
118
119 6:
120 /* This is a secondary processor (AP) */
121 xorl %ebx,%ebx
122 incl %ebx
123
124 3:
125 #endif /* CONFIG_SMP */
126
127 /*
128 * Enable paging
129 */
130 movl $swapper_pg_dir-__PAGE_OFFSET,%eax
131 movl %eax,%cr3 /* set the page table pointer.. ——将页全局目录的地址送入cr3寄存器中 */
132 movl %cr0,%eax
133 orl $0x80000000,%eax /* 设置PG位 */
134 movl %eax,%cr0 /* ..and set paging (PG) bit */
135 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
136 1:
137 /* Set up the stack pointer */
138 lss stack_start,%esp /* 建立内核堆栈 */
139
140 /*
141 * Initialize eflags. Some BIOS's leave bits like NT set. This would
142 * confuse the debugger if this code is traced.
143 * XXX - best to initialize before switching to protected mode.
144 */
145 /* 将eflags寄存器清零 */
146 pushl $0
147 popfl
148
149 #ifdef CONFIG_SMP
150 andl %ebx,%ebx
151 jz 1f /* Initial CPU cleans BSS */
152 jmp checkCPUtype
153 1:
154 #endif /* CONFIG_SMP */
155
156 /*
157 * start system 32-bit setup. We need to re-do some of the things done
158 * in 16-bit mode for the "real" operations.
159 */
160 call setup_idt /* 设置中断描述符表(IDT) */
161
162 /*
163 * Copy bootup parameters out of the way.
164 * Note: %esi still has the pointer to the real-mode data.
165 */
166 movl $boot_params,%edi
167 movl $(PARAM_SIZE/4),%ecx
168 cld
169 rep
170 movsl
171 movl boot_params+NEW_CL_POINTER,%esi
172 andl %esi,%esi
173 jnz 2f # New command line protocol
174 cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
175 jne 1f
176 movzwl OLD_CL_OFFSET,%esi
177 addl $(OLD_CL_BASE_ADDR),%esi
178 2:
179 movl $saved_command_line,%edi
180 movl $(COMMAND_LINE_SIZE/4),%ecx
181 rep
182 movsl
183 1:
184 checkCPUtype:
185
186 movl $-1,X86_CPUID # -1 for no CPUID initially
187
188 /* check if it is 486 or 386. */
189 /*
190 * XXX - this does a lot of unnecessary setup. Alignment checks don't
191 * apply at our cpl of 0 and the stack ought to be aligned already, and
192 * we don't need to preserve eflags.
193 */
194
195 movb $3,X86 # at least 386
196 pushfl # push EFLAGS
197 popl %eax # get EFLAGS
198 movl %eax,%ecx # save original EFLAGS
199 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
200 pushl %eax # copy to EFLAGS
201 popfl # set EFLAGS
202 pushfl # get new EFLAGS
203 popl %eax # put it in eax
204 xorl %ecx,%eax # change in flags
205 pushl %ecx # restore original EFLAGS
206 popfl
207 testl $0x40000,%eax # check if AC bit changed
208 je is386
209
210 movb $4,X86 # at least 486
211 testl $0x200000,%eax # check if ID bit changed
212 je is486
213
214 /* get vendor info */
215 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
216 cpuid
217 movl %eax,X86_CPUID # save CPUID level
218 movl %ebx,X86_VENDOR_ID # lo 4 chars
219 movl %edx,X86_VENDOR_ID+4 # next 4 chars
220 movl %ecx,X86_VENDOR_ID+8 # last 4 chars
221
222 orl %eax,%eax # do we have processor info as well?
223 je is486
224
225 movl $1,%eax # Use the CPUID instruction to get CPU type
226 cpuid
227 movb %al,%cl # save reg for future use
228 andb $0x0f,%ah # mask processor family
229 movb %ah,X86
230 andb $0xf0,%al # mask model
231 shrb $4,%al
232 movb %al,X86_MODEL
233 andb $0x0f,%cl # mask mask revision
234 movb %cl,X86_MASK
235 movl %edx,X86_CAPABILITY
236
237 is486: movl $0x50022,%ecx # set AM, WP, NE and MP
238 jmp 2f
239
240 is386: movl $2,%ecx # set MP
241 2: movl %cr0,%eax
242 andl $0x80000011,%eax # Save PG,PE,ET
243 orl %ecx,%eax
244 movl %eax,%cr0
245
246 call check_x87
247 incb ready
248 lgdt cpu_gdt_descr
249 lidt idt_descr
250 ljmp $(__KERNEL_CS),$1f
251 1: movl $(__KERNEL_DS),%eax # reload all the segment registers
252 movl %eax,%ss # after changing gdt.
253
254 movl $(__USER_DS),%eax # DS/ES contains default USER segment
255 movl %eax,%ds
256 movl %eax,%es
257
258 xorl %eax,%eax # Clear FS/GS and LDT
259 movl %eax,%fs
260 movl %eax,%gs
261 lldt %ax
262 cld # gcc2 wants the direction flag cleared at all times
263 #ifdef CONFIG_SMP
264 movb ready, %cl
265 cmpb $1,%cl
266 je 1f # the first CPU calls start_kernel
267 # all other CPUs call initialize_secondary
268 call initialize_secondary
269 jmp L6
270 1:
271 #endif /* CONFIG_SMP */
272 call start_kernel /* 调用start_kernel函数 */
273 L6:
274 jmp L6 # main should never return here, but
275 # just in case, we know what happens.
2
3 /*
4 * Set segments to known values.
5 */
6 cld
7 lgdt boot_gdt_descr - __PAGE_OFFSET
8 movl $(__BOOT_DS),%eax
9 movl %eax,%ds
10 movl %eax,%es
11 movl %eax,%fs
12 movl %eax,%gs
13
14 /*
15 * Clear BSS first so that there are no surprises...
16 * No need to cld as DF is already clear from cld above...
17 */
18 xorl %eax,%eax
19 movl $__bss_start - __PAGE_OFFSET,%edi
20 movl $__bss_stop - __PAGE_OFFSET,%ecx
21 subl %edi,%ecx
22 shrl $2,%ecx
23 rep ; stosl
24
25 /*
26 * Initialize page tables. This creates a PDE and a set of page
27 * tables, which are located immediately beyond _end. The variable
28 * init_pg_tables_end is set up to point to the first "safe" location.
29 * Mappings are created both at virtual address 0 (identity mapping)
30 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
31 *
32 * Warning: don't use %esi or the stack in this code. However, %esp
33 * can be used as a GPR if you really need it...
34 */
35 page_pde_offset = (__PAGE_OFFSET >> 20); /* __PAGE_OFFSET是0xc0000000,page_pde_offset = 3072 = 0xc00,是页目录中的偏移 */
36
37 movl $(pg0 - __PAGE_OFFSET), %edi /* 将pg0对应的物理地址送到edi */
38 movl $(swapper_pg_dir - __PAGE_OFFSET), %edx /* 将swapper_pg_dir(临时页全局目录)对应的物理地址送到edx */
39 movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
40 10:
41 leal 0x007(%edi),%ecx /* Create PDE entry —— 构造一个页目录项(地址+标志位),把edi指向的物理地址加上0x007放入ecx中 */
42 movl %ecx,(%edx) /* 第一次循环时把ecx中的内容放入swapper_pg_dir的第0项里,第二次循环时把ecx中的内容放入swapper_pg_dir的第1项里 */
43 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
44 addl $4,%edx /* edx加上一个页目录项长度(4字节),指向页全局目录的下一个页目录项地址 */
45 movl $1024, %ecx /* 设置计数器:每个页表要初始化1024个页表项 */
46 11:
47 stosl /* eax -> [edi]; edi = edi + 4 */
48 addl $0x1000,%eax /* eax加上0x1000(4KB,一个页框的大小),即前进到下一个页框的物理地址(标志位0x007保持不变),为下次stosl作准备 */
49 loop 11b /* 循环操作,其实就是初始化1024个页表项 */
50 /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
51 /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
52 leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp /* ebp = edi指向的物理地址 + INIT_MAP_BEYOND_END(128K)+ 0x007 */
53 cmpl %ebp,%eax /* 比较ebp与eax */
54 jb 10b /* 如果eax < ebp,则跳到上面10的地方 */
55 movl %edi,(init_pg_tables_end - __PAGE_OFFSET) /* 循环结束后edi指向最后一个页表之后的第一个字节(若共建立了两个页表,则为pg0+0x2000),将此值存入init_pg_tables_end中,记录临时页表的结束位置 */
56
57 #ifdef CONFIG_SMP
58 xorl %ebx,%ebx /* This is the boot CPU (BSP) */
59 jmp 3f
60
61 /*
62 * Non-boot CPU entry point; entered from trampoline.S
63 * We can't lgdt here, because lgdt itself uses a data segment, but
64 * we know the trampoline has already loaded the boot_gdt_table GDT
65 * for us.
66 */
67 ENTRY(startup_32_smp)
68 cld
69 movl $(__BOOT_DS),%eax
70 movl %eax,%ds
71 movl %eax,%es
72 movl %eax,%fs
73 movl %eax,%gs
74
75 /*
76 * New page tables may be in 4Mbyte page mode and may
77 * be using the global pages.
78 *
79 * NOTE! If we are on a 486 we may have no cr4 at all!
80 * So we do not try to touch it unless we really have
81 * some bits in it to set. This won't work if the BSP
82 * implements cr4 but this AP does not -- very unlikely
83 * but be warned! The same applies to the pse feature
84 * if not equally supported. --macro
85 *
86 * NOTE! We have to correct for the fact that we're
87 * not yet offset PAGE_OFFSET..
88 */
89 #define cr4_bits mmu_cr4_features-__PAGE_OFFSET
90 movl cr4_bits,%edx
91 andl %edx,%edx
92 jz 6f
93 movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
94 orl %edx,%eax
95 movl %eax,%cr4
96
97 btl $5, %eax # check if PAE is enabled
98 jnc 6f
99
100 /* Check if extended functions are implemented */
101 movl $0x80000000, %eax
102 cpuid
103 cmpl $0x80000000, %eax
104 jbe 6f
105 mov $0x80000001, %eax
106 cpuid
107 /* Execute Disable bit supported? */
108 btl $20, %edx
109 jnc 6f
110
111 /* Setup EFER (Extended Feature Enable Register) */
112 movl $0xc0000080, %ecx
113 rdmsr
114
115 btsl $11, %eax
116 /* Make changes effective */
117 wrmsr
118
119 6:
120 /* This is a secondary processor (AP) */
121 xorl %ebx,%ebx
122 incl %ebx
123
124 3:
125 #endif /* CONFIG_SMP */
126
127 /*
128 * Enable paging
129 */
130 movl $swapper_pg_dir-__PAGE_OFFSET,%eax
131 movl %eax,%cr3 /* set the page table pointer.. ——将页全局目录的地址送入cr3寄存器中 */
132 movl %cr0,%eax
133 orl $0x80000000,%eax /* 设置PG位 */
134 movl %eax,%cr0 /* ..and set paging (PG) bit */
135 ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
136 1:
137 /* Set up the stack pointer */
138 lss stack_start,%esp /* 建立内核堆栈 */
139
140 /*
141 * Initialize eflags. Some BIOS's leave bits like NT set. This would
142 * confuse the debugger if this code is traced.
143 * XXX - best to initialize before switching to protected mode.
144 */
145 /* 将eflags寄存器清零 */
146 pushl $0
147 popfl
148
149 #ifdef CONFIG_SMP
150 andl %ebx,%ebx
151 jz 1f /* Initial CPU cleans BSS */
152 jmp checkCPUtype
153 1:
154 #endif /* CONFIG_SMP */
155
156 /*
157 * start system 32-bit setup. We need to re-do some of the things done
158 * in 16-bit mode for the "real" operations.
159 */
160 call setup_idt /* 设置中断描述符表(IDT) */
161
162 /*
163 * Copy bootup parameters out of the way.
164 * Note: %esi still has the pointer to the real-mode data.
165 */
166 movl $boot_params,%edi
167 movl $(PARAM_SIZE/4),%ecx
168 cld
169 rep
170 movsl
171 movl boot_params+NEW_CL_POINTER,%esi
172 andl %esi,%esi
173 jnz 2f # New command line protocol
174 cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
175 jne 1f
176 movzwl OLD_CL_OFFSET,%esi
177 addl $(OLD_CL_BASE_ADDR),%esi
178 2:
179 movl $saved_command_line,%edi
180 movl $(COMMAND_LINE_SIZE/4),%ecx
181 rep
182 movsl
183 1:
184 checkCPUtype:
185
186 movl $-1,X86_CPUID # -1 for no CPUID initially
187
188 /* check if it is 486 or 386. */
189 /*
190 * XXX - this does a lot of unnecessary setup. Alignment checks don't
191 * apply at our cpl of 0 and the stack ought to be aligned already, and
192 * we don't need to preserve eflags.
193 */
194
195 movb $3,X86 # at least 386
196 pushfl # push EFLAGS
197 popl %eax # get EFLAGS
198 movl %eax,%ecx # save original EFLAGS
199 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
200 pushl %eax # copy to EFLAGS
201 popfl # set EFLAGS
202 pushfl # get new EFLAGS
203 popl %eax # put it in eax
204 xorl %ecx,%eax # change in flags
205 pushl %ecx # restore original EFLAGS
206 popfl
207 testl $0x40000,%eax # check if AC bit changed
208 je is386
209
210 movb $4,X86 # at least 486
211 testl $0x200000,%eax # check if ID bit changed
212 je is486
213
214 /* get vendor info */
215 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
216 cpuid
217 movl %eax,X86_CPUID # save CPUID level
218 movl %ebx,X86_VENDOR_ID # lo 4 chars
219 movl %edx,X86_VENDOR_ID+4 # next 4 chars
220 movl %ecx,X86_VENDOR_ID+8 # last 4 chars
221
222 orl %eax,%eax # do we have processor info as well?
223 je is486
224
225 movl $1,%eax # Use the CPUID instruction to get CPU type
226 cpuid
227 movb %al,%cl # save reg for future use
228 andb $0x0f,%ah # mask processor family
229 movb %ah,X86
230 andb $0xf0,%al # mask model
231 shrb $4,%al
232 movb %al,X86_MODEL
233 andb $0x0f,%cl # mask mask revision
234 movb %cl,X86_MASK
235 movl %edx,X86_CAPABILITY
236
237 is486: movl $0x50022,%ecx # set AM, WP, NE and MP
238 jmp 2f
239
240 is386: movl $2,%ecx # set MP
241 2: movl %cr0,%eax
242 andl $0x80000011,%eax # Save PG,PE,ET
243 orl %ecx,%eax
244 movl %eax,%cr0
245
246 call check_x87
247 incb ready
248 lgdt cpu_gdt_descr
249 lidt idt_descr
250 ljmp $(__KERNEL_CS),$1f
251 1: movl $(__KERNEL_DS),%eax # reload all the segment registers
252 movl %eax,%ss # after changing gdt.
253
254 movl $(__USER_DS),%eax # DS/ES contains default USER segment
255 movl %eax,%ds
256 movl %eax,%es
257
258 xorl %eax,%eax # Clear FS/GS and LDT
259 movl %eax,%fs
260 movl %eax,%gs
261 lldt %ax
262 cld # gcc2 wants the direction flag cleared at all times
263 #ifdef CONFIG_SMP
264 movb ready, %cl
265 cmpb $1,%cl
266 je 1f # the first CPU calls start_kernel
267 # all other CPUs call initialize_secondary
268 call initialize_secondary
269 jmp L6
270 1:
271 #endif /* CONFIG_SMP */
272 call start_kernel /* 调用start_kernel函数 */
273 L6:
274 jmp L6 # main should never return here, but
275 # just in case, we know what happens.