一、大概过程
seL4内核是遵循multiboot specification的,可以直接通过GRUB引导。在GRUB引导完成后,会把CPU控制权跳转到kernel入口;再由内核初始化自己的栈、GDT、IDT、Page Table等等;最后跳转到内核C入口。本文以x86架构64位的seL4内核为参考对象,进行分析。
二、启动流程
1. 内核汇编32位入口_start
- _start入口作为内核汇编32位代码的入口,在GRUB将内核程序搬移到内存后,最终会跳转到这一个入口,GRUB的任务就完成了。
- 这个入口加载启动时候的栈到esp,清除eflags,并将GRUB传入的两个参数(eax: multiboot_magic; ebx: multiboot_info_ptr)各按8字节压栈,再调用common_init。
- 调用common_init接口初始化页表结构、64位执行环境。最后通过ljmp指令跳转到64位指令入口。
BEGIN_FUNC(_start)
/*
 * 32-bit kernel entry point.
 *
 * In:  %eax = multiboot_magic, %ebx = multiboot_info_ptr.
 * Does: parks both values in %edi/%esi, switches to the boot stack,
 *       clears EFLAGS, pushes the two values as 8-byte slots, calls
 *       common_init and finally far-jumps to _start64.
 * Does not return.
 */
/* Assume we are MultiBooted, e.g. by GRUB.
* While not immediately checked, the magic number is checked prior to
* Multiboot dependent operations. */
movl %eax, %edi /* multiboot_magic */
movl %ebx, %esi /* multiboot_info_ptr */
/* Load kernel boot stack pointer. */
leal boot_stack_top, %esp
/* Reset EFLAGS register (also disables interrupts etc.). */
pushl $0
popf
/* Already push parameters for calling boot_sys later. Push
* them as 8 byte values so we can easily pop later.
* Each pair is "pushl $0" then "pushl value": the zero lands at the
* higher address, so the slot reads back as the value zero-extended to
* 64 bits. multiboot_magic is pushed last and is therefore the first
* slot popped in _entry_64. */
pushl $0
pushl %esi /* 2nd parameter: multiboot_info_ptr */
pushl $0
pushl %edi /* 1st parameter: multiboot_magic */
/* The values are already on the stack, so it does not matter that
* common_init overwrites %edi (setup_pml4 uses it as a pointer). */
call common_init
/* Reload CS with long bit to enable long mode.
* Selector 8 is presumably the 64-bit code descriptor of the GDT that
* common_init loaded from _gdt64_ptr -- TODO confirm against the GDT
* definition, which is not shown here. */
ljmp $8, $_start64
END_FUNC(_start)
1.1 common_init接口
这个接口主要是调用子接口,就不再分析了。
BEGIN_FUNC(common_init)
/*
 * Early set-up shared by the boot path, called from _start in 32-bit mode.
 *
 * Steps, in order: turn paging off, optionally call fsgsbase_enable,
 * build the boot page tables (setup_pml4), enable long mode with paging
 * (enable_x64_mode), load the GDT register from _gdt64_ptr, optionally
 * call syscall_enable.
 *
 * Clobbers: %eax here; setup_pml4 and enable_x64_mode additionally
 * overwrite at least %ecx, %edx and (setup_pml4 only) %edi.
 */
/* Disable paging. */
movl %cr0, %eax
andl $0x7fffffff, %eax /* clear bit 31 (PG), keep every other CR0 bit */
movl %eax, %cr0
#ifdef CONFIG_FSGSBASE_INST
call fsgsbase_enable
#endif /* CONFIG_FSGSBASE_INST */
/* Initialize boot PML4 and switch to long mode. */
call setup_pml4
call enable_x64_mode
/* Load GDTR from the descriptor at _gdt64_ptr; CS itself is only
* reloaded later by the far jump in _start. */
lgdt _gdt64_ptr
#ifdef CONFIG_SYSCALL
call syscall_enable
#endif
ret
END_FUNC(common_init)
1.2 setup_pml4接口分析
- 这个接口作为初始化过程中最重要的接口,负责初始化系统一开始的页表结构,必须要仔细分析。
- 如果页表结构初始化失败,那么在访问未映射的地址时,会触发PF(page fault);而在系统初始化初期,并未安装对应的处理函数,又会触发DF(double fault);然而这时候仍然没有对应的处理函数,则会触发TF(triple fault)。triple fault是很严重的系统错误,会使得CPU进入SHUTDOWN状态,然后硬件电路强制使整个硬件系统重启。
1.3 映射结构分析
我对于源码做了相应备注,直接粘贴出来,如下:
BEGIN_FUNC(setup_pml4)
/*
 * Build the boot-time page tables in boot_pml4, boot_pdpt and _boot_pd.
 * Runs in 32-bit mode with paging switched off.
 *
 * Resulting layout (every entry is 8 bytes, only the low dword is written):
 *   PML4[0], PML4[256], PML4[511]  -> boot_pdpt            | 0x7
 *   PDPT[0], PDPT[510]             -> _boot_pd + 0x0000    | 0x7
 *   PDPT[1], PDPT[2], PDPT[3]      -> _boot_pd + 0x1000/0x2000/0x3000 | 0x7
 *   PD[i], i = 0..2047             -> physical i * 2 MiB   | 0x87
 * With 9 index bits per level, PML4[511]/PDPT[510] corresponds to virtual
 * address 0xffffffff80000000, the same constant _entry_64 adds to %rsp.
 *
 * Flag bits: 0x7 = Present | Read/Write | User; 0x87 additionally sets
 * bit 7 (PS), which makes a PD entry map a 2 MiB page directly.
 *
 * Clobbers: %eax, %ecx, %edx, %edi, flags (plus whatever huge_page_check
 * touches when CONFIG_HUGE_PAGE is set).
 *
 * NOTE(review): the upper dword of each _boot_pd entry is never written
 * and _boot_pd is not cleared in this function -- presumably it is
 * zero-initialised elsewhere; confirm.
 */
#ifdef CONFIG_HUGE_PAGE
call huge_page_check
#endif /* CONFIG_HUGE_PAGE */
movl %cr0, %eax /* clear CR0 bit 31 (PG): paging off while the tables are built */
andl $0x7fffffff, %eax
movl %eax, %cr0
movl $boot_pml4, %edi /* edi = base address of boot_pml4 */
movl $0x0, %edx /* edx = 0, fill value for both clearing loops */
movl $1024, %ecx /* 1024 dwords x 4 B = 4 KiB */
1:
movl %edx, (%edi) /* zero boot_pml4, one dword per iteration */
addl $4, %edi
loop 1b
movl $boot_pdpt, %edi
movl $1024, %ecx /* again 4 KiB; edx is still 0 */
1:
movl %edx, (%edi) /* zero boot_pdpt, one dword per iteration */
addl $4, %edi
loop 1b
movl $boot_pml4, %edi /* fill the PML4 (512 entries x 8 B = 4 KiB) */
movl $boot_pdpt, %ecx
orl $0x7, %ecx /* set flag bits 0-2: Present | Read/Write | User */
movl %ecx, (%edi) /* PML4[0]              -> boot_pdpt */
movl %ecx, 0x800(%edi) /* PML4[256] (0x800 / 8)  -> boot_pdpt */
movl %ecx, 4088(%edi) /* PML4[511] (4088 / 8)   -> boot_pdpt */
movl $_boot_pd, %ecx /* fill the PDPT (512 entries x 8 B = 4 KiB) */
orl $0x7, %ecx /* same flags: Present | Read/Write | User */
movl $boot_pdpt, %edi
movl %ecx, (%edi) /* PDPT[0]   -> first 4 KiB of _boot_pd (PD entries 0..511) */
movl %ecx, 4080(%edi) /* PDPT[510] (4080 / 8) -> same first 4 KiB of _boot_pd */
addl $0x1000, %ecx /* advance one 4 KiB table; the low flag bits are unchanged */
movl %ecx, 8(%edi) /* PDPT[1]   -> _boot_pd + 0x1000 (PD entries 512..1023) */
addl $0x1000, %ecx
movl %ecx, 16(%edi) /* PDPT[2]   -> _boot_pd + 0x2000 (PD entries 1024..1535) */
addl $0x1000, %ecx
movl %ecx, 24(%edi) /* PDPT[3]   -> _boot_pd + 0x3000 (PD entries 1536..2047) */
/* Map first 4GiB into the _boot_pd. */
movl $_boot_pd, %edi /* fill the PD area (2048 entries x 8 B = 16 KiB) */
movl $2048, %ecx /* loop count: 2048 x 2 MiB = 4 GiB */
movl $0x87, %edx /* entry for physical 0: Present | Read/Write | User | PS (2 MiB page) */
2:
movl %edx, (%edi) /* write low dword of PD[i] = (i * 2 MiB) | 0x87 */
addl $0x200000, %edx /* next 2 MiB physical frame (1:1 map of the first 4 GiB) */
addl $8, %edi /* next 8-byte entry */
loop 2b
ret
END_FUNC(setup_pml4)
目前做了第一版手绘的页表模型(后期改为电子版),对应的抽象模型如下:
1.3.1 说明
- PD 为4个表组成,对应了0~1G, 1~2G, 2~3G, 3~4G的物理内存地址,占用2K * 8B = 16KB空间。
- PDPT 为1个表组成,下标 0-3 的表项分别指向 PD_0, PD_1, PD_2, PD_3,倒数第二个表项 510 (0x1fe) 指向 PD_0,其他项均被初始化为0(空),占用 512 * 8B = 4KB 空间。
- PML4 为1个表组成,下标为 0,256 (0x100),511 (0x1ff, the last) 的表项均指向 PDPT,占用 512 * 8B = 4KB 空间。
1.4 enable_x64_mode接口分析
主要流程如下:
- cr3 = pml4, 加载顶层页表基地址;
- cr4[5] = 1,开启PAE;
- IA32_EFER_MSR[8] = 1, 开启Long Mode 使能位;
- cr0[31] = 1, 开启分页功能;
在调用本接口后,执行ljmp加载64位代码段的CS和OFFSET,CPU便进入到64位模式。
BEGIN_FUNC(enable_x64_mode)
/*
 * Switch the CPU to long mode with paging enabled, using the tables
 * rooted at boot_pml4. Called from common_init after setup_pml4.
 *
 * Order of operations: CR3 <- boot_pml4, CR4.PAE, EFER.LME, CR0.PG and,
 * when CONFIG_SUPPORT_PCID is set, CR4 bit 17 afterwards.
 *
 * Clobbers: %eax, %ecx, %edx (rdmsr writes %edx:%eax), flags, plus
 * whatever pcid_check / invpcid_check touch when they are compiled in.
 */
#ifdef CONFIG_SUPPORT_PCID
call pcid_check
call invpcid_check
#endif
/* Put base pointer in cr3. */
movl $boot_pml4, %eax
movl %eax, %cr3
/* Set PAE (bit 5), as this is required before switching to long mode. */
movl %cr4, %eax
orl $0x20, %eax
movl %eax, %cr4
/* Set LME (bit 8) in the extended feature MSR.
* rdmsr reads the MSR selected by %ecx into %edx:%eax; only %eax is
* modified before wrmsr writes the pair back. */
movl $IA32_EFER_MSR, %ecx
rdmsr
orl $0x100, %eax
wrmsr
/* Set PG (bit 31) of cr0 to enable paging. */
movl %cr0, %eax
orl $0x80000000, %eax
movl %eax, %cr0
#ifdef CONFIG_SUPPORT_PCID
/* Enable PCID (bit 17), must be done in long mode. */
movl %cr4, %eax
orl $0x20000, %eax
movl %eax, %cr4
#endif
ret
END_FUNC(enable_x64_mode)
2. 内核汇编64位入口_start64
该段程序把_entry_64的64位绝对地址加载到rax,再通过间接跳转使CPU转到内核高端虚拟地址_entry_64处,继续执行内核程序。
.align 4096
BEGIN_FUNC(_start64)
/*
 * First 64-bit code, reached through the far jump at the end of _start.
 * Its only job is to transfer control to _entry_64; it does not touch
 * the stack, so the two 8-byte Multiboot slots pushed by _start are
 * still on top of it. Clobbers %rax.
 */
/* Leave phys code behind and jump to the high kernel virtual address. */
movabs $_entry_64, %rax /* load the full 64-bit absolute address */
jmp *%rax /* indirect jump through %rax to that address */
END_FUNC(_start64)
3. 内核汇编64位入口_entry_64
该接口主要将内核栈更新到rsp,并将GRUB传递的两个参数Pop到rdi, rsi(64位C程序,从左往右第一、二个参数),然后压入restore_user_context接口(具体功能暂时未研究)地址,作为boot_sys的返回地址。最后跳转到64位的内核C接口boot_sys执行系统初始化的详细工作。
BEGIN_FUNC(_entry_64)
/*
 * 64-bit entry, reached from _start64.
 *
 * Recovers the two Multiboot values pushed by _start into %rdi/%rsi,
 * switches to the kernel stack and jumps to boot_sys with the address of
 * restore_user_context on the stack as its return address.
 * Does not return to its caller.
 */
/* Update our stack pointer.
* Adding 0xffffffff80000000 moves the boot-stack pointers from the low
* address used in 32-bit mode to the matching address in the window
* mapped by setup_pml4 through PML4[511] / PDPT[510]. */
movq $0xffffffff80000000, %rax
addq %rax, %rsp
addq %rax, %rbp
/* Pop the multiboot parameters off.
* They were pushed as 8-byte slots by _start, magic last, so the first
* pop yields multiboot_magic and the second multiboot_info_ptr. */
pop %rdi
pop %rsi
/* Load our real kernel stack.
* Top of kernel_stack_alloc: base + (1 << CONFIG_KERNEL_STACK_BITS). */
leaq kernel_stack_alloc + (1 << CONFIG_KERNEL_STACK_BITS), %rsp
/* Push restore_user_context as a return address, then enter boot_sys with
* jmp rather than call: a ret from boot_sys lands in restore_user_context. */
movabs $restore_user_context, %rax
push %rax
jmp boot_sys
END_FUNC(_entry_64)