二：操作系统的引导（2）

setup模块，即setup.s

好！我们刚刚说到了CPU现在已经跳到了90200这里执行了，也就是这个setup.s。接下来我们就来看看这一段代码。


!

!   setup.s (C) 1991 Linus Torvalds

!

! setup.s is responsible for getting the system data from the BIOS,

! and putting them into the appropriate places in system memory.

! both setup.s and system has been loaded by the bootblock.

!

! This code asks the bios for memory/disk/other parameters, and

! puts them in a "safe" place: 0x90000-0x901FF, ie where the

! boot-block used to be. It is then up to the protected mode

! system to read them from there before the area is overwritten

! for buffer-blocks.

!

! NOTE! These had better be the same as in bootsect.s!

INITSEG  = 0x9000   ! we move boot here - out of the way

SYSSEG  = 0x1000    ! system loaded at 0x10000 (65536).

SETUPSEG = 0x9020   ! this is the current segment

.globl begtext, begdata, begbss, endtext, enddata, endbss

.text

begtext:

.data

begdata:

.bss

begbss:

.text

entry start

start:

! ok, the read went well so we get current cursor position and save it for

! posterity.

mov ax,#INITSEG ! this is done in bootsect already, but...

mov ds,ax

mov ah,#0x03    ! read cursor pos

xor bh,bh

int 0x10    ! save it in known place, con_init fetches

mov [0],dx  ! it from 0x90000.

! Get memory size (extended mem, kB)

mov ah,#0x88

int 0x15

mov [2],ax

! Get video-card data:

mov ah,#0x0f

int 0x10

mov [4],bx  ! bh = display page

mov [6],ax  ! al = video mode, ah = window width

! check for EGA/VGA and some config parameters

mov ah,#0x12

mov bl,#0x10

int 0x10

mov [8],ax

mov [10],bx

mov [12],cx

! Get hd0 data

mov ax,#0x0000

mov ds,ax

lds si,[4*0x41]

mov ax,#INITSEG

mov es,ax

mov di,#0x0080

mov cx,#0x10

rep

movsb

! Get hd1 data

mov ax,#0x0000

mov ds,ax

lds si,[4*0x46]

mov ax,#INITSEG

mov es,ax

mov di,#0x0090

mov cx,#0x10

rep

movsb

! Check that there IS a hd1 :-)

mov ax,#0x01500

mov dl,#0x81

int 0x13

jc  no_disk1

cmp ah,#3

je  is_disk1

no_disk1:

mov ax,#INITSEG

mov es,ax

mov di,#0x0090

mov cx,#0x10

mov ax,#0x00

rep

stosb

is_disk1:

! now we want to move to protected mode ...

cli ! no interrupts allowed !

! first we move the system to it's rightful place

mov ax,#0x0000

cld ! 'direction'=0, movs moves forward

do_move:

mov es,ax   ! destination segment

add ax,#0x1000

cmp ax,#0x9000

jz  end_move

mov ds,ax   ! source segment

sub di,di

sub si,si

mov cx,#0x8000

rep

movsw

jmp do_move

! then we load the segment descriptors

end_move:

mov ax,#SETUPSEG    ! right, forgot this at first. didn't work :-)

mov ds,ax

lidt    idt_48  ! load idt with 0,0

lgdt    gdt_48  ! load gdt with whatever appropriate

! that was painless, now we enable A20

call    empty_8042

mov al,#0xD1    ! command write

out #0x64,al

call    empty_8042

mov al,#0xDF    ! A20 on

out #0x60,al

call    empty_8042

! well, that went ok, I hope. Now we have to reprogram the interrupts :-(

! we put them right after the intel-reserved hardware interrupts, at

! int 0x20-0x2F. There they won't mess up anything. Sadly IBM really

! messed this up with the original PC, and they haven't been able to

! rectify it afterwards. Thus the bios puts interrupts at 0x08-0x0f,

! which is used for the internal hardware interrupts as well. We just

! have to reprogram the 8259's, and it isn't fun.

mov al,#0x11    ! initialization sequence

out #0x20,al    ! send it to 8259A-1

.word   0x00eb,0x00eb   ! jmp $+2, jmp $+2

out #0xA0,al    ! and to 8259A-2

.word   0x00eb,0x00eb

mov al,#0x20    ! start of hardware int's (0x20)

out #0x21,al

.word   0x00eb,0x00eb

mov al,#0x28    ! start of hardware int's 2 (0x28)

out #0xA1,al

.word   0x00eb,0x00eb

mov al,#0x04    ! 8259-1 is master

out #0x21,al

.word   0x00eb,0x00eb

mov al,#0x02    ! 8259-2 is slave

out #0xA1,al

.word   0x00eb,0x00eb

mov al,#0x01    ! 8086 mode for both

out #0x21,al

.word   0x00eb,0x00eb

out #0xA1,al

.word   0x00eb,0x00eb

mov al,#0xFF    ! mask off all interrupts for now

out #0x21,al

.word   0x00eb,0x00eb

out #0xA1,al

! well, that certainly wasn't fun :-(. Hopefully it works, and we don't

! need no steenking BIOS anyway (except for the initial loading :-).

! The BIOS-routine wants lots of unnecessary data, and it's less

! "interesting" anyway. This is how REAL programmers do it.

!

! Well, now's the time to actually move into protected mode. To make

! things as simple as possible, we do no register set-up or anything,

! we let the gnu-compiled 32-bit programs do that. We just jump to

! absolute address 0x00000, in 32-bit protected mode.

mov ax,#0x0001  ! protected mode (PE) bit

lmsw    ax  ! This is it!

jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

! This routine checks that the keyboard command queue is empty

! No timeout is used - if this hangs there is something wrong with

! the machine, and we probably couldn't proceed anyway.

!
! Spins until bit 1 of the byte read from port 0x64 is clear, then returns.
! Takes no arguments; overwrites al and the flags (in/test below).

empty_8042:

.word   0x00eb,0x00eb

! ^ raw opcode bytes; the same sequence is annotated "jmp $+2, jmp $+2" in the
!   8259 setup earlier in this listing - presumably a short I/O settle delay.

in  al,#0x64    ! 8042 status port

test    al,#2   ! is input buffer full?

jnz empty_8042  ! yes - loop

ret

gdt:

.word   0,0,0,0 ! dummy

.word   0x07FF  ! 8Mb - limit=2047 (2048*4096=8Mb)

.word   0x0000  ! base address=0

.word   0x9A00  ! code read/exec

.word   0x00C0  ! granularity=4096, 386

.word   0x07FF  ! 8Mb - limit=2047 (2048*4096=8Mb)

.word   0x0000  ! base address=0

.word   0x9200  ! data read/write

.word   0x00C0  ! granularity=4096, 386

idt_48:

.word   0   ! idt limit=0

.word   0,0 ! idt base=0L

gdt_48:

.word   0x800   ! gdt limit=2048, 256 GDT entries

.word   512+gdt,0x9 ! gdt base = 0X9xxxx

.text

endtext:

.data

enddata:

.bss

endbss:

image.png


INITSEG  = 0x9000   ! we move boot here - out of the way

SYSSEG  = 0x1000    ! system loaded at 0x10000 (65536).

SETUPSEG = 0x9020   ! this is the current segment

……………………

start:

! ok, the read went well so we get current cursor position and save it for

! posterity.

mov ax,#INITSEG ! this is done in bootsect already, but...

mov ds,ax

mov ah,#0x03    ! read cursor pos

xor bh,bh

int 0x10    ! save it in known place, con_init fetches

mov [0],dx  ! it from 0x90000.

! Get memory size (extended mem, kB)

mov ah,#0x88

int 0x15

mov [2],ax

…………………………

! now we want to move to protected mode ...

cli ! no interrupts allowed !

! first we move the system to it's rightful place

mov ax,#0x0000

cld ! 'direction'=0, movs moves forward

do_move:

mov es,ax   ! destination segment

add ax,#0x1000

cmp ax,#0x9000

jz  end_move

mov ds,ax   ! source segment

sub di,di

sub si,si

mov cx,#0x8000

rep

movsw

jmp do_move

再具体分一下来分析


start:

! ok, the read went well so we get current cursor position and save it for

! posterity.

    mov ax,#INITSEG ! this is done in bootsect already, but...

    mov ds,ax

    mov ah,#0x03    ! read cursor pos

    xor bh,bh

    int 0x10        ! save it in known place, con_init fetches

    mov [0],dx      ! it from 0x90000.

这里又是一个获取光标的中断，我们来拿上次获取光标的代码来进行对比。


! Print some inane message

mov ah,#0x03    ! read cursor pos

xor bh,bh

int 0x10

mov cx,#24

mov bx,#0x0007  ! page 0, attribute 7 (normal)

mov bp,#msg1

mov ax,#0x1301  ! write string, move cursor

int 0x10

这种套路代码，遇多了，我们都知道，获取光标的代码应该就是


mov ah,#0x03    ! read cursor pos

xor bh,bh

int 0x10

我们就可以猜想下


    mov [0],dx      ! it from 0x90000.

就是说，调用int 0x10后，获取到的光标位置（甚至可能还有别的参数）会存放在某个寄存器中，比如这里就是dx。为了保留这些数据，我们要把寄存器里的信息转移到内存中去。这段代码的意思就是这样了。那我们就好奇这个[0]是啥了？

我们知道指令格式是：mov 内存单元地址，寄存器名。[……]表示一个内存单元，里面的数字表示内存单元的偏移地址。但是我们又知道只有偏移地址是不能确定一个内存单元的，那么这个内存单元的段地址是什么呢？指令在执行时，CPU会自动取ds中的数据作为内存单元的段地址。


INITSEG  = 0x9000  ! we move boot here - out of the way

……………………

start:

    mov ax,#INITSEG ! this is done in bootsect already, but...

    mov ds,ax

    mov ah,#0x03    ! read cursor pos

    xor bh,bh

    int 0x10        ! save it in known place, con_init fetches

    mov [0],dx      ! it from 0x90000.

那我们现在就可以知道ds=0x9000了。段地址0x9000×16+偏移0=0x90000，所以我们实际上是把dx的值存到了内存的0x90000处啦。


! Get memory size (extended mem, kB)

mov ah,#0x88

int 0x15

mov [2],ax

有了先前的先给参数赋值，然后再调用中断执行，我们就大概知道这里可能就是ah是个参数，然后我们执行了一个不知道干什么的15号中断，最后ax可能就获取到了什么数据，然后把这个数据赋值到了90002处。

好啦！现在我告诉你，这个是获取内存大小的中断（按注释，取到的是扩展内存的大小，单位是KB），有点像是先清点一下你到底有多少钱，然后方便以后管理这些钱，把多少多少钱拿去干什么，把另外的多少钱拿去干什么，我们拿去做事的钱当然不能超过现在的钱咯！所以，这里获取的就是现在可用的内存，方便以后的分配。以后看到这种中断的时候，如果是在学习时，我们看旁边的注释，猜想一下前面的给寄存器赋值可能是参数就够了，不要太陷进去，我们现在不是要写操作系统，我们是学习操作系统的理念，观点一定要明确。

image.png

后面很多都是这种中断，然后把获取的值存放到内存里去，就省略不说了。


! now we want to move to protected mode ...

cli ! no interrupts allowed !

! first we move the system to it's rightful place

mov ax,#0x0000

cld ! 'direction'=0, movs moves forward

do_move:

mov es,ax   ! destination segment

add ax,#0x1000

cmp ax,#0x9000

jz  end_move

mov ds,ax   ! source segment

sub di,di

sub si,si

mov cx,#0x8000

rep

movsw

jmp do_move

cli 开始就不再允许中断，说明大概要的数据我们已经要的差不多了。

我们看到move，看来这又是一段和上次一样的，把一段代码移动到另一个地址的了。我们看下上次的那段代码，来对比一下。


BOOTSEG  = 0x07c0   ! original address of boot-sector

INITSEG  = 0x9000   ! we move boot here - out of the way

………………

_start:

mov ax,#BOOTSEG

mov ds,ax

mov ax,#INITSEG

mov es,ax

mov cx,#256

sub si,si

sub di,di

rep

movw

jmpi    go,INITSEG

go: mov ax,cs

mov ds,ax

mov es,ax

上面这是上一次移动bootsect.s的代码。

image.png

复习一下，ds=07c0,es=9000，ds:si 指向物理地址 0x7c00，es:di 指向物理地址 0x90000，可以猜得出ds:si是源地址，es:di是目的地址；cx存放的是要移动多少个字（这里是256个字，即512字节），接下来就是rep movw移动了；最后就是jmpi，让CPU跳到移动后的那份代码里、从先前已经执行到的地方接着执行。

那我们再看这个图，我们就知道等等要移动的可能就是system，这个模块。

我们不看代码，来模仿一下。

应该是源地址ds:si = 10000,目的地址es:di = 00000；移动多少呢？还不知道！反正cx = 某个数值，再执行rep movw ，最后跳转到那里执行 jmpi；

好，现在看下代码，进行对比；


! first we move the system to it's rightful place

mov ax,#0x0000

cld ! 'direction'=0, movs moves forward

do_move:

mov es,ax   ! destination segment

add ax,#0x1000

cmp ax,#0x9000

jz  end_move

mov ds,ax   ! source segment

sub di,di

sub si,si

mov cx,#0x8000

rep

movsw

jmp do_move

目的：es:di = 0000:0

源：ds:si = 1000:0

每次移动0x8000个字（即64KB）

移动后，

jmp再让CPU跳转到do_move这里

细心的就会发现少了一个i。而且好像挺奇怪的，怎么又跳到开头，再执行一次？就算是少了i，也不应该这样呀！

我们发现了，这里的代码还有一点点不一样比如说多了？


cmp ax,#0x9000

jz  end_move

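这里大概的意思就是：你确定代码都移动完了？如果移动完了，我们就结束，不再移动代码了。具体来说，每一轮循环搬运一个64KB的段，ax每轮加0x1000；当ax加到0x9000时，说明0x10000~0x8FFFF这8个段（共512KB）都已经搬到了从0x00000开始的位置，于是跳到end_move。所以jmp do_move并不是“莫名其妙再执行一次”，而是一个循环，cmp/jz就是这个循环的结束条件。现在明白了吧！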

我们现在又明白了一个套路，移动代码的套路


    mov ax,#源地址

mov ds,ax

mov ax,#目的地址

mov es,ax

mov cx,#移动多少字数

sub si,si

sub di,di

rep

movw

jmp(i)  移动后开始执行的地方

那这里的jmpi，我们再思考一下：jmpi是段间跳转，会同时给cs和ip赋新值。上次是把自己移动过去，CPU原本在旧地址上执行，移动后要换到新的那份代码里去，所以要用jmpi给CPU一个新的cs:ip，让它从那里接着一条一条往下执行。而这里移动的是别人（system模块），CPU还是在setup自己的代码里执行，所以不需要重新给CPU一个新的开始，它会一直一条一条执行下去；这里的jmp只是段内跳转，用来回到循环开头。

我们来看下setup.s一开始的说明


!

!   setup.s (C) 1991 Linus Torvalds

!

! setup.s is responsible for getting the system data from the BIOS,

! and putting them into the appropriate places in system memory.

! both setup.s and system has been loaded by the bootblock.

!

! This code asks the bios for memory/disk/other parameters, and

! puts them in a "safe" place: 0x90000-0x901FF, ie where the

! boot-block used to be. It is then up to the protected mode

! system to read them from there

看来我们已经完成了七七八八了，好像就剩下注释里提到的protected mode还没说（原句的意思是：之后由保护模式下的system负责把这些参数读走）；protected mode翻译过来就是保护模式，看到模式，我们是不是想到了一开始的。

image.png

开始说过的实模式，现在说要到保护模式。

将system移到0地址处...

image.png


! well, that certainly wasn't fun :-(. Hopefully it works, and we don't

! need no steenking BIOS anyway (except for the initial loading :-).

! The BIOS-routine wants lots of unnecessary data, and it's less

! "interesting" anyway. This is how REAL programmers do it.

!

! Well, now's the time to actually move into protected mode. To make

! things as simple as possible, we do no register set-up or anything,

! we let the gnu-compiled 32-bit programs do that. We just jump to

! absolute address 0x00000, in 32-bit protected mode.

mov ax,#0x0001  ! protected mode (PE) bit

lmsw    ax  ! This is it!

jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

实模式和保护模式

我们先不看代码。我们来看下保护模式和实模式先。

我们先前说过8086CPU是16位的，但是我们仔细想下，我们平时听到的都是32位和64位的CPU，很少听到16位的呀！是呀！16位的CPU已经是好久好久以前的CPU了。在那个时代16位的CPU还是很厉害的，但是在我们这个时代，已经太落伍了！我们都已经步入64位普及的时代了，不过我们现在要说的是32位的80x86，听这名字就知道是8086的增强型。好了，现在我们明白了我们的CPU可是32位的咯！

那32位的CPU意味着什么？复习一下前面的。

那我们回顾一下16位CPU的处理思路。

那什么是16位结构的CPU呢？

运算器一次最多可以处理16位的数据；

寄存器的最大宽度为16位；

寄存器和运算器之间的通路为16位；

也就是说，在8086内部，能够一次性处理，传输，暂时存储的信息的最大长度是16位的。内存单元的地址在送上地址总线之前，必须在CPU中处理，传输，暂时存放，对于16位CPU，能一次性处理，传输，暂时存储16位的地址。

计算机的发展有个特别好的也特别烦的传统，就是向下兼容！所以我们在32位的CPU下执行了16位CPU的那种处理思路。我们就把那种思路称为实模式，那32位的处理思路就是保护模式啦。

那两者有什么不同呢？

我们先说下CPU处理能力。

我们说过16位CPU一次只能处理16位的数据，通过 段地址×16+偏移地址 这种方式，我们的地址达到了20位，也就是可以寻址的范围扩大到了2^20=1M。

哈哈，1M，现在看到这个数字是不是要笑死了！我们现在的内存基本都是4G起步，常见都是8G，这1M。。。看起来真的就很尴尬了！

那现在我们看到32位CPU处理的数据应该就是32位了，那就是2^32=4G，感觉好像是挺大的了，但是实际上，我刚刚说过了现在都是4G起步啦，所以还是不够大，所以32位的CPU也会想出一种办法来扩大自己的内存。为什么不用原来的那种？这种问题。。。那。。。那是变成36位吗？这还是我们不知道地址线的宽度乱说的。好像这么做也可以。一种新的模式颠覆旧的模式，可能不仅仅是改良这么简单，会有新的功能添加。比如后面说到的保护问题。先不说这些。

分段机制

我们来思考一下，如果给你一国家，你会如何管理？

我们知道每个地方都有一个唯一的经纬度，但是你在管理国家的时候，会直接用经纬度来指定一个地方吗？

我们看下秦始皇就很聪明的使用了郡县制，额，就是类似把现在的国家分成了省市这样。

这样我们来定位一个城市的时候，就很方便了！我们把中国分成若干个省份，省份里面再分市。那我们确定一个城市时，就是先给出一个省份，确定省份后，我们就知道获取这个省份里面有哪些市可供选择。

image.png

这是我们平时添加收货地址的方式，我们选定了一省份后，就会提供我们一个表，这个表里面有着该省的市级单位，我们再通过这个表中的内容，就可以确定到了中国里一个特定的市级单位了。

那计算机也采用了类似的管理方式，称为分段机制

image.png

段选择符

先不看TI和RPL，我们就可以猜到描述符索引这里应该指向一个表，这个表称为段描述符表

image.png

根据图，我们可以知道：

段描述符表是段描述符的一个数组，描述符表的长度可变（这个还真看不出来），最多可以包含8192个8字节描述符。有两种描述符表：全局描述符表GDT和局部描述符表LDT。

这里我们又可以反推出上面的描述符中的TI的作用了：选择哪一个描述符表。

我们根据刚刚的收货地址，猜得出这里一个个的8字节描述符对应的应该就是一个个城市了。有8字节这么长，单单用来放一个市的名称也太浪费了吧。所以计算机规定了一个结构。

image.png

8字节有64位，那这64位中，我们规定了特定的位不同的值对应着不同的意思。总的来说是分成了三个部分

段地址
段限长
段属性

image.png

到这里，我们已经像填收货地址一样，具体到了市了，但是我们知道，这还不够，我们还要写具体到XX区XX街道XX楼XX号XXX人收。这么复杂的信息，计算机就用了32位来让我们具体表示，就是偏移量

image.png

呼一口气，我们终于让这个地址具体到了快递小哥可以送上门的地步了。

就像我们填快递的时候，不需要了解一个市级单位到底有什么政策的时候，当然有时候是需要了解的，比如说：该市只允许顺丰快递。这就很尴尬了，如果我们用的是申通快递，那我们永远是无法收到货的。但是，先别管那么多了。反正我们知道，我们这样填，快递小哥理论上是可以送达的就可以了。

我们回顾一下：

在实模式下，我们是通过 段地址X16 + 段偏移 来确定位置的。

在保护模式下，我们通过 段选择符选定的段描述符里面的基地址+偏移量 确定的。

那。。。电脑什么时候用实模式什么时候用保护模式呢？

进入保护模式

就像看美国白宫里面现在住的是哪位总统，我们就可以知道这个国家现在是由谁在统治一样。计算机里面也有一个位置来标明此时电脑是在什么模式下工作，那就是控制寄存器CR0

image.png

当CR0中的PE=1时，就是保护模式；

当CR0中的PE=0时，就是实模式；

image.png


! Well, now's the time to actually move into protected mode. To make

! things as simple as possible, we do no register set-up or anything,

! we let the gnu-compiled 32-bit programs do that. We just jump to

! absolute address 0x00000, in 32-bit protected mode.

mov ax,#0x0001  ! protected mode (PE) bit

lmsw    ax  ! This is it!

jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

此时，再看这代码，就特别简单了。

ax=0001换成二进制，最后一位肯定是1呀


lmsw    ax  ! This is it!

实际上就是

    mov cr0, ax

之所以用lmsw而不是直接写cr0，是为了兼容以前的286CPU（lmsw装载的是“机器状态字”，也就是CR0的低16位，PE位就在其中的第0位）

ax=0001换成二进制，最后一位肯定是1呀！

那再把ax赋给cr0，那PE一定是1呀！

那这个时候，计算机已经是保护模式了！


jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

我们可以知道cs=8,ip=0；然后，这里可就不能理解成地址为80啦。我们要用分段机制来思考这个问题。根据刚刚说的：

在保护模式下，我们通过段选择符选定的段描述符里面的基地址+偏移量确定的。

那我们已经知道偏移量，ip=0，cs=8，应该就是表里面8这个位置，这个位置有我们需要的信息，这个信息加上ip就能得出地址，那表呢？

内存管理寄存器

我们就只看GDTR这个寄存器先。

GDTR：全局描述符表寄存器

32位线性基地址，一看就知道，这个地址是告诉我们那个表的所在位置，就像C语言中你给了一个数组名，实际上这个数组名就是该内存空间的首地址，如果是占四个字节的int类型，每次数组+1就是地址加4个字节，指向第二个数。16位表长度，我们先前提过。

段描述符表是段描述符的一个数组，描述符表的长度可变（这个还真看不出来），最多可以包含8192个8字节描述符。有两种描述符表：全局描述符表GDT和局部描述符表LDT。

8192个8字节，8192 x 8 = 65536字节；而16位的表长度最多能表示 2^16 = 65536 字节，那我们现在可以知道为什么最多是包含8192个8字节描述符了吧！这个表长度就是告诉我们这个数组到底有多大的意思。

每个内存管理寄存器，都有对应的两条指令：加载和保存

那对于GDTR就是：

加载：LGDT
保存：SGDT


end_move:

mov ax,#SETUPSEG    ! right, forgot this at first. didn't work :-)

mov ds,ax

lidt    idt_48  ! load idt with 0,0

lgdt    gdt_48  ! load gdt with whatever appropriate

………………

gdt:

.word   0,0,0,0 ! dummy

.word   0x07FF  ! 8Mb - limit=2047 (2048*4096=8Mb)

.word   0x0000  ! base address=0

.word   0x9A00  ! code read/exec

.word   0x00C0  ! granularity=4096, 386

.word   0x07FF  ! 8Mb - limit=2047 (2048*4096=8Mb)

.word   0x0000  ! base address=0

.word   0x9200  ! data read/write

.word   0x00C0  ! granularity=4096, 386

…………

gdt_48:

.word   0x800   ! gdt limit=2048, 256 GDT entries

.word   512+gdt,0x9 ! gdt base = 0X9xxxx

image.png

我们说过段描述符是8字节的。我们是按n*8来选择特定的描述符的。


jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

………………

gdt:

.word   0,0,0,0 ! dummy

.word   0x07FF  ! 8Mb - limit=2047 (2048*4096=8Mb)

.word   0x0000  ! base address=0

.word   0x9A00  ! code read/exec

.word   0x00C0  ! granularity=4096, 386

!.word表示一个字，

！.word  0,0,0,0 是简写实际上是：

！.word  0

！.word  0

！.word  0

！.word  0

我们知道段选择符选定的段描述符有8字节，也就是4个字，64个位；

段描述符通用格式

07FF：放到段限长

0000: 放到0~15的基地址

9A00: 低8位00放在16~23的基地址那

00C0: 高8位00放在24~31的基地址那

那我们把这些基址合起来：0000 00 00


jmpi    0,8 ! jmp offset 0 of segment 8 (cs)

那就是选到GDT里面的8字节处，也就是第二个那里，地址就是0000 0000 加上偏移0，那就是到0000 0000 0处了。

image.png

哈哈！又是这张图，看到代码执行位置线路没，那就是到0x0000处啦。

好了！到这里，操作系统的引导就完成了！system模块会一直在这里，以后的操作也会一直在这里执行。那这里的第一段代码是啥呢？如图我们可知，是head.s

head.s //一段在保护模式下运行的代码

image.png

再坚持一下，把head.s说完，引导就全部说完了。


/*

*  linux/boot/head.s

*

*  (C) 1991  Linus Torvalds

*/

/*

*  head.s contains the 32-bit startup code.

*

* NOTE!!! Startup happens at absolute address 0x00000000, which is also where

* the page directory will exist. The startup code will be overwritten by

* the page directory.

*/

.text

.globl idt,gdt,pg_dir,tmp_floppy_area

pg_dir:

.globl startup_32

startup_32:

movl $0x10,%eax

mov %ax,%ds

mov %ax,%es

mov %ax,%fs

mov %ax,%gs

lss stack_start,%esp

call setup_idt

call setup_gdt

movl $0x10,%eax # reload all the segment registers

mov %ax,%ds # after changing gdt. CS was already

mov %ax,%es # reloaded in 'setup_gdt'

mov %ax,%fs

mov %ax,%gs

lss stack_start,%esp

xorl %eax,%eax

1:  incl %eax   # check that A20 really IS enabled

movl %eax,0x000000  # loop forever if it isn't

cmpl %eax,0x100000

je 1b

/*

* NOTE! 486 should set bit 16, to check for write-protect in supervisor

* mode. Then it would be unnecessary with the "verify_area()"-calls.

* 486 users probably want to set the NE (#5) bit also, so as to use

* int 16 for math errors.

*/

movl %cr0,%eax  # check math chip

andl $0x80000011,%eax   # Save PG,PE,ET

/* "orl $0x10020,%eax" here for 486 might be good */

orl $2,%eax # set MP

movl %eax,%cr0

call check_x87

jmp after_page_tables

/*

* We depend on ET to be correct. This checks for 287/387.

*/

check_x87:

fninit

fstsw %ax

cmpb $0,%al

je 1f   /* no coprocessor: have to set bits */

movl %cr0,%eax

xorl $6,%eax    /* reset MP, set EM */

movl %eax,%cr0

ret

.align 2

1:  .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */

ret

/*

*  setup_idt

*

*  sets up a idt with 256 entries pointing to

*  ignore_int, interrupt gates. It then loads

*  idt. Everything that wants to install itself

*  in the idt-table may do so themselves. Interrupts

*  are enabled elsewhere, when we can be relatively

*  sure everything is ok. This routine will be over-

*  written by the page tables.

*/

setup_idt:

lea ignore_int,%edx

movl $0x00080000,%eax

movw %dx,%ax    /* selector = 0x0008 = cs */

movw $0x8E00,%dx    /* interrupt gate - dpl=0, present */

lea idt,%edi

mov $256,%ecx

rp_sidt:

movl %eax,(%edi)

movl %edx,4(%edi)

addl $8,%edi

dec %ecx

jne rp_sidt

lidt idt_descr

ret

/*

*  setup_gdt

*

*  This routines sets up a new gdt and loads it.

*  Only two entries are currently built, the same

*  ones that were built in init.s. The routine

*  is VERY complicated at two whole lines, so this

*  rather long comment is certainly needed :-).

*  This routine will beoverwritten by the page tables.

*/

setup_gdt:

lgdt gdt_descr

ret

/*

* I put the kernel page tables right after the page directory,

* using 4 of them to span 16 Mb of physical memory. People with

* more than 16MB will have to expand this.

*/

.org 0x1000

pg0:

.org 0x2000

pg1:

.org 0x3000

pg2:

.org 0x4000

pg3:

.org 0x5000

/*

* tmp_floppy_area is used by the floppy-driver when DMA cannot

* reach to a buffer-block. It needs to be aligned, so that it isn't

* on a 64kB border.

*/

tmp_floppy_area:

.fill 1024,1,0

after_page_tables:

pushl $0    # These are the parameters to main :-)

pushl $0

pushl $0

pushl $L6   # return address for main, if it decides to.

pushl $main

jmp setup_paging

L6:

jmp L6  # main should never return here, but

# just in case, we know what happens.

/* This is the default interrupt "handler" :-) */

int_msg:

.asciz "Unknown interrupt\n\r"

.align 2

ignore_int:

pushl %eax

pushl %ecx

pushl %edx

push %ds

push %es

push %fs

movl $0x10,%eax

mov %ax,%ds

mov %ax,%es

mov %ax,%fs

pushl $int_msg

call printk

popl %eax

pop %fs

pop %es

pop %ds

popl %edx

popl %ecx

popl %eax

iret

/*

* Setup_paging

*

* This routine sets up paging by setting the page bit

* in cr0. The page tables are set up, identity-mapping

* the first 16MB. The pager assumes that no illegal

* addresses are produced (ie >4Mb on a 4Mb machine).

*

* NOTE! Although all physical memory should be identity

* mapped by this routine, only the kernel page functions

* use the >1Mb addresses directly. All "normal" functions

* use just the lower 1Mb, or the local data space, which

* will be mapped to some other place - mm keeps track of

* that.

*

* For those with more memory than 16 Mb - tough luck. I've

* not got it, why should you :-) The source is here. Change

* it. (Seriously - it shouldn't be too difficult. Mostly

* change some constants etc. I left it at 16Mb, as my machine

* even cannot be extended past that (ok, but it was cheap :-)

* I've tried to show which constants to change by having

* some kind of marker at them (search for "16Mb"), but I

* won't guarantee that's all :-( )

*/

.align 2

setup_paging:

movl $1024*5,%ecx   /* 5 pages - pg_dir+4 page tables */

xorl %eax,%eax

xorl %edi,%edi  /* pg_dir is at 0x000 */

cld;rep;stosl

movl $pg0+7,pg_dir  /* set present bit/user r/w */

movl $pg1+7,pg_dir+4    /*  --------- " " --------- */

movl $pg2+7,pg_dir+8    /*  --------- " " --------- */

movl $pg3+7,pg_dir+12   /*  --------- " " --------- */

movl $pg3+4092,%edi

movl $0xfff007,%eax /*  16Mb - 4096 + 7 (r/w user,p) */

std

1:  stosl   /* fill pages backwards - more efficient :-) */

subl $0x1000,%eax

jge 1b

xorl %eax,%eax  /* pg_dir is at 0x0000 */

movl %eax,%cr3  /* cr3 - page directory start */

movl %cr0,%eax

orl $0x80000000,%eax

movl %eax,%cr0  /* set paging (PG) bit */

ret /* this also flushes prefetch-queue */

.align 2

.word 0

idt_descr:

.word 256*8-1   # idt contains 256 entries

.long idt

.align 2

.word 0

gdt_descr:

.word 256*8-1   # so does gdt (not that that's any

.long gdt   # magic number, but it works for me :^)

.align 8

idt:    .fill 256,8,0   # idt is uninitialized

gdt:    .quad 0x0000000000000000    /* NULL descriptor */

.quad 0x00c09a0000000fff    /* 16Mb */

.quad 0x00c0920000000fff    /* 16Mb */

.quad 0x0000000000000000    /* TEMPORARY - don't use */

.fill 252,8,0   /* space for LDT's and TSS's etc */

image.png

/*
 * startup_32 - 32-bit entry code (the head.s header comment says startup
 * happens at absolute address 0). In order: point ds/es/fs/gs at selector
 * 0x10, load ss:esp from stack_start (defined outside this excerpt), install
 * the IDT and GDT, reload the data segments, verify A20, adjust CR0 for the
 * math coprocessor check, then jump to after_page_tables.
 */
startup_32:
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    mov %ax,%fs
    mov %ax,%gs
    lss stack_start,%esp
    call setup_idt
    call setup_gdt
    movl $0x10,%eax     # reload all the segment registers
    mov %ax,%ds     # after changing gdt. CS was already
    mov %ax,%es     # reloaded in 'setup_gdt'
    mov %ax,%fs
    mov %ax,%gs
    lss stack_start,%esp
    xorl %eax,%eax
    /*
     * Write an ever-increasing value to address 0 and compare it with the
     * dword at 0x100000; the loop repeats for as long as the two are equal.
     */
1:  incl %eax       # check that A20 really IS enabled
    movl %eax,0x000000  # loop forever if it isn't
    cmpl %eax,0x100000
    je 1b


    movl %cr0,%eax      # check math chip
    andl $0x80000011,%eax   # Save PG,PE,ET
    orl $2,%eax     # set MP
    movl %eax,%cr0
    call check_x87
    jmp after_page_tables

………………


/*
 * setup_idt - fill all 256 IDT slots with the same gate pointing at
 * ignore_int, then load IDTR from idt_descr.
 *
 * The 8-byte gate is assembled in eax (low dword) and edx (high dword):
 *   eax = 0x0008 in the upper 16 bits, low 16 bits of ignore_int below it
 *   edx = upper 16 bits of ignore_int, 0x8E00 in the lower 16 bits
 * Overwrites eax, ecx, edx, edi and the flags.
 */
setup_idt:
    lea ignore_int,%edx
    movl $0x00080000,%eax
    movw %dx,%ax        /* selector = 0x0008 = cs */
    movw $0x8E00,%dx    /* interrupt gate - dpl=0, present */

    lea idt,%edi
    mov $256,%ecx
    /* each pass stores one 8-byte gate at (%edi) and advances edi by 8 */
rp_sidt:
    movl %eax,(%edi)
    movl %edx,4(%edi)
    addl $8,%edi
    dec %ecx
    jne rp_sidt
    lidt idt_descr
    ret

看这段代码，知道这里又重置了GDT还有IDT，说明，前面的那些GDT和IDT的作用只是让我们把system放到这里。上面没有说到的IDT，现在来说下。

内存管理寄存器

与先前提到的GDTR类似，IDTR寄存器用于存放中断描述符表IDT的基地址和长度。同样有指令LIDT和SIDT分别用于加载和保存IDTR寄存器的内容。好，我们大概知道这也是一张表，再回想起以前我们经常用的 int 0x10 或者 int 0x13，就是到类似这样的一张表里查询要执行的处理程序（实模式下查的是BIOS设置好的中断向量表，进入保护模式后查的才是IDT），后面说到接口的时候，还会说到。

还有打开A20；
看到20，一定很眼熟，因为我们先前提到8086CPU有20根地址线。

image.png

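A20指的是第21根地址线（编号从A0开始数）。如果不打开它，这根线会一直被强制为0，超过1M的地址就会“回卷”到低端（例如0x100000会落到0x000000），我们也就没法正常访问1M以上的内存。只有打开了A20，再配合保护模式，我们才真正拥有了32位CPU的潜力：访问4G的内存。head.s里那段往0x000000写数、再和0x100000处比较的循环，检查的就是这件事。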

代码的最后是一个跳转

/* "orl $0x10020,%eax" here for 486 might be good */
    orl $2,%eax     # set MP
    movl %eax,%cr0
    call check_x87
    jmp after_page_tables

那接下来，我们就去看看跳转到那里之后干了什么。

语言的变化

image.png

语言改变了，到时具体的时候，再说新的语言怎么使用。这里大概知道三点就好了。

立即操作数前加$

寄存器前加%

绝对跳转或调用的操作数前加*

立即操作数：指令要操作的数据以常量的形式出现在指令中，称为立即数，它只能作为源操作数

after_page_tables //设置了页表之后

image.png

/*
 * after_page_tables - hand-build the stack frame for main, then enable
 * paging. Pushed last-to-first: three zero words (main's parameters), L6 as
 * main's return address, and the address of main. setup_paging is entered
 * with jmp rather than call, so its closing ret pops the address of main
 * and execution continues there.
 */
after_page_tables:
    pushl $0        # These are the parameters to main :-)
    pushl $0
    pushl $0
    pushl $L6       # return address for main, if it decides to.
    pushl $main
    jmp setup_paging
L6:
    jmp L6          # main should never return here, but
                # just in case, we know what happens.

image.png

/*
 * setup_paging - clear the page directory and four page tables, fill them
 * in, point cr3 at the directory and set the PG bit (bit 31) of cr0.
 *
 * Steps:
 *   1. Zero 1024*5 dwords starting at address 0 (rep stosl, eax = edi = 0).
 *   2. Store pg0..pg3, each +7, into the first four directory entries.
 *   3. Starting at the last dword of pg3, store eax and step it down by
 *      0x1000 per entry, from 0xfff007 until the subtraction goes negative.
 *   4. cr3 = 0, then set bit 31 of cr0 and return.
 * Overwrites eax, ecx, edi and the flags.
 *
 * NOTE(review): std sets the direction flag for step 3 and nothing in this
 * routine clears it again, so DF is still set when ret transfers control.
 * Confirm that the code reached from here does not rely on DF = 0.
 */
setup_paging:
    movl $1024*5,%ecx       /* 5 pages - pg_dir+4 page tables */
    xorl %eax,%eax
    xorl %edi,%edi          /* pg_dir is at 0x000 */
    cld;rep;stosl
    movl $pg0+7,pg_dir      /* set present bit/user r/w */
    movl $pg1+7,pg_dir+4        /*  --------- " " --------- */
    movl $pg2+7,pg_dir+8        /*  --------- " " --------- */
    movl $pg3+7,pg_dir+12       /*  --------- " " --------- */
    movl $pg3+4092,%edi
    movl $0xfff007,%eax     /*  16Mb - 4096 + 7 (r/w user,p) */
    std
1:  stosl           /* fill pages backwards - more efficient :-) */
    subl $0x1000,%eax
    jge 1b
    xorl %eax,%eax      /* pg_dir is at 0x0000 */
    movl %eax,%cr3      /* cr3 - page directory start */
    movl %cr0,%eax
    orl $0x80000000,%eax
    movl %eax,%cr0      /* set paging (PG) bit */
    ret         /* this also flushes prefetch-queue */

jmp setup_paging 到这里后，最后执行ret
ret把栈顶的main地址弹出，main开始执行
三个0分别是envp,argv,argc，都是main的参数；L6是main的返回地址。如果main返回的话

L6:
    jmp L6          # main should never return here, but
                # just in case, we know what happens.

就会进入无限循环，所以 main是不能返回的。

进入main函数

/*
 *  linux/init/main.c
 *
 *  (C) 1991  Linus Torvalds
 */

// 宏定义"__LIBRARY__" 是为了包括定义在unistd.h中的内嵌汇编代码等信息。
#define __LIBRARY__
// *.h头文件所在的默认目录是include/，所以在代码中就不用明确指明其位置。
// 如果不是unix的标准头文件，则需要指明所在的目录，并用双引号括住。
// unistd.h是标准符号常数与类型头文件。其中定义了各种符号常数和类型，
// 并声明了各种函数。如果还定义了符号__LIBRARY__,则还会包含系统调用和
// 内嵌汇编代码_syscall0()等。
#include <unistd.h>
#include <time.h>       // 时间类型头文件。其中主要定义了tm结构和一些有关时间的函数原型

/*
 * we need this inline - forking from kernel space will result
 * in NO COPY ON WRITE (!!!), until an execve is executed. This
 * is no problem, but for the stack. This is handled by not letting
 * main() use the stack at all after fork(). Thus, no function
 * calls - which means inline code for fork too, as otherwise we
 * would use the stack upon exit from 'fork()'.
 *
 * Actually only pause and fork are needed inline, so that there
 * won't be any messing with the stack from main(), but we define
 * some others too.
 */
// Linux在内核空间创建进程时不使用写时复制技术(Copy on write).main()在移动到用户
// 模式（到任务0）后执行内嵌方式的fork()和pause(),因此可保证不使用任务0的用户栈。
// 在执行move_to_user_mode()之后，本程序main()就以任务0的身份在运行了。而任务0是
// 所有将要创建的子进程的父进程。当它创建一个子进程时(init进程)，由于任务1代码属于
// 内核空间，因此没有使用写时复制功能。此时任务0的用户栈就是任务1的用户栈，即它们
// 共同使用一个栈空间。因此希望main.c运行在任务0的环境下时不要有对堆栈的任何操作，
// 以免弄乱堆栈。而在再次执行fork()并执行过execve()函数后，被加载程序已不属于内核空间
// 因此可以使用写时复制技术了。
//
// 下面_syscall0()是unistd.h中的内嵌宏代码。以嵌入汇编的形式调用Linux的系统调用中断
// 0x80.该中断是所有系统调用的入口。该条语句实际上是int fork()创建进程系统调用。可展
// 开看之就会立刻明白。syscall0名称中最后的0表示无参数，1表示1个参数。
static inline _syscall0(int,fork)
// int pause() 系统调用，暂停进程的执行，直到收到一个信号
static inline _syscall0(int,pause)
// int setup(void * BIOS)系统调用，仅用于linux初始化(仅在这个程序中被调用)
static inline _syscall1(int,setup,void *,BIOS)
// int sync()系统调用：更新文件系统。
static inline _syscall0(int,sync)

// tty头文件，定义了有关tty_io, 串行通信方面的参数、常数
#include <linux/tty.h>
// 调度程序头文件，定义了任务结构task_struct、第1个初始任务的数据。还有一些以宏的形式
// 定义的有关描述符参数设置和获取的嵌入式汇编函数程序。
#include <linux/sched.h>
#include <linux/head.h>
// 以宏的形式定义了许多有关设置或修改描述符/中断门等嵌入式汇编子程序
#include <asm/system.h>
// 以宏的嵌入式汇编程序形式定义对IO端口操作的函数
#include <asm/io.h>

#include <stddef.h>
#include <stdarg.h>
#include <unistd.h>
// 用于文件及描述符的操作控制常数符号的定义
#include <fcntl.h>
#include <sys/types.h>
// 定义文件结构(file,buffer_head,m_inode等)
#include <linux/fs.h>

// 用于内核显示信息的缓存
static char printbuf[1024];

extern int vsprintf();
extern void init(void);
extern void blk_dev_init(void);
extern void chr_dev_init(void);
extern void hd_init(void);
extern void floppy_init(void);
extern void mem_init(long start, long end);
// 虚拟盘初始化
extern long rd_init(long mem_start, int length);
extern long kernel_mktime(struct tm * tm);      //计算系统开始启动时间（秒）
extern long startup_time;       // 内核启动时间（开机时间）（秒）

/*
 * This is set up by the setup-routine at boot-time
 */
// 下面三行分别将指定的线性地址强行转换为给定数据类型的指针，并获取指针所指
// 的内容。由于内核代码段被映射到从物理地址零开始的地方，因此这些线性地址
// 正好也是对应的物理地址。这些指定地址处内存值的含义请参见setup程序读取并保存的参数。
#define EXT_MEM_K (*(unsigned short *)0x90002)
#define DRIVE_INFO (*(struct drive_info *)0x90080)
#define ORIG_ROOT_DEV (*(unsigned short *)0x901FC)

/*
 * Yeah, yeah, it's ugly, but I cannot find how to do this correctly
 * and this seems to work. I anybody has more info on the real-time
 * clock I'd be interested. Most of this was trial and error, and some
 * bios-listing reading. Urghh.
 */
// Read one byte of CMOS real-time-clock data. outb_p and inb_p are the port
// I/O macros from include/asm/io.h.
//   - writes (0x80 | addr) to port 0x70 to select the CMOS location
//   - reads the selected byte back from port 0x71; that read is the value of
//     the statement expression
// addr is parenthesised so that an expression argument such as
// CMOS_READ(c ? 0 : 2) is OR-ed as a whole instead of being split by operator
// precedence.
#define CMOS_READ(addr) ({ \
outb_p(0x80|(addr),0x70); \
inb_p(0x71); \
})

// 将BCD码转换成二进制数值。BCD码利用半个字节（4 bit）表示一个10进制数，因此
// 一个字节表示2个10进制数。（val）&15取BCD表示10进制个位数，而(val)>>4 取BCD表示
// 的10进制十位数，再乘以10.因此最后两者相加就是一个字节BCD码的实际二进制数值。
#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)

// Read the CMOS real-time clock and record it as the boot time, in seconds,
// in the global startup_time. kernel_mktime() (kernel/mktime.c) turns the
// broken-down time into seconds since 00:00 on 1 January 1970.
static void time_init(void)
{
    struct tm now;

    // CMOS access is slow. After reading every field, re-read the seconds
    // register; if it changed while we were reading, take the whole sample
    // again so the result is within one second of the CMOS clock.
    for (;;) {
        now.tm_sec = CMOS_READ(0);
        now.tm_min = CMOS_READ(2);
        now.tm_hour = CMOS_READ(4);
        now.tm_mday = CMOS_READ(7);
        now.tm_mon = CMOS_READ(8);
        now.tm_year = CMOS_READ(9);
        if (now.tm_sec == CMOS_READ(0))
            break;
    }

    // The registers hold BCD; convert each field to binary in place.
    BCD_TO_BIN(now.tm_sec);
    BCD_TO_BIN(now.tm_min);
    BCD_TO_BIN(now.tm_hour);
    BCD_TO_BIN(now.tm_mday);
    BCD_TO_BIN(now.tm_mon);
    BCD_TO_BIN(now.tm_year);

    now.tm_mon -= 1;                            // tm_mon runs 0-11
    startup_time = kernel_mktime(&now);
}

// 下面定义一些局部变量
static long memory_end = 0;                     // 机器具有的物理内存容量（字节数）
static long buffer_memory_end = 0;              // 高速缓冲区末端地址
static long main_memory_start = 0;              // 主内存（将用于分页）开始的位置

struct drive_info { char dummy[32]; } drive_info;  // 用于存放硬盘参数表信息

// Kernel initialisation entry point. Once initialisation is finished, execution
// continues as task 0 (the idle task).
void main(void)     /* This really IS void, no error here. */
{           /* The startup routine assumes (well, ...) this */
/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
    // The statements below record:
    //   root device number -> ROOT_DEV; end of the buffer cache -> buffer_memory_end;
    //   amount of machine memory -> memory_end; start of main memory -> main_memory_start.
    // Per the original annotation, ROOT_DEV is declared extern int in fs.h (included above).
    ROOT_DEV = ORIG_ROOT_DEV;
    drive_info = DRIVE_INFO;        // copy the hard-disk parameters stored at 0x90080
    memory_end = (1<<20) + (EXT_MEM_K<<10);     // memory size = 1MB + extended memory (KB) * 1024 bytes
    memory_end &= 0xfffff000;                   // drop any tail smaller than 4KB (one page)
    if (memory_end > 16*1024*1024)              // more than 16MB is treated as 16MB
        memory_end = 16*1024*1024;
    if (memory_end > 12*1024*1024)              // memory > 12MB: buffer cache ends at 4MB
        buffer_memory_end = 4*1024*1024;
    else if (memory_end > 6*1024*1024)          // else memory > 6MB: buffer cache ends at 2MB
        buffer_memory_end = 2*1024*1024;
    else
        buffer_memory_end = 1*1024*1024;        // otherwise the buffer cache ends at 1MB
    main_memory_start = buffer_memory_end;
    // If the Makefile defines RAMDISK, set up the RAM disk; main memory shrinks by
    // the amount rd_init() returns.
#ifdef RAMDISK
    main_memory_start += rd_init(main_memory_start, RAMDISK*1024);
#endif
    // The kernel now initialises each subsystem in turn. The annotator's advice: follow
    // each call into its source, and if one gets too heavy, set it aside and move on to
    // the next initialisation call.
    mem_init(main_memory_start,memory_end); // main memory area, mm/memory.c
    trap_init();                            // trap gates (hardware interrupt vectors), kernel/traps.c
    blk_dev_init();                         // block devices, kernel/blk_drv/ll_rw_blk.c
    chr_dev_init();                         // character devices, kernel/chr_drv/tty_io.c
    tty_init();                             // tty, kernel/chr_drv/tty_io.c
    time_init();                            // sets the boot time, startup_time
    sched_init();                           // scheduler (loads task 0's tr and ldtr), kernel/sched.c
    // Buffer-cache management: builds the in-memory lists etc. (fs/buffer.c)
    buffer_init(buffer_memory_end);
    hd_init();                              // hard disk, kernel/blk_drv/hd.c
    floppy_init();                          // floppy drive, kernel/blk_drv/floppy.c
    sti();                                  // all initialisation done: enable interrupts
    // Per the original annotation: using parameters set up on the stack, an
    // interrupt-return instruction starts task 0 running.
    move_to_user_mode();                    // continue in user mode
    if (!fork()) {      /* we count on this going ok */
        init();                             // runs in the newly created child (task 1)
    }
/*
 *   NOTE!!   For any other task 'pause()' would mean we have to get a
 * signal to awaken, but task0 is the sole exception (see 'schedule()')
 * as task 0 gets activated at every idle moment (when no other tasks
 * can run). For task0 'pause()' just means we go check if some other
 * task can run, and if not we return here.
 */
    // The pause system call puts task 0 into the interruptible-wait state and then runs
    // the scheduler. Whenever the scheduler finds no other runnable task it switches back
    // to task 0, regardless of task 0's state.
    for(;;) pause();
}

// Formats a message and writes it to file descriptor 1 (standard output, i.e. the
// screen here). 'fmt' is an ordinary C format string; the variable arguments
// supply its values. The text is first rendered into the printbuf buffer with
// vsprintf() (kernel/vsprintf.c per the original annotation) and then emitted
// with write(). Returns the character count reported by vsprintf().
static int printf(const char *fmt, ...)
{
    va_list ap;
    int len;

    va_start(ap, fmt);
    len = vsprintf(printbuf, fmt, ap);      // render into the shared buffer
    write(1, printbuf, len);                // emit exactly the rendered bytes to stdout
    va_end(ap);
    return len;
}

// Command-line and environment arrays used when running the shell on /etc/rc
static char * argv_rc[] = { "/bin/sh", NULL };      // argument string array passed to execve()
static char * envp_rc[] = { "HOME=/", NULL };       // environment string array passed to execve()

// Command-line and environment arrays used when running the login shell.
// The leading '-' in argv[0] below is a flag passed to the shell program sh: on seeing
// it, sh runs as a login shell, which behaves differently from running sh at a
// shell prompt.
static char * argv[] = { "-/bin/sh",NULL };
static char * envp[] = { "HOME=/usr/root", NULL };

// main() has already initialized the system (memory management, hardware devices and
// drivers). init() runs in the first child created by task 0 (task 1). It prepares the
// environment for the first program to be executed (the shell), then loads and runs
// that program as a login shell.
void init(void)
{
    int pid,i;

    // setup() is a system call. Per the original annotation it reads the hard-disk
    // parameters (including partition tables), loads the RAM disk if present and mounts
    // the root file system; it is declared by a syscall macro earlier in the original
    // file and implemented as sys_setup() in kernel/blk_drv/hd.c (neither visible here).
    setup((void *) &drive_info);        // drive_info holds the parameter tables of the 2 hard disks
    // Open "/dev/tty0" (the console terminal) for reading and writing. Because this is
    // the first file opened, the resulting descriptor is expected to be 0, the
    // conventional standard input (stdin) on UNIX-like systems. It is opened read/write
    // so that it can be duplicated to create standard output (stdout) and standard
    // error (stderr). The "(void)" prefix makes explicit that the return value is ignored.
    (void) open("/dev/tty0",O_RDWR,0);
    (void) dup(0);                      // duplicate the descriptor to create descriptor 1 -- stdout
    (void) dup(0);                      // duplicate the descriptor to create descriptor 2 -- stderr
    // Print the number of buffer blocks and their total size in bytes (the original note
    // says 1024 bytes per block), followed by the free bytes in the main memory area.
    printf("%d buffers = %d bytes buffer space\n\r",NR_BUFFERS,
        NR_BUFFERS*BLOCK_SIZE);
    printf("Free mem: %d bytes\n\r",memory_end-main_memory_start);
    // fork() below creates a child process (task 2). In the child fork() returns 0; in
    // the parent it returns the child's pid. The child closes descriptor 0 (stdin), opens
    // /etc/rc read-only, and uses execve() to replace itself with /bin/sh, passing the
    // arguments and environment from argv_rc and envp_rc. Closing descriptor 0 and
    // immediately opening /etc/rc redirects stdin to /etc/rc, so the shell runs the
    // commands in the rc file. Since this sh is non-interactive, it exits as soon as
    // the rc commands are done, and process 2 ends with it.
    // _exit() codes per the original note: 1 - operation not permitted; 2 - no such file or directory.
    if (!(pid=fork())) {
        close(0);
        if (open("/etc/rc",O_RDONLY,0))
            _exit(1);                       // open did not return descriptor 0 (treated as failure): exit (lib/_exit.c)
        execve("/bin/sh",argv_rc,envp_rc);  // replace this process with /bin/sh
        _exit(2);                           // only reached if execve() failed
    }
    // Back in the parent (process 1). wait() waits for a child to stop or terminate and
    // should return that child's pid; &i receives the status. The loop keeps waiting
    // until wait() returns the pid of the child created above.
    if (pid>0)
        while (pid != wait(&i))
            /* nothing */;
    // Reaching this point means the child created above has stopped or terminated. The
    // loop below creates another child; if fork fails it prints "Fork failed in init" and
    // tries again. The new child closes the inherited descriptors (stdin, stdout, stderr),
    // creates a new session, reopens /dev/tty0 as stdin and duplicates it to stdout and
    // stderr, then executes /bin/sh again -- this time with the other set of argument and
    // environment arrays (argv/envp). The parent waits again; when the child exits it
    // prints "child <pid> died with code <i>" on stdout and repeats, forming one big loop.
    // Per the original annotation, wait() also serves to reap orphans: a process whose
    // parent terminated first is re-parented to this init process (process 1), which then
    // releases the task structure and other resources of terminated processes.
    while (1) {
        if ((pid=fork())<0) {
            printf("Fork failed in init\r\n");
            continue;
        }
        if (!pid) {                                 // the new child process
            close(0);close(1);close(2);
            setsid();                               // create a new session
            (void) open("/dev/tty0",O_RDWR,0);
            (void) dup(0);
            (void) dup(0);
            _exit(execve("/bin/sh",argv,envp));
        }
        while (1)
            if (pid == wait(&i))
                break;
        printf("\n\rchild %d died with code %04x\n\r",pid,i);
        sync();                                     // flush the buffers to disk
    }
    // Both _exit() and exit() terminate the process normally. _exit() is directly the
    // sys_exit system call, whereas exit() is usually an ordinary library function that
    // first performs cleanup (running termination handlers, closing standard I/O, etc.)
    // and then calls sys_exit.
    _exit(0);   /* NOTE! _exit, not exit() */
}

image.png

// Abbreviated excerpt of the kernel's main(): only the subsystem init calls and the
// hand-off to task 0 / task 1 are shown.
void main(void)     /* This really IS void, no error here. */
{
    mem_init(main_memory_start,memory_end); // main memory area initialization. mm/memory.c
    trap_init();                            // trap gates (hardware interrupt vectors) initialization, kernel/traps.c
    blk_dev_init();                         // block device initialization, kernel/blk_drv/ll_rw_blk.c
    chr_dev_init();                         // character device initialization, kernel/chr_drv/tty_io.c
    tty_init();                             // tty initialization, kernel/chr_drv/tty_io.c
    time_init();                            // sets the boot time, startup_time
    sched_init();                           // scheduler initialization (loads tr and ldtr for task 0) (kernel/sched.c)
    // Buffer management initialization: builds the in-memory lists, etc. (fs/buffer.c)
    buffer_init(buffer_memory_end);
    hd_init();                              // hard-disk initialization, kernel/blk_drv/hd.c
    floppy_init();                          // floppy-drive initialization, kernel/blk_drv/floppy.c
    sti();                                  // all initialization is done; enable interrupts
    // The next step sets up parameters on the stack and uses an interrupt-return
    // instruction to start running as task 0.
    move_to_user_mode();                    // switch to user mode
    if (!fork()) {      /* we count on this going ok */
        init();                             // runs in the newly created child (task 1)
    }
    for(;;) pause();
}

里面都是一些初始化的函数，大同小异，我们拿一个mem_init来讲下，其余的就明白了。

看一看mem_init...

image.png

linux/mm/memory.c

/*
 * mem_init - set up the page-usage map for the main memory area.
 *
 * start_mem: first byte of the main memory area.
 * end_mem:   end of physical memory; stored in HIGH_MEMORY.
 *
 * Every entry of mem_map[] (PAGING_PAGES entries) is first marked USED,
 * then the entries covering start_mem..end_mem are cleared to 0 (free).
 * The page count is the byte span shifted right by 12, i.e. divided by
 * 4096, the page size.
 */
void mem_init(long start_mem, long end_mem)
{
    int page;
    long free_pages;

    HIGH_MEMORY = end_mem;                      /* remember the top of memory */

    /* Start pessimistic: every mapped page counts as occupied. */
    for (page = 0; page < PAGING_PAGES; ++page)
        mem_map[page] = USED;

    /* Release the pages belonging to the main memory area. */
    page = MAP_NR(start_mem);                   /* map index of the first main-memory page */
    free_pages = (end_mem - start_mem) >> 12;   /* number of pages in the main memory area */
    for (; free_pages > 0; --free_pages)
        mem_map[page++] = 0;
}

右移12位，就是除以2^12=4KB（一页的大小），所以得到的结果就是主内存区的页面数。

最后编辑于：2018.09.20 20:22:59

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 205,236评论 6赞 478
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 87,867评论 2赞 381
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 151,715评论 0赞 340
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 54,899评论 1赞 278
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 63,895评论 5赞 368
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 48,733评论 1赞 283
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 38,085评论 3赞 399
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 36,722评论 0赞 258
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 43,025评论 1赞 300
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 35,696评论 2赞 323
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 37,816评论 1赞 333
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 33,447评论 4赞 322
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 39,057评论 3赞 307
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 30,009评论 0赞 19
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 31,254评论 1赞 260
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 45,204评论 2赞 352
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 42,561评论 2赞 343

二：操作系统的引导（2）

setup模块，即setup.s

将setup移到0地址处...

实模式和保护模式

分段机制

进入保护模式

内存管理寄存器

head.s //一段在保护模式下运行的代码

after_page_tables //设置了页表之后

进入main函数

看一看mem_init...

推荐阅读更多精彩内容