根据向勇老师的指示,在这里分享一下我跟踪do_fork()及相关创建线程代码的过程。主要内容分为do_fork()主要涉及的子函数功能分析,以及完成do_fork()之后如何启动新的线程init。
Breakpoint 2, do_fork (clone_flags=256, stack=0, tf=0xc0126f54) at kern/process/proc.c:279 279 do_fork(uint32_t clone_flags, uintptr_t stack, struct trapframe *tf) { (gdb) l 274 * @clone_flags: used to guide how to clone the child process 275 * @stack: the parent's user stack pointer. if stack==0, It means to fork a kernel thread. 276 * @tf: the trapframe info, which will be copied to child process's proc->tf 277 */ 278 int 279 do_fork(uint32_t clone_flags, uintptr_t stack, struct trapframe *tf) { 280 int ret = -E_NO_FREE_PROC; 281 struct proc_struct *proc; 282 if (nr_process >= MAX_PROCESS) { 283 goto fork_out; (gdb) n
这是初始界面:用list命令查看代码上下文,用next执行下一条语句(不进入函数),用step执行下一条语句(会进入函数)。
(gdb) n 280 int ret = -E_NO_FREE_PROC; (gdb) p ret $1 = 1211072
struct proc_struct *proc = kmalloc(sizeof(struct proc_struct)); if (proc != NULL) { proc->state = PROC_UNINIT; proc->pid = -1; proc->runs = 0; proc->kstack = 0; proc->need_resched = 0; proc->parent = NULL; proc->mm = NULL; memset(&(proc->context), 0, sizeof(struct context)); proc->tf = NULL; proc->cr3 = boot_cr3; proc->flags = 0; memset(proc->name, 0, PROC_NAME_LEN); } return proc;可以看出是对TCB的变量进行初始设置。
struct Page *page = alloc_pages(KSTACKPAGE); if (page != NULL) { proc->kstack = (uintptr_t)page2kva(page); return 0; } return -E_NO_MEM;这里很重要的一点是调用之前我们实现的alloc_pages,并且把proc->kstack指向新的页面,page2kva会算出page指针相对于pages这个起始地址的偏移,进而可以得到其相对的内核虚拟地址。如果页面内存不足,则返回失败。
static void copy_thread(struct proc_struct *proc, uintptr_t esp, struct trapframe *tf) { proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - 1; *(proc->tf) = *tf; proc->tf->tf_regs.reg_eax = 0; proc->tf->tf_esp = esp; proc->tf->tf_eflags |= FL_IF;proc->context.eip = (uintptr_t)forkret;
proc->context.esp = (uintptr_t)(proc->tf);
}
proc->context.eip = (uintptr_t)forkret; proc->context.esp = (uintptr_t)(proc->tf);
这里十分重要:会把上下文中的指令指针eip设置为forkret函数的入口,把esp设置为proc->tf,即位于子进程内核栈顶部的中断栈帧(其内容由传入的tf复制而来,并不是父进程的栈帧本身)。这样第一次切换到子进程后就会从forkret开始执行,具体等我们后面跟踪到子进程启动再说。
ide_init(); // init ide devices swap_init(); // init swap clock_init(); // init clock interrupt intr_enable(); // enable irq interrupt cpu_idle(); // run idle process
这一系列初始化,而cpu_idle()看名字就知道是我们所关心的了,进去之后,
void cpu_idle(void) { while (1) { if (current->need_resched) { schedule(); } } }
我们发现它会循环判断当前进程/线程的current->need_resched是否为真。在之前跟踪的过程中,在alloc_proc()函数里,我们初始化的子线程的need_resched被设置为0,而在proc_init()函数里:
idleproc->pid = 0; idleproc->state = PROC_RUNNABLE; idleproc->kstack = (uintptr_t)bootstack; idleproc->need_resched = 1; set_proc_name(idleproc, "idle"); nr_process ++; current = idleproc;
我们可以看到idleproc也即空闲进程的need_resched初始值为1,而此时current就是idleproc,所以current->need_resched为1,在cpu_idle()里判断为真,进入schedule()函数。
load_esp0(next->kstack + KSTACKSIZE); lcr3(next->cr3); switch_to(&(prev->context), &(next->context));
这三条语句就是proc_run()的核心。第一条语句修改TSS(任务状态段),将TSS的ts_esp0(stack pointers and segment selectors)指向下一个进程内核栈的栈顶,即next->kstack + KSTACKSIZE(不知这里和copy_thread()里设置proc->tf = (struct trapframe *)(proc->kstack + KSTACKSIZE) - 1 有什么关联?暂时没有理解)。第二条语句修改cr3,即页表基址。第三条语句进行上下文切换,这里便是IDE无法继续查看调用的地方了,而用gdb仍可以很方便地跟踪。
switch_to () at kern/process/switch.S:6 6 movl 4(%esp), %eax # eax points to from (gdb) s 7 popl 0(%eax) # save eip !popl switch_to () at kern/process/switch.S:8 8 movl %esp, 4(%eax) 9 movl %ebx, 8(%eax) 10 movl %ecx, 12(%eax) 11 movl %edx, 16(%eax) 12 movl %esi, 20(%eax) 13 movl %edi, 24(%eax) 14 movl %ebp, 28(%eax) 17 movl 4(%esp), %eax # not 8(%esp): popped return address already 19 movl 28(%eax), %ebp switch_to () at kern/process/switch.S:20 20 movl 24(%eax), %edi 21 movl 20(%eax), %esi 22 movl 16(%eax), %edx 23 movl 12(%eax), %ecx 24 movl 8(%eax), %ebx 25 movl 4(%eax), %esp 27 pushl 0(%eax) # push eip switch_to () at kern/process/switch.S:29 29 ret forkret () at kern/process/proc.c:193 193 forkret(void) { forkret () at kern/process/proc.c:194 194 forkrets(current->tf); forkrets () at kern/trap/trapentry.S:48 48 movl 4(%esp), %esp (gdb) l 43 iret 44 45 .globl forkrets 46 forkrets: 47 # set stack to this new process's trapframe 48 movl 4(%esp), %esp 49 jmp __trapret (gdb) s forkrets () at kern/trap/trapentry.S:49 49 jmp __trapret (gdb) __trapret () at kern/trap/trapentry.S:33 33 popal (gdb) __trapret () at kern/trap/trapentry.S:36 36 popl %gs 37 popl %fs 38 popl %es 39 popl %ds 42 addl $0x8, %esp 43 iret 5 pushl %edx # push arg 6 call *%ebx # call fn (gdb) s init_main (arg=<error reading variable: Unknown argument list address for `arg'.>) at kern/process/proc.c:359 359 init_main(void *arg) {
可以看到首先是我们讨论很多的两段代码:先用movl 4(%esp), %eax取esp+4处的第一个参数,然后保存现场,把当前的一系列寄存器存到第一个参数指向的内存空间里;接着取第二个参数,它原本位于esp+8,但由于前面的popl已经把返回地址弹出、esp增加了4,所以这里仍然写作movl 4(%esp), %eax;然后把第二个参数指向的内存空间里的值恢复到一系列寄存器中。事实上两个参数就是&(prev->context)和&(next->context),分别保存了各自的上下文。
42 addl $0x8, %esp (gdb) 43 iret (gdb) 5 pushl %edx # push arg (gdb) 6 call *%ebx # call fn
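这里将esp加8(跳过栈帧中的trap号和错误码),执行中断返回iret,随后跳转到了某个文件的第5、6行,但并不是当前文件trapentry.S的5、6行……后来我发现是entry.S(kernel_thread_entry)的5、6行,这里如何自动跳转的我起初并不明了;推测是因为iret会从中断栈帧中弹出eip,而该栈帧里的tf_eip在创建内核线程时被设置成了kernel_thread_entry(有待对照kernel_thread()的代码确认)。总之call *%ebx就进入了init_main(),开始执行子进程的代码了。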
(gdb) s kernel_thread_entry () at kern/process/entry.S:8 8 pushl %eax # save the return value of fn(arg) (gdb) 9 call do_exit # call do_exit to terminate current thread (gdb) do_exit (error_code=<error reading variable: Unknown argument list address for `error_code'.>) at kern/process/proc.c:353 353 do_exit(int error_code) { (gdb) do_exit (error_code=0) at kern/process/proc.c:354 354 panic("process exit!!.\n"); (gdb)