MIT6.828 Fall2018 笔记 - Lab 4: Preemptive Multitasking

时间 2020-07-05

标签 mit6.828 mit fall2018 fall 笔记 lab preemptive multitasking 繁體版

原文链接

Part A: Multiprocessor Support and Cooperative Multitasking

Exercise 1

// Reserve `size` bytes of the MMIO window and map the physical range
// [pa, pa + size) there with caching disabled.
//
// Returns the virtual base address of the newly reserved region.
// Panics if the reservation would extend past MMIOLIM.
void *
mmio_map_region(physaddr_t pa, size_t size)
{
    // Next unused virtual address in the MMIO window; persists across calls.
    static uintptr_t base = MMIOBASE;

    // Mappings are page-granular, so reserve (and map) whole pages.
    size = ROUNDUP(size, PGSIZE);

    // Written as a subtraction so that `base + size` cannot wrap around.
    if (size > MMIOLIM - base)
        panic("mmio_map_region: reservation of %u bytes overflows MMIOLIM",
              (unsigned)size);

    // PTE_PCD | PTE_PWT: cache-disable and write-through, so device register
    // accesses are not served from the CPU cache.
    boot_map_region(kern_pgdir, base, size, pa, PTE_PCD | PTE_PWT | PTE_W);

    void *region = (void *)base;
    base += size;
    return region;
}

Exercise 2

//  2) The rest of base memory, [PGSIZE, npages_basemem * PGSIZE)
    //     is free.
    for (i = 1; i < npages_basemem; i++) {
        if (i == MPENTRY_PADDR >> PGSHIFT) {
            pages[i].pp_ref = 1;
            continue;
        }
        pages[i].pp_ref = 0;
        pages[i].pp_link = page_free_list;
        page_free_list = &pages[i];
    }

现在可以看到：

check_page_free_list() succeeded!
check_page_alloc() succeeded!
check_page() succeeded!

不过 check_kern_pgdir() 还是会失败。

Question 1

Compare kern/mpentry.S side by side with boot/boot.S. Bearing in mind that kern/mpentry.S is compiled and linked to run above KERNBASE just like everything else in the kernel, what is the purpose of macro MPBOOTPHYS? Why is it necessary in kern/mpentry.S but not in boot/boot.S? In other words, what could go wrong if it were omitted in kern/mpentry.S?
Hint: recall the differences between the link address and the load address that we have discussed in Lab 1.

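MPBOOTPHYS 宏的目的是把符号的链接地址换算成它被加载后的物理（绝对）地址。kern/mpentry.S 和内核其他部分一样被链接到 KERNBASE 之上，但 boot_aps() 用 memmove() 把这段代码复制到了 MPENTRY_PADDR 处运行。如果不使用 MPBOOTPHYS 宏，代码中引用的符号就会解析为 0xf0000000 之上的地址，而 AP 启动时处于实模式，只能寻址 1MB，因此会访问到错误的地址。boot/boot.S 的链接地址与加载地址相同，所以不需要这个宏。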

Exercise 3

// Map one kernel stack per CPU just below KSTACKTOP.
//
// CPU i's stack occupies the top KSTKSIZE bytes of its
// (KSTKSIZE + KSTKGAP)-sized slot; the KSTKGAP part below it is left
// unmapped by this function.
static void
mem_init_mp(void)
{
    for (int cpu = 0; cpu < NCPU; cpu++) {
        // Lowest mapped address of this CPU's stack.
        uintptr_t stack_bottom =
            KSTACKTOP - cpu * (KSTKSIZE + KSTKGAP) - KSTKSIZE;

        boot_map_region(kern_pgdir, stack_bottom, KSTKSIZE,
                        PADDR(percpu_kstacks[cpu]), PTE_W);
    }
}

通过了如下测试：

check_kern_pgdir() succeeded!
check_page_free_list() succeeded!
check_page_installed_pgdir() succeeded!

Exercise 4

void
trap_init_percpu(void)
{
    // LAB 4: Your code here:

    // Setup a TSS so that we get the right stack
    // when we trap to the kernel.
    struct Taskstate* this_ts = &thiscpu->cpu_ts;
    int cpu_id = cpunum();
    this_ts->ts_esp0 = KSTACKTOP - cpu_id * (KSTKSIZE + KSTKGAP);
    this_ts->ts_ss0 = GD_KD;
    this_ts->ts_iomb = sizeof(struct Taskstate);

    // Initialize the TSS slot of the gdt.
    gdt[(GD_TSS0 >> 3) + cpu_id] = SEG16(STS_T32A, (uint32_t)(this_ts), sizeof(struct Taskstate) - 1, 0);
    gdt[(GD_TSS0 >> 3) + cpu_id].sd_s = 0;

    // Load the TSS selector (like other segment selectors, the
    // bottom three bits are special; we leave them 0)
    ltr(GD_TSS0 + (cpu_id << 3));

    // Load the IDT
    lidt(&idt_pd);
}

Exercise 5

//i386_init
lock_kernel();
boot_aps();

//mp_main
lock_kernel();
sched_yield();

//trap
if ((tf->tf_cs & 3) == 3) {
    lock_kernel();
    assert(curenv);
    ......
}
//env_run
lcr3(PADDR(curenv->env_pgdir));
unlock_kernel();
env_pop_tf(&(curenv->env_tf));

Exercise 6

kern/syscall.c中添加：

case SYS_yield:
        sys_yield();
        return 0;

修改i386_init()，由于user_primes程序中使用的fork系统调用还未实现，所以暂时注释掉，等实现了再取消注释：

/*
#if defined(TEST)
    // Don't touch -- used by grading script!
    ENV_CREATE(TEST, ENV_TYPE_USER);
#else
    // Touch all you want.
    ENV_CREATE(user_primes, ENV_TYPE_USER);
#endif // TEST*
*/
    ENV_CREATE(user_yield, ENV_TYPE_USER);
    ENV_CREATE(user_yield, ENV_TYPE_USER);
    ENV_CREATE(user_yield, ENV_TYPE_USER);
    // Schedule and run the first user environment!
    sched_yield();
}

void
sched_yield(void)
{
    struct Env *idle;
    // LAB 4: Your code here.
    idle = curenv;
    size_t idx = idle != NULL ? ENVX(idle->env_id) : -1;
    for (size_t i = 0; i < NENV; i++) {
        idx = (idx + 1 == NENV) ? 0 : idx + 1;
        if (envs[idx].env_status == ENV_RUNNABLE) {
            env_run(&envs[idx]);
            return;
        }
    }
    if (idle && idle->env_status == ENV_RUNNING && idle->env_cpunum == cpunum()) {
        env_run(idle);
        return;
    }

    // sched_halt never returns
    sched_halt();
}

make qemu CPUS=2运行结果：

...
[00000000] new env 00001000
[00000000] new env 00001001
[00000000] new env 00001002
Hello, I am environment 00001000.
Hello, I am environment 00001001.
Back in environment 00001000, iteration 0.
Hello, I am environment 00001002.
Back in environment 00001001, iteration 0.
Back in environment 00001000, iteration 1.
Back in environment 00001001, iteration 1.
Back in environment 00001002, iteration 0.
Back in environment 00001000, iteration 2.
Back in environment 00001001, iteration 2.
Back in environment 00001002, iteration 1.
...

Exercise 7

将i386_init()改回来：

#if defined(TEST)
    // Don't touch -- used by grading script!
    ENV_CREATE(TEST, ENV_TYPE_USER);
#else
    // Touch all you want.
    ENV_CREATE(user_primes, ENV_TYPE_USER);
#endif // TEST*

    // Schedule and run the first user environment!
    sched_yield();
}

然后实现5个系统调用：

// Allocate a new environment as a child of the caller.
//
// The child starts ENV_NOT_RUNNABLE with a copy of the parent's trap
// frame, except that its saved %eax is 0 so the call appears to return
// 0 in the child.
//
// Returns the child's envid on success, or the error from env_alloc().
static envid_t
sys_exofork(void)
{
    struct Env *child;
    int err = env_alloc(&child, curenv->env_id);

    if (err != 0)
        return err;

    child->env_tf = curenv->env_tf;        // inherit the parent's register state
    child->env_tf.tf_regs.reg_eax = 0;     // return value seen by the child
    child->env_status = ENV_NOT_RUNNABLE;
    return child->env_id;
}

// Set the status of environment `envid` to ENV_RUNNABLE or
// ENV_NOT_RUNNABLE.
//
// Returns 0 on success, -E_BAD_ENV if envid2env() rejects `envid`
// (permission checking enabled), or -E_INVAL if `status` is neither of
// the two allowed values.
static int
sys_env_set_status(envid_t envid, int status)
{
    struct Env *target;

    if (envid2env(envid, &target, 1) != 0)
        return -E_BAD_ENV;

    switch (status) {
    case ENV_RUNNABLE:
    case ENV_NOT_RUNNABLE:
        target->env_status = status;
        return 0;
    default:
        return -E_INVAL;
    }
}

// Allocate a zeroed page and map it at `va` in the address space of
// `envid` with permissions `perm`.
//
// Returns 0 on success,
//   -E_BAD_ENV if envid2env() rejects `envid` (permission checking enabled),
//   -E_INVAL   if `va` is at or above UTOP or not page-aligned, or if `perm`
//              lacks PTE_U | PTE_P or has bits outside PTE_SYSCALL,
//   -E_NO_MEM  if no page or page table could be allocated.
static int
sys_page_alloc(envid_t envid, void *va, int perm)
{
    struct Env *e;
    if (envid2env(envid, &e, 1) != 0)
        return -E_BAD_ENV;

    if ((uintptr_t)va >= UTOP || PGOFF(va) != 0)
        return -E_INVAL;

    // Both PTE_U and PTE_P must be set and nothing outside PTE_SYSCALL may
    // be. This must be a bit test: a numeric comparison against
    // (PTE_U | PTE_P) would accept values that lack those bits.
    if ((perm & (PTE_U | PTE_P)) != (PTE_U | PTE_P) ||
        (perm & ~PTE_SYSCALL) != 0)
        return -E_INVAL;

    struct PageInfo *pp = page_alloc(ALLOC_ZERO);
    if (pp == NULL)
        return -E_NO_MEM;

    if (page_insert(e->env_pgdir, pp, va, perm) != 0) {
        // The page was never mapped, so give it back instead of leaking it.
        page_free(pp);
        return -E_NO_MEM;
    }
    return 0;
}

// Map the page found at `srcva` in `srcenvid`'s address space at `dstva`
// in `dstenvid`'s address space with permissions `perm`.
//
// Returns 0 on success,
//   -E_BAD_ENV if envid2env() rejects either environment id
//              (permission checking enabled),
//   -E_INVAL   if either address is at or above UTOP or not page-aligned,
//              if `perm` lacks PTE_U | PTE_P or has bits outside PTE_SYSCALL,
//              if nothing is mapped at `srcva`, or if PTE_W is requested
//              for a page that is read-only in the source,
//   -E_NO_MEM  if page_insert() fails.
static int
sys_page_map(envid_t srcenvid, void *srcva,
         envid_t dstenvid, void *dstva, int perm)
{
    struct Env *srcenv, *dstenv;
    if (envid2env(srcenvid, &srcenv, 1) != 0 ||
        envid2env(dstenvid, &dstenv, 1) != 0)
        return -E_BAD_ENV;

    if ((uintptr_t)srcva >= UTOP || PGOFF(srcva) != 0 ||
        (uintptr_t)dstva >= UTOP || PGOFF(dstva) != 0)
        return -E_INVAL;

    // Bit test, not a numeric comparison: both PTE_U and PTE_P are required.
    if ((perm & (PTE_U | PTE_P)) != (PTE_U | PTE_P) ||
        (perm & ~PTE_SYSCALL) != 0)
        return -E_INVAL;

    pte_t *src_pte;
    struct PageInfo *pp = page_lookup(srcenv->env_pgdir, srcva, &src_pte);
    // Without this check src_pte would be dereferenced uninitialised when
    // nothing is mapped at srcva.
    if (pp == NULL)
        return -E_INVAL;

    // Never hand out write access to a page the source can only read.
    if ((perm & PTE_W) && (*src_pte & PTE_W) == 0)
        return -E_INVAL;

    if (page_insert(dstenv->env_pgdir, pp, dstva, perm) != 0)
        return -E_NO_MEM;
    return 0;
}

// Remove whatever is mapped at `va` in the address space of `envid`.
//
// Returns 0 on success, -E_BAD_ENV if envid2env() rejects `envid`
// (permission checking enabled), or -E_INVAL if `va` is at or above
// UTOP or not page-aligned.
static int
sys_page_unmap(envid_t envid, void *va)
{
    struct Env *target;

    if (envid2env(envid, &target, 1) != 0)
        return -E_BAD_ENV;

    if ((uintptr_t)va < UTOP && PGOFF(va) == 0) {
        page_remove(target->env_pgdir, va);
        return 0;
    }
    return -E_INVAL;
}

make grade运行结果：

dumbfork: OK (6.8s)
Part A score: 5/5

faultread: OK (5.2s)
faultwrite: OK (5.1s)
...

Part B: Copy-on-Write Fork

fork复制父进程的地址空间开销很大，所以较新的unix使用了写时复制（copy-on-write）的方法，即允许父进程和子进程共享内存映射，直到其中一个进程实际修改那部分内存的时候再复制。

Exercise 8

// Record `func` as the page fault upcall entry point of environment
// `envid`.
//
// Returns 0 on success, or -E_BAD_ENV if envid2env() rejects `envid`
// (permission checking enabled).
static int
sys_env_set_pgfault_upcall(envid_t envid, void *func)
{
    struct Env *e;
    if (envid2env(envid, &e, 1) != 0)
        return -E_BAD_ENV;

    e->env_pgfault_upcall = func;
    // Explicit success return: falling off the end of a non-void function
    // would hand the caller an indeterminate value.
    return 0;
}

Exercise 9 10 11

// Install `handler` as this environment's user-level page fault handler.
//
// On the first call (no handler registered yet) this also allocates the
// page just below UXSTACKTOP and registers _pgfault_upcall with the
// kernel as the page fault upcall. Panics if either system call fails.
void
set_pgfault_handler(void (*handler)(struct UTrapframe *utf))
{
    if (!_pgfault_handler) {
        // First time through: set up the exception stack and the upcall.
        envid_t self = sys_getenvid();
        void *xstack_page = (void *)(UXSTACKTOP - PGSIZE);
        int err;

        if ((err = sys_page_alloc(self, xstack_page, PTE_P | PTE_U | PTE_W)) < 0)
            panic("set_pgfault_handler: %e", err);

        if ((err = sys_env_set_pgfault_upcall(self, _pgfault_upcall)) < 0)
            panic("set_pgfault_handler: %e", err);
    }

    // Save handler pointer for assembly to call.
    _pgfault_handler = handler;
}

void
page_fault_handler(struct Trapframe *tf)
{
    uint32_t fault_va;

    // Read processor's CR2 register to find the faulting address
    fault_va = rcr2();

    // Handle kernel-mode page faults.

    // LAB 3: Your code here.
    // 若是是在kernel中page fault
    if ((tf->tf_cs & 0x3) == 0) {
        panic("page fault in kernel mode!");
    }

    // LAB 4: Your code here.

    if (curenv->env_pgfault_upcall != 0) {
        struct UTrapframe* utf;
        if (tf->tf_esp >= UXSTACKTOP - PGSIZE && tf->tf_esp < UXSTACKTOP) {
            // 从 user exception stack 产生异常
            utf = (struct UTrapframe*)(tf->tf_esp - 4 - sizeof(struct UTrapframe));
        } else {
            utf = (struct UTrapframe*)(UXSTACKTOP - sizeof(struct UTrapframe));
        }
        user_mem_assert(curenv, (void*)utf, sizeof(struct UTrapframe), PTE_W);
        utf->utf_eflags = tf->tf_eflags;
        utf->utf_eip = tf->tf_eip;
        utf->utf_err = tf->tf_err;
        utf->utf_esp = tf->tf_esp;
        utf->utf_fault_va = fault_va;
        utf->utf_regs = tf->tf_regs;
        tf->tf_eip = (uintptr_t)curenv->env_pgfault_upcall;
        tf->tf_esp = (uintptr_t)utf;
        env_run(curenv);
    }

    // Destroy the environment that caused the fault.
    cprintf("[%08x] user fault va %08x ip %08x\n",
        curenv->env_id, fault_va, tf->tf_eip);
    print_trapframe(tf);
    env_destroy(curenv);
}

// User-level page fault entry point. On entry %esp points at the
// UTrapframe for the fault. This routine calls the C handler stored in
// _pgfault_handler, then returns to the faulting instruction with the
// trap-time registers, eflags and stack restored.
.text
.globl _pgfault_upcall
_pgfault_upcall:
    // Call the C page fault handler.
    pushl %esp			// function argument: pointer to UTF
    movl _pgfault_handler, %eax
    call *%eax
    addl $4, %esp			// pop function argument

    // LAB 4: Your code here.
    // Push the trap-time eip onto the trap-time stack so that a final
    // `ret` can both switch stacks and jump back. %ebp and %eax are free
    // to use as scratch here because popal reloads them below.
    movl 48(%esp), %ebp
    subl $4, %ebp 			// ebp = utf_esp - 4
    movl %ebp, 48(%esp) 	// store utf_esp - 4 back into the UTrapframe's esp slot
    movl 40(%esp), %eax
    movl %eax, (%ebp) 		// store the trap-time eip at utf_esp - 4

    // Restore the trap-time registers.  After you do this, you
    // can no longer modify any general-purpose registers.
    // LAB 4: Your code here.
    // Skip two 32-bit words (presumably utf_fault_va and utf_err -- confirm
    // against struct UTrapframe) to reach the saved registers.
    addl $8, %esp
    popal

    // Restore eflags from the stack.  After you do this, you can
    // no longer use arithmetic operations or anything else that
    // modifies eflags.
    // LAB 4: Your code here.
    // Skip the eip slot (already copied to the trap-time stack above).
    addl $4, %esp
    popfl

    // Switch back to the adjusted trap-time stack.
    // LAB 4: Your code here.
    popl %esp
    // esp is now utf_esp - 4, which holds the trap-time eip

    // Return to re-execute the instruction that faulted.
    // LAB 4: Your code here.
    // ret pops eip, after which esp equals the original utf_esp
    ret

make grade结果：

faultread: OK (7.4s)
faultwrite: OK (7.8s)
faultdie: OK (7.7s)
faultregs: OK (7.6s)
faultalloc: OK (7.6s)
faultallocbad: OK (8.0s)
faultnostack: OK (9.6s)
faultbadhandler: OK (8.9s)
faultevilhandler: OK (7.7s)

Exercise 12

在inc/memlayout.h中添加：

#define JOS_USER 1

这样就可使用uvpt和uvpd了

fork.c：

// User-level fork with copy-on-write.
//
// Installs pgfault() as the page fault handler, creates a child with
// sys_exofork(), calls duppage() for every present page in
// [UTEXT, USTACKTOP), gives the child its own exception stack page and
// page fault upcall, and finally marks the child runnable.
//
// Returns the child's envid in the parent and 0 in the child.
// Panics on any system call failure.
envid_t
fork(void)
{
    extern void _pgfault_upcall(void);
    envid_t child;
    int err;

    set_pgfault_handler(pgfault);

    child = sys_exofork();
    if (child < 0)
        panic("sys_exofork: %e", child);
    if (child == 0) {
        // Running in the child: thisenv still refers to the parent, fix it.
        thisenv = &envs[ENVX(sys_getenvid())];
        return 0;
    }

    // Running in the parent: duplicate every mapped page below the user stack top.
    for (uintptr_t va = UTEXT; va < USTACKTOP; va += PGSIZE) {
        // Only look at the page table entry if its page table is present.
        if (!(uvpd[PDX(va)] & PTE_P))
            continue;
        if (!(uvpt[PGNUM(va)] & PTE_P))
            continue;
        duppage(child, PGNUM(va));
    }

    // The exception stack gets a fresh page of its own in the child.
    err = sys_page_alloc(child, (void *)(UXSTACKTOP - PGSIZE), PTE_P | PTE_U | PTE_W);
    if (err < 0)
        panic("sys_page_alloc: %e", err);

    err = sys_env_set_pgfault_upcall(child, _pgfault_upcall);
    if (err < 0)
        panic("sys_env_set_pgfault_upcall: %e", err);

    err = sys_env_set_status(child, ENV_RUNNABLE);
    if (err < 0)
        panic("sys_env_set_status: %e", err);

    return child;
}

// Map virtual page number `pn` of the calling environment into
// environment `envid` at the same virtual address.
//
// If the page is writable or already marked PTE_COW, the new mapping is
// created with PTE_COW set and PTE_W cleared, and the caller's own mapping
// is then replaced with the same permissions. Other pages are mapped with
// their existing permissions (masked to PTE_SYSCALL).
//
// Returns 0 on success; panics if either sys_page_map call fails.
static int
duppage(envid_t envid, unsigned pn)
{
    int r;

    // LAB 4: Your code here.
    void* va = (void*)(pn << PGSHIFT);
    int perm = uvpt[pn] & 0xFFF;  // low 12 bits of the PTE: the permission flags
    if ((perm & PTE_W) || (perm & PTE_COW)) {
        perm |= PTE_COW; // add PTE_COW
        perm &= ~PTE_W;  // clear PTE_W
    }
    perm &= PTE_SYSCALL;  // keep only the bits a system call may set
    // Map into the child first.
    if ((r = sys_page_map(0, va, envid, va, perm)) < 0)
        panic("sys_page_map: %e", r);
    // The original author observed garbage output when this remap of the
    // caller's own mapping was done before the child mapping above.
    // NOTE(review): likely explanation -- if the caller's page (e.g. its
    // stack) is made copy-on-write first, the caller's next write faults and
    // gives it a fresh private writable copy; the child would then be handed
    // that new page, which the caller can still modify. Confirm.
    if ((r = sys_page_map(0, va, 0, va, perm)) < 0)
        panic("sys_page_map: %e", r);
    return r;
}

// User-level page fault handler implementing copy-on-write.
//
// The fault must be a write to a page marked PTE_COW; anything else
// panics. A fresh page is allocated at PFTEMP, the faulting page's
// contents are copied into it, it is mapped writable over the faulting
// page, and the temporary mapping at PFTEMP is removed.
static void
pgfault(struct UTrapframe *utf)
{
    void *fault_addr = (void *) utf->utf_fault_va;
    uint32_t err = utf->utf_err;
    int rc;

    // Only a write access to a copy-on-write page is handled here. The
    // page table entry is consulted only when the fault was a write.
    if (!((err & FEC_WR) != 0 && (uvpt[PGNUM(fault_addr)] & PTE_COW) != 0))
        panic("pgfault: check faluting access failed");

    // Fresh private page, mapped temporarily at PFTEMP.
    rc = sys_page_alloc(0, PFTEMP, PTE_P | PTE_U | PTE_W);
    if (rc < 0)
        panic("sys_page_alloc: %e", rc);

    // Copy the old contents, then put the new page in place of the old one.
    void *page_base = ROUNDDOWN(fault_addr, PGSIZE);
    memcpy(PFTEMP, page_base, PGSIZE);

    rc = sys_page_map(0, PFTEMP, 0, page_base, PTE_P | PTE_U | PTE_W);
    if (rc < 0)
        panic("sys_page_map: %e", rc);

    rc = sys_page_unmap(0, PFTEMP);
    if (rc < 0)
        panic("sys_page_unmap: %e", rc);
}

make grade，Part B 通过

Part C: Preemptive Multitasking and Inter-Process communication (IPC)

Exercise 13

在kern/env.c: env_alloc()中：

// Enable interrupts while in user mode.
    // LAB 4: Your code here.
    e->env_tf.tf_eflags |= FL_IF;

取消kern/sched.c中sti的注释：

void
sched_halt(void)
{
// 省略...
    asm volatile (
        "movl $0, %%ebp\n"
        "movl %0, %%esp\n"
        "pushl $0\n"
        "pushl $0\n"
        // Uncomment the following line after completing exercise 13
        "sti\n"
        "1:\n"
        "hlt\n"
        "jmp 1b\n"
    : : "a" (thiscpu->cpu_ts.ts_esp0));

kern/trapentry.S：

TRAPHANDLER_NOEC(vector32, IRQ_OFFSET + IRQ_TIMER)
TRAPHANDLER_NOEC(vector33, IRQ_OFFSET + IRQ_KBD)
TRAPHANDLER_NOEC(vector36, IRQ_OFFSET + IRQ_SERIAL)
TRAPHANDLER_NOEC(vector39, IRQ_OFFSET + IRQ_SPURIOUS)
TRAPHANDLER_NOEC(vector46, IRQ_OFFSET + IRQ_IDE)
TRAPHANDLER_NOEC(vector51, IRQ_OFFSET + IRQ_ERROR)

kern/trap.c：

void
trap_init(void)
{
// 省略...
    void vector32();
    void vector33();
    void vector36();
    void vector39();
    void vector46();
    void vector51();
// 省略...
    // Hardware IRQ gates use DPL 0. The gate's DPL is only checked for
    // software-generated interrupts (`int n`), so external interrupts are
    // still delivered while user code cannot raise these vectors itself.
    SETGATE(idt[IRQ_OFFSET + IRQ_TIMER], 0, GD_KT, vector32, 0)
    SETGATE(idt[IRQ_OFFSET + IRQ_KBD], 0, GD_KT, vector33, 0)
    SETGATE(idt[IRQ_OFFSET + IRQ_SERIAL], 0, GD_KT, vector36, 0)
    SETGATE(idt[IRQ_OFFSET + IRQ_SPURIOUS], 0, GD_KT, vector39, 0)
    SETGATE(idt[IRQ_OFFSET + IRQ_IDE], 0, GD_KT, vector46, 0)
    SETGATE(idt[IRQ_OFFSET + IRQ_ERROR], 0, GD_KT, vector51, 0)

Exercise 14

kern/trap.c：

static void
trap_dispatch(struct Trapframe *tf)
{
// 省略...
    // LAB 4: Your code here.
    if (tf->tf_trapno == IRQ_OFFSET + IRQ_TIMER) {
        lapic_eoi();
        sched_yield();
        return;
    }

Exercise 15

lib/ipc.c：

// Send `val` (and the page at `pg` with permissions `perm`, if `pg` is
// non-NULL) to environment `to_env`.
//
// Retries until the receiver accepts the message, yielding the CPU
// between attempts so the receiver gets a chance to run. Panics on any
// error other than -E_IPC_NOT_RECV.
void
ipc_send(envid_t to_env, uint32_t val, void *pg, int perm)
{
    int r;

    // An address at UTOP is how "no page" is signalled to the kernel.
    if (pg == NULL)
        pg = (void *)UTOP;

    // Yield while the receiver is not ready rather than spinning on the
    // system call; there is nothing to wait for once the send succeeded.
    while ((r = sys_ipc_try_send(to_env, val, pg, perm)) == -E_IPC_NOT_RECV)
        sys_yield();

    if (r < 0)
        panic("ipc_send faild: %e", r);
}

// Receive a value via IPC, optionally accepting a page mapped at `pg`.
//
// If `from_env_store` is non-NULL it receives the sender's envid; if
// `perm_store` is non-NULL it receives the permissions of the
// transferred page. Both are set to 0 when the system call fails.
//
// Returns the received value on success, or the error code returned by
// sys_ipc_recv().
int32_t
ipc_recv(envid_t *from_env_store, void *pg, int *perm_store)
{
    // An address at UTOP tells the kernel that no page is wanted.
    void *dst = (pg != NULL) ? pg : (void *)UTOP;
    int rc = sys_ipc_recv(dst);

    if (rc != 0) {
        if (from_env_store != NULL)
            *from_env_store = 0;
        if (perm_store != NULL)
            *perm_store = 0;
        return rc;
    }

    if (from_env_store != NULL)
        *from_env_store = thisenv->env_ipc_from;
    if (perm_store != NULL)
        *perm_store = thisenv->env_ipc_perm;
    return thisenv->env_ipc_value;
}

kern/syscall.c：

// Flag passed in `perm` to make sys_page_map call envid2env without
// permission checking.
// Note: pass this value only as an argument; it must never end up in an
// actual page table entry.
// NOTE(review): this flag travels in the same `perm` argument that user
// code supplies to the sys_page_map system call. If 0x200 lies inside
// PTE_SYSCALL (confirm against inc/mmu.h), any environment could set it
// and bypass the envid2env permission check.
#define PTE_NOT_CHECK 0x200

// Try to send `value` (and optionally the page at `srcva`) to
// environment `envid`.
//
// A page is offered when `srcva` is non-NULL and below UTOP; it is
// actually mapped only if the receiver asked for one (its env_ipc_dstva
// is non-NULL and below UTOP). All validation happens before the
// receiver's state is touched, so a failed send leaves the receiver
// still waiting.
//
// Returns 0 on success,
//   the envid2env() error if `envid` does not name a live environment,
//   -E_IPC_NOT_RECV if the target is not currently receiving,
//   -E_INVAL   if an offered page is not page-aligned, has invalid `perm`,
//              is not mapped in the caller, or PTE_W is requested for a
//              page the caller can only read,
//   -E_NO_MEM  if mapping the page into the receiver fails.
static int
sys_ipc_try_send(envid_t envid, uint32_t value, void *srcva, unsigned perm)
{
    struct Env *env;
    int r;

    // envid2env is called with permission checking disabled.
    if ((r = envid2env(envid, &env, 0)) != 0)
        return r;
    if (env->env_ipc_recving == 0)
        return -E_IPC_NOT_RECV;

    unsigned granted = 0;   // permissions of the page actually transferred
    if (srcva != NULL && (uintptr_t)srcva < UTOP) {
        if (PGOFF(srcva) != 0)
            return -E_INVAL;
        if ((perm & (PTE_U | PTE_P)) != (PTE_U | PTE_P) ||
            (perm & ~PTE_SYSCALL) != 0)
            return -E_INVAL;

        pte_t *src_pte;
        struct PageInfo *pp = page_lookup(curenv->env_pgdir, srcva, &src_pte);
        if (pp == NULL)
            return -E_INVAL;
        if ((perm & PTE_W) && (*src_pte & PTE_W) == 0)
            return -E_INVAL;

        // Insert the page directly instead of going through sys_page_map,
        // so no permission-bypass flag has to be passed in `perm`.
        void *dstva = env->env_ipc_dstva;
        if (dstva != NULL && (uintptr_t)dstva < UTOP) {
            if (page_insert(env->env_pgdir, pp, dstva, perm) != 0)
                return -E_NO_MEM;
            granted = perm;
        }
    }

    // Everything checked out: deliver the message and wake the receiver.
    env->env_ipc_recving = 0;
    env->env_ipc_from = curenv->env_id;
    env->env_ipc_value = value;
    env->env_ipc_perm = granted;
    env->env_tf.tf_regs.reg_eax = 0;   // return value of the receiver's sys_ipc_recv
    env->env_status = ENV_RUNNABLE;
    return 0;
}

static int
sys_ipc_recv(void *dstva)
{
    // LAB 4: Your code here.
    if ((uint32_t)dstva < UTOP && PGOFF(dstva) != 0) {
        return -E_INVAL;
    }
    curenv->env_ipc_recving = 1;
    curenv->env_ipc_dstva = dstva;
    curenv->env_status = ENV_NOT_RUNNABLE;
    sys_yield(); // 注：yield后，要经过tf的eax改返回值
    return 0;
}

修改sys_page_map部分内容，以支持调用envid2env时不检查权限的情况：

static int
sys_page_map(envid_t srcenvid, void *srcva,
         envid_t dstenvid, void *dstva, int perm)
{
    // LAB 4: Your code here.
    struct Env *srcenv, *dstenv;
    bool chekperm = !(perm & PTE_NOT_CHECK);
    if ((envid2env(srcenvid, &srcenv, chekperm) != 0) || (envid2env(dstenvid, &dstenv, chekperm) != 0))
        return -E_BAD_ENV;
    perm &= ~PTE_NOT_CHECK; // 取消 PTE_NOT_CHECK

make grade，全部通过