6.S081-Lab 3 PAGE TABLES实验笔记

Speed up system calls

一些操作系统(例如 Linux)通过在用户空间和内核之间共享一块只读区域中的数据来加速特定的系统调用。这种方法可以在执行这些系统调用时省去用户态与内核态之间的切换。To help you learn how to insert mappings into a page table, your first task is to implement this optimization for the getpid() system call in xv6.

1
2
3
4
5
6
// Return the PID stored in the struct usyscall located at the address
// USYSCALL. This is a plain memory read of that page.
int
ugetpid(void)
{
  // Treat the address USYSCALL as a pointer to the struct usyscall stored there.
  struct usyscall *shared = (struct usyscall *)USYSCALL;
  int pid = shared->pid;

  return pid;
}

每个进程被创建的时候,在 USYSCALL(a VA defined in memlayout.h)处映射一个只读页。在该页的开头 store a struct usyscall (also defined in memlayout.h), and initialize it to store the PID of the current process.

1
2
3
4
5
6
// The trapframe page sits one page below TRAMPOLINE.
#define TRAPFRAME (TRAMPOLINE - PGSIZE)
#ifdef LAB_PGTBL
// The USYSCALL page sits one page below TRAPFRAME.
#define USYSCALL (TRAPFRAME - PGSIZE)

// Data placed at the start of the USYSCALL page.
struct usyscall {
  int pid;  // Process ID
};
#endif  // LAB_PGTBL

For this lab, ugetpid() has been provided on the userspace side and will automatically use the USYSCALL mapping.

从上面可以看出应该是TRAPFRAME在TRAMPOLINE下面一页,USYSCALL在TRAPFRAME往下一页的位置

  • You can perform the mapping in proc_pagetable() in kernel/proc.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
 // Create a page table for process p that contains only the trampoline,
// trapframe and usyscall mappings.
//   p: process whose trapframe and usyscall pages are mapped.
// Returns the new page table, or 0 if creating the table or any of the
// mappings fails; on failure every mapping made so far is undone.
pagetable_t
proc_pagetable(struct proc *p)
{
  pagetable_t pagetable;

  // An empty page table.
  pagetable = uvmcreate();
  if(pagetable == 0)
    return 0;

  // map the trampoline code (for system call return)
  // at the highest user virtual address.
  // only the supervisor uses it, on the way
  // to/from user space, so not PTE_U.
  if(mappages(pagetable, TRAMPOLINE, PGSIZE,
              (uint64)trampoline, PTE_R | PTE_X) < 0){
    uvmfree(pagetable, 0);
    return 0;
  }

  // map the trapframe just below TRAMPOLINE, for trampoline.S.
  if(mappages(pagetable, TRAPFRAME, PGSIZE,
              (uint64)(p->trapframe), PTE_R | PTE_W) < 0){
    uvmunmap(pagetable, TRAMPOLINE, 1, 0);
    uvmfree(pagetable, 0);
    return 0;
  }

  // map the usyscall page just below TRAPFRAME.
  // PTE_U makes it accessible from user space; leaving out PTE_W
  // keeps it read-only.
  if(mappages(pagetable, USYSCALL, PGSIZE,
              (uint64)(p->usyscall), PTE_R | PTE_U) < 0){
    // The USYSCALL mapping itself did not succeed, so the mappings to
    // undo are the two earlier ones, not USYSCALL.
    uvmunmap(pagetable, TRAMPOLINE, 1, 0);
    uvmunmap(pagetable, TRAPFRAME, 1, 0);
    uvmfree(pagetable, 0);
    return 0;
  }

  return pagetable;
}

我们从上面的函数中可以看到 trampoline code 和 trapframe 的映射过程,我们的目标是通过模仿这两个映射过程实现对 USYSCALL 的映射。

Choose permission bits that allow userspace to only read the page. 也就是说我们要将该页的权限设为用户态只读。权限是通过 mappages 的最后一个参数设置的,例如:if(mappages(pagetable, TRAMPOLINE, PGSIZE, (uint64)trampoline, PTE_R | PTE_X) < 0)。其中 PTE_R 表示可读,PTE_X 表示可执行,PTE_W 表示可写,PTE_U 表示用户态可以访问。

1
2
3
4
5
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1) // readable
#define PTE_W (1L << 2) // writable
#define PTE_X (1L << 3) // executable
#define PTE_U (1L << 4) // 1 -> user can access
  • You may find that mappages() is a useful utility.

mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm):为从虚拟地址 va 开始、长度为 size 的区域建立映射,对应物理内存的起始地址是 pa,perm 是权限位。其中 va 和 size 可能不是页对齐的。如果 walk() 无法分配所需的页表页就返回 -1,否则返回 0。

  • Don't forget to allocate and initialize the page in allocproc()

如下所示,allocproc 是分配进程的函数,里面涉及页的分配。我们需要在这个函数里模仿 “Allocate a trapframe page” 的做法,为 usyscall 也分配一页,并把当前进程的 PID 写进去。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// Look in the process table for an UNUSED proc.
// If found, assign it a pid, allocate its trapframe and usyscall pages,
// build its page table, and set up its context so that it starts
// executing at forkret. Returns with p->lock held.
// Returns 0 if there is no free proc or if any allocation fails; in the
// failure case the proc is released with freeproc() and the lock dropped.
static struct proc*
allocproc(void)
{
  struct proc *p;

  for(p = proc; p < &proc[NPROC]; p++) {
    acquire(&p->lock);
    if(p->state == UNUSED) {
      goto found;
    } else {
      release(&p->lock);
    }
  }
  return 0;

found:
  p->pid = allocpid();
  p->state = USED;

  // Allocate a trapframe page.
  if((p->trapframe = (struct trapframe *)kalloc()) == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Allocate a usyscall page.
  if((p->usyscall = (struct usyscall *)kalloc()) == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }
  // Publish the pid in the usyscall page; proc_pagetable() below maps
  // this page at USYSCALL.
  p->usyscall->pid = p->pid;

  // An empty user page table.
  p->pagetable = proc_pagetable(p);
  if(p->pagetable == 0){
    freeproc(p);
    release(&p->lock);
    return 0;
  }

  // Set up new context to start executing at forkret,
  // which returns to user space.
  memset(&p->context, 0, sizeof(p->context));
  p->context.ra = (uint64)forkret;
  p->context.sp = p->kstack + PGSIZE;

  return p;
}
  • Make sure to free the page in freeproc().

释放进程的时候同样涉及对页表的释放。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// Release everything attached to a proc structure -- its trapframe
// page, its page table and its usyscall page -- then clear its
// bookkeeping fields and mark the slot UNUSED.
// The caller must hold p->lock.
static void
freeproc(struct proc *p)
{
  if(p->trapframe != 0){
    kfree((void*)p->trapframe);
  }
  p->trapframe = 0;

  // p->sz is still needed here, so it is cleared only afterwards.
  if(p->pagetable != 0){
    proc_freepagetable(p->pagetable, p->sz);
  }
  p->pagetable = 0;

  if(p->usyscall != 0){
    kfree((void*)p->usyscall);
  }
  p->usyscall = 0;

  // Reset the remaining per-process state.
  p->sz = 0;
  p->pid = 0;
  p->parent = 0;
  p->name[0] = 0;
  p->chan = 0;
  p->killed = 0;
  p->xstate = 0;
  p->state = UNUSED;
}

// Remove the TRAMPOLINE, TRAPFRAME and USYSCALL mappings from a
// process's page table and then free the table with uvmfree().
// The three pages are unmapped with do_free == 0, so the physical pages
// behind them are not released here.
//   pagetable: page table to tear down.
//   sz:        size passed on to uvmfree().
void
proc_freepagetable(pagetable_t pagetable, uint64 sz)
{
  uvmunmap(pagetable, TRAMPOLINE, 1, 0);
  uvmunmap(pagetable, TRAPFRAME, 1, 0);
  uvmunmap(pagetable, USYSCALL, 1, 0);
  uvmfree(pagetable, sz);
}

Print a page table

定义一个名叫 vmprint() 的函数。这个函数的输入参数是一个 pagetable_t,and it prints that pagetable in the format described below.

Insert if(p->pid==1) vmprint(p->pagetable) in exec.c just before the return argc, to print the first process's page table.

Now when you start xv6 it should print output like this, describing the page table of the first process at the point when it has just finished exec()ing init。如上图所示。

第一行显示传给 vmprint 的参数。After that there is a line for each PTE, including PTEs that refer to page-table pages deeper in the tree. 用 " .." 表示树的深度:第一级两个点,第二级四个,第三级六个。

Each PTE line 显示了the PTE index in its page-table page, the pte bits, and the physical address extracted from the PTE. Don't print PTEs that are not valid.

在上面的例子中,如上图所示,顶级页表映射了第 0 项和第 255 项。第 0 项指向的下一级页表只映射了第 0 项,而该项指向的最底层页表映射了第 0、1、2 项。

  • You can put vmprint() in kernel/vm.c. 首先在vm.c里面定义vmprint

  • The function freewalk may be inspirational.可以参考freewalk函数进行设计

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Recursively free the page-table pages reachable from pagetable.
// Every valid entry must point to a lower-level page table; a valid
// entry with any of R/W/X set triggers a panic.
void
freewalk(pagetable_t pagetable)
{
  // A page table holds 2^9 = 512 entries.
  for(int idx = 0; idx < 512; idx++){
    pte_t entry = pagetable[idx];

    if((entry & PTE_V) == 0)
      continue;

    if((entry & (PTE_R|PTE_W|PTE_X)) != 0){
      panic("freewalk: leaf");
      continue;
    }

    // Interior entry: free the subtree first, then clear the slot.
    freewalk((pagetable_t)PTE2PA(entry));
    pagetable[idx] = 0;
  }
  kfree((void*)pagetable);
}

最终函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Print the valid entries of a page-table page and, depth-first, of the
// page-table pages below it.
//   pagetable: the page-table page to print.
//   depth:     level of this page in the tree, 0 for the root; calls
//              with depth > 2 print nothing.
// For depth 0 a "page table <addr>" header line is printed first.
// Each entry line starts with prefix[depth], followed by the entry's
// index, the raw PTE and the physical address extracted from it.
void
vmprint(pagetable_t pagetable, uint64 depth)
{
  if(depth > 2)
    return;
  if(depth == 0){
    printf("page table %p\n", pagetable);
  }

  char *buf = prefix[depth];
  // there are 2^9 = 512 PTEs in a page table.
  for(int i = 0; i < 512; i++){
    pte_t pte = pagetable[i];
    if((pte & PTE_V) == 0)
      continue;

    uint64 pa = PTE2PA(pte);
    printf("%s%d: pte %p pa %p\n", buf, i, pte, pa);

    // Only a PTE with none of R/W/X set points to a lower-level page
    // table; a leaf maps a data page and must not be walked as a table.
    if((pte & (PTE_R|PTE_W|PTE_X)) == 0)
      vmprint((pagetable_t)pa, depth + 1);
  }
}

Detecting which pages have been accessed

对于一些垃圾回收器(a form of automatic memory management)来说,知道哪些页被访问过(accessed,即 read or write)是十分有用的。本部分要 add a new feature to xv6 that detects and reports this information to userspace by inspecting the access bits in the RISC-V page table. 当遇到 TLB miss 的时候,RISC-V 硬件会自动在 PTE 里标记这些位。

目标:实现 pgaccess(), a system call that reports which pages have been accessed.

The system call takes three arguments:

  • First, it takes the starting virtual address of the first user page to check.
  • Second, it takes the number of pages to check.
  • Finally, it takes a user address to a buffer to store the results into a bitmask (一个数据结构that uses one bit per page and where the first page corresponds to the least significant bit).

  • Start by implementing sys_pgaccess() in kernel/sysproc.c. 实现sysproc.c文件里面的sys_pgaccess()函数

  • You'll need to parse arguments using argaddr() and argint().因为是系统调用,需要上述两个函数获取入参

  • For the output bitmask, it's easier to store a temporary buffer in the kernel and copy it to the user (via copyout()) after filling it with the right bits. 输出的bitmask需要通过copyout来将其从内核空间拷贝到用户空间

  • walk() in kernel/vm.c is very useful for finding the right PTEs.可以参考walkaddr()和walk函数

  • You'll need to define PTE_A, the access bit, in kernel/riscv.h. Consult the RISC-V manual to determine its value.

从手册上看,access 位是 PTE 的第 6 位,页被访问时 RISC-V 处理器会自动将其置位。

1
2
3
4
5
6
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1) // readable
#define PTE_W (1L << 2) // writable
#define PTE_X (1L << 3) // executable
#define PTE_U (1L << 4) // 1 -> user can access
#define PTE_A (1L<<6) // accessed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
sys_pgaccess(void)
{
// lab pgtbl: your code here.
uint64 addr;
int len;
int bitmask;
if (argaddr(0, &addr) < 0){
return -1;
}
if (argint(1, &len) < 0){
return -1;
}
if (argint(2, &bitmask) < 0){
return -1;
}
if(len>32 || len < 0){
return -1;
}

int res = 0;
struct proc *p = myproc();
for (int i = 0; i < len; i++){
int va = addr + i * PGSIZE;
int abit = vm_pgaccess(p->pagetable, va);
res = res | abit << i;
}

if(copyout(p->pagetable, bitmask, (char*)&res, sizeof(res)) < 0){
return -1;
}

return 0;
}


// Test and clear the accessed bit of the PTE that maps va.
//   pagetable: page table to look the address up in.
//   va:        virtual address to check.
// Returns 1 if PTE_A was set (the bit is cleared before returning),
// and 0 if it was not set, va >= MAXVA, or va has no valid mapping.
int
vm_pgaccess(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;

  if(va >= MAXVA)
    return 0;

  // walk() is called with alloc == 0 and may return 0 for an address
  // with no page-table entry; do not dereference it in that case.
  pte = walk(pagetable, va, 0);
  if(pte == 0 || (*pte & PTE_V) == 0)
    return 0;

  if((*pte & PTE_A) != 0){
    *pte &= ~PTE_A;   // clear the flag so the next call reports fresh accesses
    return 1;
  }
  return 0;
}