vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

commit 0687238f4aa7b3150cc097cb757f16e762f4cfc6
parent 09a0ae1864ae46cd1220469a1104bca6df770805
Author: Russ Cox <rsc@swtch.com>
Date:   Fri,  4 Jul 2008 02:21:42 -0400

9vx: introduce multiple user address spaces

Diffstat:
M src/9vx/a/dat.h | 18 ++++++++++++++----
M src/9vx/a/fault.c | 4 ++--
M src/9vx/a/sysproc.c | 2 ++
M src/9vx/main.c | 8 ++++++++
M src/9vx/mmu.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M src/9vx/trap.c | 14 +++++++++-----
M src/9vx/vx32.c | 114 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
7 files changed, 238 insertions(+), 117 deletions(-)

diff --git a/src/9vx/a/dat.h b/src/9vx/a/dat.h @@ -1,4 +1,5 @@ #include <ucontext.h> +#include "libvx32/vx32.h" typedef struct BIOS32si BIOS32si; typedef struct Conf Conf; @@ -114,12 +115,14 @@ struct Conf * MMU stuff in proc */ #define NCOLOR 1 +typedef struct Uspace Uspace; struct PMMU { - ulong lo; // Plan 9 VX - ulong hi; // Plan 9 VX struct vxproc *vxproc; // Plan 9 VX - struct vxmmap *vxmm; // Plan 9 VX + struct vxmem vxmem; + struct vxmmap vxmm; // Plan 9 VX + Uspace *us; + uchar *uzero; }; /* @@ -348,7 +351,6 @@ struct DevConf // Plan 9 VX extern int traceprocs; extern int tracesyscalls; -extern uchar *uzero; extern int doabort; /* Pthreads-based sleep and wakeup. */ @@ -361,3 +363,11 @@ struct Psleep Pwaiter *waiter; }; +struct Uspace +{ + Proc *p; // proc currently mapped + uchar *uzero; + ulong lo; + ulong hi; +}; + diff --git a/src/9vx/a/fault.c b/src/9vx/a/fault.c @@ -343,7 +343,7 @@ okaddr(ulong addr, ulong len, int write) continue; } qunlock(&s->lk); - return uzero+addr0; + return up->pmmu.uzero+addr0; } } pprint("suicide: invalid address 0x%lux/%lud in sys call pc=0x%lux\n", addr, len, userpc()); @@ -400,7 +400,7 @@ vmemchr(void *s, int c, int n) a += m_; n -= m_; if(isuaddr(a)) - uvalidaddr(a-uzero, 1, 0); + uvalidaddr(a-up->pmmu.uzero, 1, 0); } /* fits in one page */ diff --git a/src/9vx/a/sysproc.c b/src/9vx/a/sysproc.c @@ -379,6 +379,8 @@ sysexec(ulong *arg) /* * Top-of-stack structure. 
*/ + uchar *uzero; + uzero = up->pmmu.uzero; Tos *tos; ulong utos; utos = USTKTOP - sizeof(Tos); diff --git a/src/9vx/main.c b/src/9vx/main.c @@ -109,6 +109,9 @@ main(int argc, char **argv) case 'S': tracesyscalls++; break; + case 'U': + nuspace = atoi(EARGF(usage())); + break; case 'X': vx32_debugxlate++; break; @@ -419,7 +422,9 @@ showexec(ulong sp) { ulong *a, *argv; int i, n; + uchar *uzero; + uzero = up->pmmu.uzero; iprint("showexec %p\n", sp); if(sp >= USTKTOP || sp < USTKTOP-USTKSIZE) panic("showexec: bad sp"); @@ -510,6 +515,7 @@ sigsegv(int signo, siginfo_t *info, void *v) int read; ulong addr, eip, esp; ucontext_t *uc; + uchar *uzero; if(m == nil) panic("sigsegv: m == nil"); @@ -518,6 +524,8 @@ sigsegv(int signo, siginfo_t *info, void *v) if(up == nil) panic("sigsegv: up == nil"); + uzero = up->pmmu.uzero; + uc = v; #if defined(__APPLE__) mcontext_t mc; diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c @@ -30,14 +30,19 @@ int tracemmu; static int pagefile; static char* pagebase; -uchar *uzero; + +static Uspace uspace[16]; +static Uspace *ulist[nelem(uspace)]; +int nuspace = 1; int isuaddr(void *v) { uchar *p; + uchar *uzero; p = v; + uzero = up->pmmu.uzero; return uzero <= p && p < uzero+USTKTOP; } @@ -46,7 +51,7 @@ isuaddr(void *v) * The point is to reserve the space so that * nothing else ends up there later. */ -static void +static void* mapzero(void) { int fd; @@ -55,20 +60,16 @@ mapzero(void) /* First try mmaping /dev/zero. Some OS'es don't allow this. */ if((fd = open("/dev/zero", O_RDONLY)) >= 0){ v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE, fd, 0); - if(v != MAP_FAILED){ - uzero = v; - return; - } + if(v != MAP_FAILED) + return v; } /* Next try an anonymous map. 
*/ v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if(v != MAP_FAILED){ - uzero = v; - return; - } - - panic("mapzero: cannot reserve process address space"); + if(v != MAP_FAILED) + return v; + + return nil; } void @@ -76,8 +77,8 @@ mmuinit(void) { char tmp[] = "/var/tmp/9vx.pages.XXXXXX"; void *v; - - mapzero(); + int i; + if((pagefile = mkstemp(tmp)) < 0) panic("mkstemp: %r"); if(ftruncate(pagefile, MEMSIZE) < 0) @@ -92,6 +93,17 @@ mmuinit(void) panic("mmap pagefile: %r"); pagebase = v; + if(nuspace <= 0) + nuspace = 1; + if(nuspace > nelem(uspace)) + nuspace = nelem(uspace); + for(i=0; i<nuspace; i++){ + uspace[i].uzero = mapzero(); + if(uspace[i].uzero == nil) + panic("mmap address space %d", i); + ulist[i] = &uspace[i]; + } + conf.mem[0].base = 0; conf.mem[0].npage = MEMSIZE / BY2PG; @@ -128,23 +140,15 @@ kunmap(KMap *k) } /* - * The process whose address space we've got mapped. - * We cache our own copy so that entering the scheduler - * and coming back out running the same process doesn't - * cause unnecessary unmapping and remapping. - */ -static Proc *mmup; - -/* * Flush the current address space. */ static void -mmapflush(void) +mmapflush(Uspace *us) { m->flushmmu = 0; /* Nothing mapped? */ - if(mmup == nil || mmup->pmmu.lo > mmup->pmmu.hi) + if(us == nil || us->lo > us->hi || us->uzero == nil) return; #ifdef __FreeBSD__ @@ -154,20 +158,20 @@ mmapflush(void) * tell whether a page is mapped, so we have to remap * something with no pages here. */ - if(mmap(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE, + if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED) panic("mmapflush mmap: %r"); - mmup->pmmu.lo = 0x80000000UL; - mmup->pmmu.hi = 0; + us->lo = 0x80000000UL; + us->hi = 0; return; } #endif /* Clear only as much as got mapped. 
*/ - if(mprotect(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE) < 0) + if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0) panic("mmapflush mprotect: %r"); - mmup->pmmu.lo = 0x80000000UL; - mmup->pmmu.hi = 0; + us->lo = 0x80000000UL; + us->hi = 0; } /* @@ -178,13 +182,15 @@ void putmmu(ulong va, ulong pa, Page *p) { int prot; - PMMU *pmmu; + Uspace *us; if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa) print("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa); assert(p->pa < MEMSIZE && pa < MEMSIZE); assert(up); + us = up->pmmu.us; + assert(us); /* Map the page */ prot = PROT_READ; @@ -192,21 +198,20 @@ putmmu(ulong va, ulong pa, Page *p) prot |= PROT_WRITE; pa &= ~(BY2PG-1); va &= ~(BY2PG-1); - if(mmap(uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED, + if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED, pagefile, pa) == MAP_FAILED) panic("putmmu"); /* Record high and low address range for quick unmap. */ - pmmu = &up->pmmu; - if(pmmu->lo > va) - pmmu->lo = va; - if(pmmu->hi < va) - pmmu->hi = va; + if(us->lo > va) + us->lo = va; + if(us->hi < va) + us->hi = va; // printlinuxmaps(); } /* - * The memory maps have changed. Flush all cached state. + * The memory maps have changed for up. Flush all cached state. 
*/ void flushmmu(void) @@ -214,9 +219,78 @@ flushmmu(void) if(tracemmu) print("flushmmu\n"); - if(up) + if(up){ vxproc_flush(up->pmmu.vxproc); - mmapflush(); + mmapflush(up->pmmu.us); + } +} + +void +usespace(Uspace *us) +{ + int i; + + for(i=0; i<nuspace; i++) + if(ulist[i] == us){ + while(i > 0){ + ulist[i] = ulist[i-1]; + i--; + } + ulist[0] = us; + break; + } +} + +Uspace* +getspace(Proc *p) +{ + Uspace *us; + + us = ulist[nuspace-1]; + if(us->p){ + if(tracemmu) + print("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace); + mmapflush(us); + } + us->p = p; + p->pmmu.vxmm.base = us->uzero; + p->pmmu.uzero = us->uzero; + p->pmmu.us = us; + usespace(us); + return us; +} + +void +takespace(Proc *p, Uspace *us) +{ + usespace(us); + if(us->p == p) + return; + if(tracemmu){ + if(us->p) + print("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace); + } + us->p = p; + mmapflush(us); +} + +void +putspace(Uspace *us) +{ + int i; + + mmapflush(us); + us->p->pmmu.us = nil; + us->p->pmmu.uzero = nil; + us->p->pmmu.vxmm.base = nil; + us->p = nil; + for(i=0; i<nuspace; i++) + if(ulist[i] == us){ + while(++i < nuspace) + ulist[i-1] = ulist[i]; + ulist[i-1] = us; + break; + } } /* @@ -231,15 +305,31 @@ mmuswitch(Proc *p) * one we were just in. Also, kprocs don't count -- * only the guys on cpu0 do. */ - if(!p->kp && (mmup != p || p->newtlb || m->flushmmu)){ - if(0) print("^^^^^^^^^^ %ld %s\n========== %ld %s\n", - mmup ? mmup->pid : 0, mmup? 
mmup->text : "", - p->pid, p->text); - /* No vxproc_flush - vxproc cache is okay */ - mmapflush(); + if(p->kp) + return; + + if(tracemmu) + print("mmuswitch %ld %s\n", p->pid, p->text); + + if(p->pmmu.us && p->pmmu.us->p == p){ + if(tracemmu) print("---------- %ld %s [%d]\n", + p->pid, p->text, p->pmmu.us - uspace); + usespace(p->pmmu.us); + if(!p->newtlb && !m->flushmmu){ + usespace(p->pmmu.us); + return; + } + mmapflush(p->pmmu.us); p->newtlb = 0; - mmup = p; + return; } + + if(p->pmmu.us == nil) + getspace(p); + else + takespace(p, p->pmmu.us); + if(tracemmu) print("========== %ld %s [%d]\n", + p->pid, p->text, p->pmmu.us - uspace); } /* @@ -250,11 +340,16 @@ mmurelease(Proc *p) { if(p->kp) return; + if(tracemmu) + print("mmurelease %ld %s\n", p->pid, p->text); if(p->pmmu.vxproc) vxproc_flush(p->pmmu.vxproc); - if(p == mmup || m->flushmmu){ - mmapflush(); - mmup = nil; + if(p->pmmu.us){ + if(tracemmu) + print("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace); + putspace(p->pmmu.us); + if(m->flushmmu) + mmapflush(p->pmmu.us); } } diff --git a/src/9vx/trap.c b/src/9vx/trap.c @@ -30,7 +30,7 @@ kexit(Ureg *ureg) Tos *tos; /* precise time accounting, kernel exit */ - tos = (Tos*)(uzero+USTKTOP-sizeof(Tos)); + tos = (Tos*)(up->pmmu.uzero+USTKTOP-sizeof(Tos)); cycles(&t); tos->kcycles += t - up->kentry; tos->pcycles = up->pcycles; @@ -90,7 +90,7 @@ trap(Ureg *ureg) case VXTRAP_SOFT+0x40: /* int $0x40 - system call */ if(tracesyscalls){ - ulong *sp = (ulong*)(uzero + ureg->usp); + ulong *sp = (ulong*)(up->pmmu.uzero + ureg->usp); print("%d [%s] %s %#lux %08lux %08lux %08lux %08lux\n", up->pid, up->text, sysctab[ureg->ax], sp[0], sp[1], sp[2], sp[3]); @@ -262,7 +262,7 @@ syscall(Ureg *ureg) up->psstate = 0; if(scallnr == NOTED) - noted(ureg, *(ulong*)(uzero + sp+BY2WD)); + noted(ureg, *(ulong*)(up->pmmu.uzero + sp+BY2WD)); if(scallnr!=RFORK && (up->procctl || up->nnote)){ splhi(); @@ -335,6 +335,8 @@ notify(Ureg* ureg) pexit("Suicide", 0); } + 
uchar *uzero; + uzero = up->pmmu.uzero; upureg = (void*)(uzero + sp); memmove(upureg, ureg, sizeof(Ureg)); *(ulong*)(uzero + sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */ @@ -383,6 +385,8 @@ noted(Ureg* ureg, ulong arg0) pexit("Suicide", 0); } + uchar *uzero; + uzero = up->pmmu.uzero; oureg = up->ureg; nureg = (Ureg*)(uzero + up->ureg); @@ -442,11 +446,11 @@ execregs(ulong entry, ulong ssize, ulong nargs) up->fpstate = FPinit; fpoff(); - sp = (ulong*)(uzero + USTKTOP - ssize); + sp = (ulong*)(up->pmmu.uzero + USTKTOP - ssize); *--sp = nargs; ureg = up->dbgreg; - ureg->usp = (uchar*)sp - uzero; + ureg->usp = (uchar*)sp - up->pmmu.uzero; //showexec(ureg->usp); ureg->pc = entry; return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */ diff --git a/src/9vx/vx32.c b/src/9vx/vx32.c @@ -17,7 +17,6 @@ #include "u.h" #include <pthread.h> #include <sys/mman.h> -#include "libvx32/vx32.h" #include "lib.h" #include "mem.h" #include "dat.h" @@ -50,47 +49,6 @@ vx32sysr1(void) } /* - * Vxnewproc is called at the end of newproc - * to fill in vx32-specific entries in the Proc struct - * before it gets used. - */ -void -vxnewproc(Proc *p) -{ - PMMU *pm; - - pm = &p->pmmu; - - /* - * Kernel procs don't need vxprocs; if this proc - * already has one, take it away. Also, give - * kernel procs very large stacks so they can call - * into non-thread-friendly routines like x11 - * and getgrgid. - */ - if(p->kp){ - if(pm->vxproc){ - pm->vxproc->mem = nil; - vxproc_free(pm->vxproc); - pm->vxproc = nil; - } - free(p->kstack); - p->kstack = nil; - p->kstack = smalloc(512*1024); - return; - } - - pm->lo = 0x80000000UL; - pm->hi = 0; - if(pm->vxproc == nil){ - pm->vxproc = vxproc_alloc(); - if(pm->vxproc == nil) - panic("vxproc_alloc"); - pm->vxproc->mem = &thevxmem; - } -} - -/* * Vx32 hooks to read, write, map, unmap, and check permissions * on user memory. Normally these are more involved, but we're * using the processor to do everything. 
@@ -98,29 +56,21 @@ vxnewproc(Proc *p) static ssize_t vmread(vxmem *vm, void *data, uint32_t addr, uint32_t len) { - memmove(data, uzero+addr, len); + memmove(data, vm->mapped->base+addr, len); return len; } static ssize_t vmwrite(vxmem *vm, const void *data, uint32_t addr, uint32_t len) { - memmove(uzero+addr, data, len); + memmove(vm->mapped->base+addr, data, len); return len; } -static vxmmap thevxmmap = -{ - 1, - (void*)-1, /* to be filled in with user0 */ - USTKTOP, -}; - static vxmmap* vmmap(vxmem *vm, uint32_t flags) { - thevxmmap.base = uzero; - return &thevxmmap; + return vm->mapped; } static void @@ -131,6 +81,14 @@ vmunmap(vxmem *vm, vxmmap *mm) static int vmcheckperm(vxmem *vm, uint32_t addr, uint32_t len, uint32_t perm, uint32_t *out_faultva) { + if(addr >= USTKTOP){ + *out_faultva = addr; + return 0; + } + if(addr+len < addr || addr +len > USTKTOP){ + *out_faultva = USTKTOP; + return 0; + } /* All is allowed - handle faults as they happen. */ return 1; } @@ -164,6 +122,50 @@ static vxmem thevxmem = vmfree, }; +/* + * Vxnewproc is called at the end of newproc + * to fill in vx32-specific entries in the Proc struct + * before it gets used. + */ +void +vxnewproc(Proc *p) +{ + PMMU *pm; + + pm = &p->pmmu; + + /* + * Kernel procs don't need vxprocs; if this proc + * already has one, take it away. Also, give + * kernel procs very large stacks so they can call + * into non-thread-friendly routines like x11 + * and getgrgid. 
+ */ + if(p->kp){ + if(pm->vxproc){ + // vxunmap(p); + assert(pm->uzero == nil); + pm->vxproc->mem = nil; + vxproc_free(pm->vxproc); + pm->vxproc = nil; + } + free(p->kstack); + p->kstack = nil; + p->kstack = smalloc(512*1024); + return; + } + + if(pm->vxproc == nil){ + pm->vxproc = vxproc_alloc(); + if(pm->vxproc == nil) + panic("vxproc_alloc"); + pm->vxproc->mem = &pm->vxmem; + pm->vxmem = thevxmem; + pm->vxmem.mapped = &pm->vxmm; + memset(&pm->vxmm, 0, sizeof pm->vxmm); + } +} + static void setclock(int start) { @@ -224,7 +226,7 @@ touser(void *initsp) * Optimization: try to fault in code page and stack * page right now, since we're likely to need them. */ - if(up->pmmu.hi == 0){ + if(up->pmmu.us->hi == 0){ fault(vp->cpu->eip, 1); fault(vp->cpu->reg[ESP], 0); } @@ -267,11 +269,11 @@ touser(void *initsp) addr = (uchar*)vp->cpu->trapva; if(traceprocs) print("fault %p read=%d\n", addr, read); - if(isuaddr(addr) && fault(addr - uzero, read) >= 0) + if(isuaddr(addr) && fault(addr - up->pmmu.uzero, read) >= 0) continue; print("%ld %s: unhandled fault va=%lux [%lux] eip=%lux\n", up->pid, up->text, - addr - uzero, vp->cpu->trapva, vp->cpu->eip); + addr - up->pmmu.uzero, vp->cpu->trapva, vp->cpu->eip); proc2ureg(vp, &u); dumpregs(&u); if(doabort)