vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32

mmu.c (7541B)


#define	WANT_M
#include "u.h"
#include <pthread.h>
#include "libvx32/vx32.h"
#include <sys/mman.h>
#include "lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "error.h"
#include "ureg.h"

int tracemmu;

#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#define MAP_EMPTY (MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE)

/*
 * We allocate a 256MB page file on disk to hold the "physical memory".
 * We'll mmap individual file pages where we need them to simulate
 * the page translation of a real MMU.  To make the simulation more
 * faithful, we map the vx32 sandboxed address space starting at 0,
 * so that kernel 0 = user 0, so that pointers can be shared.
 * Plan 9 assumes this, and while it's not a ton of work to break that
 * assumption, it was easier not to.
 *
 * This value may be changed with the -m switch.
 */
int memsize = (256<<20);

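/* Page file fd and the address where it is mapped for direct access. */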
static int pagefile;
static char* pagebase;

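/* Cached user address spaces, kept in ulist from most to least recently used. */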
static Uspace uspace[16];
static Uspace *ulist[nelem(uspace)];
int nuspace = 1;

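/*
 * mmap flags and hint, chosen so that on 64-bit hosts the user
 * address space still lands below 4GB, where 32-bit vx32 pointers work.
 */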
#ifdef __i386__
#define BIT32 0
#define HINT nil
#elif defined(__amd64__)
#ifdef linux
#define BIT32 MAP_32BIT
#define HINT nil
#elif defined(__FreeBSD__)
#define BIT32 MAP_FIXED
#define HINT (caddr_t)0x40000000
#endif
#endif

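/* Is v a pointer into the current process's user address space? */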
int
isuaddr(void *v)
{
	uchar *p;
	uchar *uzero;

	p = v;
	uzero = up->pmmu.uzero;
	return uzero <= p && p < uzero+USTKTOP;
}

/*
 * Allocate a process-sized mapping with nothing there.
 * The point is to reserve the space so that
 * nothing else ends up there later.
 */
static void*
mapzero(void)
{
	int fd, bit32;
	void *v;
	void *hint;

	bit32 = BIT32;
	hint = HINT;

	/* First try mmapping /dev/zero.  Some OSes don't allow this. */
	if((fd = open("/dev/zero", O_RDONLY)) >= 0){
		v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
		close(fd);	/* the mapping, if any, survives the close */
		if(v != MAP_FAILED) {
			if((uint32_t)(uintptr)v != (uintptr)v) {
				iprint("mmap returned 64-bit pointer %p\n", v);
				panic("mmap");
			}
			return v;
		}
	}

	/* Next try an anonymous map. */
	v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if(v != MAP_FAILED) {
		if((uint32_t)(uintptr)v != (uintptr)v) {
			iprint("mmap returned 64-bit pointer %p\n", v);
			panic("mmap");
		}
		return v;
	}

	return nil;
}

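/*
 * Create the page file that simulates physical memory, map it for
 * direct access at pagebase, and set up the cached user address spaces.
 */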
void
mmuinit(void)
{
	char tmp[] = "/var/tmp/9vx.pages.XXXXXX";
	void *v;
	int i;

	if((pagefile = mkstemp(tmp)) < 0)
		panic("mkstemp: %r");
	if(ftruncate(pagefile, memsize) < 0)
		panic("ftruncate pagefile: %r");
	unlink(tmp);	/* "remove on close" */

	/* Map pages for direct access at pagebase, wherever that is */
	/* MAP_SHARED means write the changes back to the file */
	v = mmap(nil, memsize, PROT_READ|PROT_WRITE,
		MAP_SHARED, pagefile, 0);
	if(v == MAP_FAILED)
		panic("mmap pagefile: %r");
	pagebase = v;

	if(nuspace <= 0)
		nuspace = 1;
	if(nuspace > nelem(uspace))
		nuspace = nelem(uspace);
	for(i=0; i<nuspace; i++){
		uspace[i].uzero = mapzero();
		if(uspace[i].uzero == nil)
			panic("mmap address space %d", i);
		ulist[i] = &uspace[i];
	}

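	/* Tell the rest of the kernel how much memory it has to work with. */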
	conf.mem[0].base = 0;
	conf.mem[0].npage = memsize / BY2PG;

	palloc.mem[0].base = 0;
	palloc.mem[0].npage = memsize / BY2PG;
}

/*
 * Temporary page mappings are easy:
 * everything is mapped at pagebase.
 */
void*
tmpmap(Page *pg)
{
	assert(pg->pa < memsize);
	return pagebase + pg->pa;
}

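/* Nothing to undo: the direct mapping at pagebase is permanent. */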
void
tmpunmap(void *v)
{
	assert(pagebase <= (char*)v && (char*)v < pagebase + memsize);
}

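/* Kernel page mappings ride on the same direct map; kunmap is a no-op. */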
KMap*
kmap(Page *p)
{
	return (KMap*)tmpmap(p);
}

void
kunmap(KMap *k)
{
}

/*
 * Flush the current address space.
 */
static void
mmapflush(Uspace *us)
{
	m->flushmmu = 0;

	/* Nothing mapped? */
	if(us == nil || us->lo > us->hi || us->uzero == nil)
		return;

#ifdef __FreeBSD__
	if(__FreeBSD__ < 7){
		/*
		 * On FreeBSD, we need to be able to use mincore to
		 * tell whether a page is mapped, so we have to remap
		 * something with no pages here.
		 */
		if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE,
				MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
			panic("mmapflush mmap: %r");
		us->lo = 0x80000000UL;
		us->hi = 0;
		return;
	}
#endif

	/* Clear only as much as got mapped. */
	if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0)
		panic("mmapflush mprotect: %r");
	us->lo = 0x80000000UL;
	us->hi = 0;
}

/*
 * Update the "MMU" in response to a user fault.
 * pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page *p)
{
	int prot;
	Uspace *us;

	if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa)
		iprint("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa);

	assert(p->pa < memsize && pa < memsize);
	assert(up);
	us = up->pmmu.us;
	assert(us);

	/* Map the page */
	prot = PROT_READ;
	if(pa&PTEWRITE)
		prot |= PROT_WRITE;
	pa &= ~(BY2PG-1);
	va &= ~(BY2PG-1);
	if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
			pagefile, pa) == MAP_FAILED)
		panic("putmmu: %r");

	/* Record high and low address range for quick unmap. */
	if(us->lo > va)
		us->lo = va;
	if(us->hi < va)
		us->hi = va;
//	printlinuxmaps();
}

/*
 * The memory maps have changed for up.  Flush all cached state.
 */
void
flushmmu(void)
{
	if(tracemmu)
		iprint("flushmmu\n");

	if(up){
		vxproc_flush(up->pmmu.vxproc);
		mmapflush(up->pmmu.us);
	}
}

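/* Move us to the front of ulist: it is now the most recently used. */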
void
usespace(Uspace *us)
{
	int i;

	for(i=0; i<nuspace; i++)
		if(ulist[i] == us){
			while(i > 0){
				ulist[i] = ulist[i-1];
				i--;
			}
			ulist[0] = us;
			break;
		}
}

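/*
 * Give p the least recently used address space,
 * evicting the current owner if there is one.
 */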
Uspace*
getspace(Proc *p)
{
	Uspace *us;

	us = ulist[nuspace-1];
	if(us->p){
		if(tracemmu)
			iprint("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace);
		mmapflush(us);
	}
	us->p = p;
	p->pmmu.vxmm.base = us->uzero;
	p->pmmu.uzero = us->uzero;
	p->pmmu.us = us;
	usespace(us);
	return us;
}

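/*
 * Reattach p to the address space it used before, flushing
 * any mappings left behind by an intervening owner.
 */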
void
takespace(Proc *p, Uspace *us)
{
	usespace(us);
	if(us->p == p)
		return;
	if(tracemmu){
		if(us->p)
			iprint("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace);
	}
	us->p = p;
	mmapflush(us);
}

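/*
 * Detach us from its process and move it to the back of ulist,
 * making it the first candidate for reuse.
 */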
void
putspace(Uspace *us)
{
	int i;

	mmapflush(us);
	us->p->pmmu.us = nil;
	us->p->pmmu.uzero = nil;
	us->p->pmmu.vxmm.base = nil;
	us->p = nil;
	for(i=0; i<nuspace; i++)
		if(ulist[i] == us){
			while(++i < nuspace)
				ulist[i-1] = ulist[i];
			ulist[i-1] = us;
			break;
		}
}

/*
 * Called when the scheduler has decided to run proc p.
 * Prepare to run it.
 */
void
mmuswitch(Proc *p)
{
	/*
	 * Switch the address space, but only if it's not the
	 * one we were just in.  Also, kprocs don't count --
	 * only the guys on cpu0 do.
	 */
	if(p->kp)
		return;

	if(tracemmu)
		iprint("mmuswitch %ld %s\n", p->pid, p->text);

	if(p->pmmu.us && p->pmmu.us->p == p){
		if(tracemmu) iprint("---------- %ld %s [%d]\n",
			p->pid, p->text, p->pmmu.us - uspace);
		usespace(p->pmmu.us);
		if(!p->newtlb && !m->flushmmu)
			return;
		mmapflush(p->pmmu.us);
		p->newtlb = 0;
		return;
	}

	if(p->pmmu.us == nil)
		getspace(p);
	else
		takespace(p, p->pmmu.us);
	if(tracemmu) iprint("========== %ld %s [%d]\n",
		p->pid, p->text, p->pmmu.us - uspace);
}

/*
 * Called when proc p is dying.
 */
void
mmurelease(Proc *p)
{
	if(p->kp)
		return;
	if(tracemmu)
		iprint("mmurelease %ld %s\n", p->pid, p->text);
	if(p->pmmu.vxproc)
		vxproc_flush(p->pmmu.vxproc);
	if(p->pmmu.us){
		if(tracemmu)
			iprint("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace);
		putspace(p->pmmu.us);
		if(m->flushmmu)
			mmapflush(p->pmmu.us);
	}
}

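/* Debugging aid: dump this process's memory map (Linux only). */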
void
printlinuxmaps(void)
{
	char buf[100];
	sprint(buf, "cat /proc/%d/maps", getpid());
	system(buf);
}

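/*
 * Set the "physical memory" size, in MB (the -m switch).
 * Only the first call has any effect.
 */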
void
mmusize(int size)
{
	static int set = 0;
	if(!set && size){
		memsize = (size << 20);
		set = 1;
	}
}