vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

proc.c (29466B)


      1 #define	WANT_M
      2 #include	"u.h"
      3 #include	"lib.h"
      4 #include	"mem.h"
      5 #include	"dat.h"
      6 #include	"fns.h"
      7 #include	"error.h"
      8 #include	"trace.h"
      9 
     10 int	schedgain = 30;	/* units in seconds */
     11 int	nrdy;
     12 Ref	noteidalloc;
     13 
     14 void updatecpu(Proc*);
     15 int reprioritize(Proc*);
     16 
     17 ulong	delayedscheds;	/* statistics */
     18 long skipscheds;
     19 long preempts;
     20 ulong load;
     21 
     22 static Ref	pidalloc;
     23 
     24 static struct Procalloc
     25 {
     26 	Lock lk;
     27 	Proc*	ht[128];
     28 	Proc*	arena;
     29 	Proc*	free;
     30 } procalloc;
     31 
     32 enum
     33 {
     34 	Q=10,
     35 	DQ=4,
     36 	Scaling=2,
     37 };
     38 
     39 Schedq	runq[Nrq];
     40 ulong	runvec;
     41 
     42 char *statename[] =
     43 {	/* BUG: generate automatically */
     44 	"Dead",
     45 	"Moribund",
     46 	"Ready",
     47 	"Scheding",
     48 	"Running",
     49 	"Queueing",
     50 	"QueueingR",
     51 	"QueueingW",
     52 	"Wakeme",
     53 	"Broken",
     54 	"Stopped",
     55 	"Rendez",
     56 	"Waitrelease",
     57 };
     58 
     59 static void pidhash(Proc*);
     60 static void pidunhash(Proc*);
     61 static void rebalance(void);
     62 
     63 /*
     64  * Always splhi()'ed.
     65  */
     66 void
     67 schedinit(void)		/* never returns */
     68 {
     69 
     70 	setlabel(&m->sched);
     71 	if(traceprocs)	// Plan 9 VX
     72 		print("schedinit %p %p %s\n", m, up, up ? up->text : "");
     73 	if(up) {
     74 		m->proc = 0;
     75 		switch(up->state) {
     76 		case Running:
     77 			ready(up);
     78 			break;
     79 		case Moribund:
     80 			up->state = Dead;
     81 
     82 			/*
     83 			 * Holding locks from pexit:
     84 			 * 	procalloc
     85 			 *	palloc
     86 			 */
     87 			mmurelease(up);
     88 
     89 			up->qnext = procalloc.free;
     90 			procalloc.free = up;
     91 
     92 			unlock(&palloc.lk);
     93 			unlock(&procalloc.lk);
     94 			break;
     95 		}
     96 		up->mach = nil;
     97 		updatecpu(up);
     98 		up = nil;
     99 	}
    100 	sched();
    101 }
    102 
    103 /*
    104  *  If changing this routine, look also at sleep().  It
    105  *  contains a copy of the guts of sched().
    106  */
    107 void
    108 sched(void)
    109 {
    110 	Proc *p;
    111 
    112 	if(traceprocs)	// Plan 9 VX
    113 		print("sched %p %p [%s]\n", m, up, up ? up->text : "");
    114 	if(m->ilockdepth)
    115 		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
    116 			m->machno,
    117 			m->ilockdepth,
    118 			up? up->lastilock: nil,
    119 			(up && up->lastilock)? up->lastilock->pc: 0,
    120 			getcallerpc(&p+2));
    121 	if(up){
    122 		/*
    123 		 * Delay the sched until the process gives up the locks
    124 		 * it is holding.  This avoids dumb lock loops.
    125 		 * Don't delay if the process is Moribund.
    126 		 * It called sched to die.
    127 		 * But do sched eventually.  This avoids a missing unlock
    128 		 * from hanging the entire kernel. 
    129 		 * But don't reschedule procs holding palloc or procalloc.
    130 		 * Those are far too important to be holding while asleep.
    131 		 *
    132 		 * This test is not exact.  There can still be a few instructions
    133 		 * in the middle of taslock when a process holds a lock
    134 		 * but Lock.p has not yet been initialized.
    135 		 */
    136 		if(up->nlocks.ref)
    137 		if(up->state != Moribund)
    138 		if(up->delaysched < 20
    139 		|| palloc.lk.p == up
    140 		|| procalloc.lk.p == up){
    141 			up->delaysched++;
    142  			delayedscheds++;
    143 			return;
    144 		}
    145 		up->delaysched = 0;
    146 
    147 		splhi();
    148 
    149 		/* statistics */
    150 		m->cs++;
    151 
    152 		procsave(up);
    153 		if(setlabel(&up->sched)){
    154 			if(traceprocs)
    155 				print("sched %p %p: awake\n", m, up);
    156 			procrestore(up);
    157 			spllo();
    158 			return;
    159 		}
    160 		if(traceprocs)
    161 			print("sched %p %p: entering scheduler\n", m, up);
    162 		gotolabel(&m->sched);
    163 	}
    164 	if(traceprocs)
    165 		print("sched %p %p: runproc", m, up);
    166 	p = runproc();
    167 	if(1){
    168 		updatecpu(p);
    169 		p->priority = reprioritize(p);
    170 	}
    171 	if(p != m->readied)
    172 		m->schedticks = msec() + HZ/10;
    173 	m->readied = 0;
    174 	up = p;
    175 	up->state = Running;
    176 	up->mach = MACHP(m->machno);
    177 	m->proc = up;
    178 	if(traceprocs)
    179 		print("run %p %p [%s]\n", m, up, up->text);
    180 	mmuswitch(up);
    181 	gotolabel(&up->sched);
    182 }
    183 
    184 int
    185 anyready(void)
    186 {
    187 	return runvec;
    188 }
    189 
    190 int
    191 anyhigher(void)
    192 {
    193 	return runvec & ~((1<<(up->priority+1))-1);
    194 }
    195 
    196 /*
    197  *  here once per clock tick to see if we should resched
    198  */
    199 void
    200 hzsched(void)
    201 {
    202 	/* once a second, rebalance will reprioritize ready procs */
    203 	if(m->machno == 0)
    204 		rebalance();
    205 
    206 	/* unless preempted, get to run for at least 100ms */
    207 	if(anyhigher()
    208 	|| (!up->fixedpri && msec() > m->schedticks && anyready())){
    209 		m->readied = nil;	/* avoid cooperative scheduling */
    210 		up->delaysched++;
    211 	}
    212 }
    213 
    214 /*
    215  *  here at the end of non-clock interrupts to see if we should preempt the
    216  *  current process.  Returns 1 if preempted, 0 otherwise.
    217  */
    218 int
    219 preempted(void)
    220 {
    221 	if(up && up->state == Running)
    222 	if(up->preempted == 0)
    223 	if(anyhigher())
    224 	if(!active.exiting){
    225 		m->readied = nil;	/* avoid cooperative scheduling */
    226 		up->preempted = 1;
    227 		sched();
    228 		splhi();
    229 		up->preempted = 0;
    230 		return 1;
    231 	}
    232 	return 0;
    233 }
    234 
    235 /*
    236  * Update the cpu time average for this particular process,
    237  * which is about to change from up -> not up or vice versa.
    238  * p->lastupdate is the last time an updatecpu happened.
    239  *
    240  * The cpu time average is a decaying average that lasts
    241  * about D clock ticks.  D is chosen to be approximately
    242  * the cpu time of a cpu-intensive "quick job".  A job has to run
    243  * for approximately D clock ticks before we home in on its 
    244  * actual cpu usage.  Thus if you manage to get in and get out
    245  * quickly, you won't be penalized during your burst.  Once you
    246  * start using your share of the cpu for more than about D
    247  * clock ticks though, your p->cpu hits 1000 (1.0) and you end up 
    248  * below all the other quick jobs.  Interactive tasks, because
    249  * they basically always use less than their fair share of cpu,
    250  * will be rewarded.
    251  *
    252  * If the process has not been running, then we want to
    253  * apply the filter
    254  *
    255  *	cpu = cpu * (D-1)/D
    256  *
    257  * n times, yielding 
    258  * 
    259  *	cpu = cpu * ((D-1)/D)^n
    260  *
    261  * but D is big enough that this is approximately 
    262  *
    263  * 	cpu = cpu * (D-n)/D
    264  *
    265  * so we use that instead.
    266  * 
    267  * If the process has been running, we apply the filter to
    268  * 1 - cpu, yielding a similar equation.  Note that cpu is 
    269  * stored in fixed point (* 1000).
    270  *
    271  * Updatecpu must be called before changing up, in order
    272  * to maintain accurate cpu usage statistics.  It can be called
    273  * at any time to bring the stats for a given proc up-to-date.
    274  */
    275 void
    276 updatecpu(Proc *p)
    277 {
    278 	int n, t, ocpu;
    279 	int D = schedgain*HZ*Scaling;
    280 
    281 	if(p->edf)
    282 		return;
    283 
    284 	t = msec()*Scaling + Scaling/2;
    285 	n = t - p->lastupdate;
    286 	p->lastupdate = t;
    287 
    288 	if(n == 0)
    289 		return;
    290 	if(n > D)
    291 		n = D;
    292 
    293 	ocpu = p->cpu;
    294 	if(p != up)
    295 		p->cpu = (ocpu*(D-n))/D;
    296 	else{
    297 		t = 1000 - ocpu;
    298 		t = (t*(D-n))/D;
    299 		p->cpu = 1000 - t;
    300 	}
    301 
    302 //iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
    303 }
    304 
    305 /*
    306  * On average, p has used p->cpu of a cpu recently.
    307  * Its fair share is conf.nmach/m->load of a cpu.  If it has been getting
    308  * too much, penalize it.  If it has been getting not enough, reward it.
    309  * I don't think you can get much more than your fair share that 
    310  * often, so most of the queues are for using less.  Having a priority
    311  * of 3 means you're just right.  Having a higher priority (up to p->basepri) 
    312  * means you're not using as much as you could.
    313  */
    314 int
    315 reprioritize(Proc *p)
    316 {
    317 	int fairshare, n, load, ratio;
    318 
    319 	load = MACHP(0)->load;
    320 	if(load == 0)
    321 		return p->basepri;
    322 
    323 	/*
    324 	 *  fairshare = 1.000 * conf.nproc * 1.000/load,
    325 	 * except the decimal point is moved three places
    326 	 * on both load and fairshare.
    327 	 */
    328 	fairshare = (conf.nmach*1000*1000)/load;
    329 	n = p->cpu;
    330 	if(n == 0)
    331 		n = 1;
    332 	ratio = (fairshare+n/2) / n;
    333 	if(ratio > p->basepri)
    334 		ratio = p->basepri;
    335 	if(ratio < 0)
    336 		panic("reprioritize");
    337 //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
    338 	return ratio;
    339 }
    340 
    341 /*
    342  * add a process to a scheduling queue
    343  */
    344 void
    345 queueproc(Schedq *rq, Proc *p)
    346 {
    347 	int pri;
    348 
    349 	pri = rq - runq;
    350 	lock(&runq->lk);
    351 	p->priority = pri;
    352 	p->rnext = 0;
    353 	if(rq->tail)
    354 		rq->tail->rnext = p;
    355 	else
    356 		rq->head = p;
    357 	rq->tail = p;
    358 	rq->n++;
    359 	nrdy++;
    360 	runvec |= 1<<pri;
    361 	unlock(&runq->lk);
    362 }
    363 
    364 /*
    365  *  try to remove a process from a scheduling queue (called splhi)
    366  */
    367 Proc*
    368 dequeueproc(Schedq *rq, Proc *tp)
    369 {
    370 	Proc *l, *p;
    371 
    372 	if(!canlock(&runq->lk))
    373 		return nil;
    374 
    375 	/*
    376 	 *  the queue may have changed before we locked runq,
    377 	 *  refind the target process.
    378 	 */
    379 	l = 0;
    380 	for(p = rq->head; p; p = p->rnext){
    381 		if(p == tp)
    382 			break;
    383 		l = p;
    384 	}
    385 
    386 	/*
    387 	 *  p->mach==0 only when process state is saved
    388 	 */
    389 	if(p == 0 || p->mach){
    390 		unlock(&runq->lk);
    391 		return nil;
    392 	}
    393 	if(p->rnext == 0)
    394 		rq->tail = l;
    395 	if(l)
    396 		l->rnext = p->rnext;
    397 	else
    398 		rq->head = p->rnext;
    399 	if(rq->head == nil)
    400 		runvec &= ~(1<<(rq-runq));
    401 	rq->n--;
    402 	nrdy--;
    403 	if(p->state != Ready)
    404 		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
    405 
    406 	unlock(&runq->lk);
    407 	return p;
    408 }
    409 
    410 /*
    411  *  ready(p) picks a new priority for a process and sticks it in the
    412  *  runq for that priority.
    413  */
    414 void
    415 _ready(Proc *p)
    416 {
    417 	int s, pri;
    418 	Schedq *rq;
    419 	void (*pt)(Proc*, int, vlong);
    420 
    421 	s = splhi();
    422 	if(0){
    423 		splx(s);
    424 		return;
    425 	}
    426 
    427 	if(up != p)
    428 		m->readied = p;	/* group scheduling */
    429 
    430 	updatecpu(p);
    431 	pri = reprioritize(p);
    432 	p->priority = pri;
    433 	rq = &runq[pri];
    434 	p->state = Ready;
    435 	queueproc(rq, p);
    436 	pt = proctrace;
    437 	if(pt)
    438 		pt(p, SReady, 0);
    439 	splx(s);
    440 }
    441 
    442 /*
    443  *  yield the processor and drop our priority
    444  */
    445 void
    446 yield(void)
    447 {
    448 	if(anyready()){
    449 		/* pretend we just used 1/2 tick */
    450 		up->lastupdate -= Scaling/2;  
    451 		sched();
    452 	}
    453 }
    454 
    455 /*
    456  *  recalculate priorities once a second.  We need to do this
    457  *  since priorities will otherwise only be recalculated when
    458  *  the running process blocks.
    459  */
    460 ulong balancetime;
    461 
    462 static void
    463 rebalance(void)
    464 {
    465 	int pri, npri, t, x;
    466 	Schedq *rq;
    467 	Proc *p;
    468 
    469 	t = msec();
    470 	if(t - balancetime < HZ)
    471 		return;
    472 	balancetime = t;
    473 
    474 	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
    475 another:
    476 		p = rq->head;
    477 		if(p == nil)
    478 			continue;
    479 		if(p->mp != MACHP(m->machno))
    480 			continue;
    481 		if(pri == p->basepri)
    482 			continue;
    483 		updatecpu(p);
    484 		npri = reprioritize(p);
    485 		if(npri != pri){
    486 			x = splhi();
    487 			p = dequeueproc(rq, p);
    488 			if(p)
    489 				queueproc(&runq[npri], p);
    490 			splx(x);
    491 			goto another;
    492 		}
    493 	}
    494 }
    495 	
    496 
    497 /*
    498  *  pick a process to run
    499  */
    500 Proc*
    501 _runproc(void)
    502 {
    503 	Schedq *rq;
    504 	Proc *p;
    505 	ulong start, now;
    506 	int i;
    507 	void (*pt)(Proc*, int, vlong);
    508 
    509 	start = perfticks();
    510 
    511 	/* cooperative scheduling until the clock ticks */
    512 	if((p=m->readied) && p->mach==0 && p->state==Ready
    513 	&& runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
    514 		skipscheds++;
    515 		rq = &runq[p->priority];
    516 		goto found;
    517 	}
    518 
    519 	preempts++;
    520 
    521 loop:
    522 	/*
    523 	 *  find a process that last ran on this processor (affinity),
    524 	 *  or one that hasn't moved in a while (load balancing).  Every
    525 	 *  time around the loop affinity goes down.
    526 	 */
    527 	spllo();
    528 	for(i = 0;; i++){
    529 		/*
    530 		 *  find the highest priority target process that this
    531 		 *  processor can run given affinity constraints.
    532 		 *
    533 		 */
    534 		for(rq = &runq[Nrq-1]; rq >= runq; rq--){
    535 			for(p = rq->head; p; p = p->rnext){
    536 				if(p->mp == nil || p->mp == MACHP(m->machno)
    537 				|| (!p->wired && i > 0))
    538 					goto found;
    539 			}
    540 		}
    541 
    542 		/* waste time or halt the CPU */
    543 		idlehands();
    544 
    545 		/* remember how much time we're here */
    546 		now = perfticks();
    547 		m->perf.inidle += now-start;
    548 		start = now;
    549 	}
    550 
    551 found:
    552 	splhi();
    553 	p = dequeueproc(rq, p);
    554 	if(p == nil)
    555 		goto loop;
    556 
    557 	p->state = Scheding;
    558 	p->mp = MACHP(m->machno);
    559 
    560 	pt = proctrace;
    561 	if(pt)
    562 		pt(p, SRun, 0);
    563 	return p;
    564 }
    565 
    566 int
    567 canpage(Proc *p)
    568 {
    569 	int ok = 0;
    570 
    571 	splhi();
    572 	lock(&runq->lk);
    573 	/* Only reliable way to see if we are Running */
    574 	if(p->mach == 0) {
    575 		p->newtlb = 1;
    576 		ok = 1;
    577 	}
    578 	unlock(&runq->lk);
    579 	spllo();
    580 
    581 	return ok;
    582 }
    583 
    584 Proc*
    585 newproc(void)
    586 {
    587 	char msg[64];
    588 	Proc *p;
    589 
    590 	lock(&procalloc.lk);
    591 	for(;;) {
    592 		if((p = procalloc.free))
    593 			break;
    594 
    595 		snprint(msg, sizeof msg, "no procs; %s forking",
    596 			up? up->text: "kernel");
    597 		unlock(&procalloc.lk);
    598 		resrcwait(msg);
    599 		lock(&procalloc.lk);
    600 	}
    601 	procalloc.free = p->qnext;
    602 	unlock(&procalloc.lk);
    603 
    604 	p->state = Scheding;
    605 	p->psstate = "New";
    606 	p->mach = 0;
    607 	p->qnext = 0;
    608 	p->nchild = 0;
    609 	p->nwait = 0;
    610 	p->waitq = 0;
    611 	p->parent = 0;
    612 	p->pgrp = 0;
    613 	p->egrp = 0;
    614 	p->fgrp = 0;
    615 	p->rgrp = 0;
    616 	p->pdbg = 0;
    617 	p->fpstate = FPinit;
    618 	p->kp = 0;
    619 	if(up && up->procctl == Proc_tracesyscall)
    620 		p->procctl = Proc_tracesyscall;
    621 	else
    622 		p->procctl = 0;
    623 	p->syscalltrace = 0;
    624 	p->notepending = 0;
    625 	p->ureg = 0;
    626 	p->privatemem = 0;
    627 	p->noswap = 0;
    628 	p->errstr = p->errbuf0;
    629 	p->syserrstr = p->errbuf1;
    630 	p->errbuf0[0] = '\0';
    631 	p->errbuf1[0] = '\0';
    632 	p->nlocks.ref = 0;
    633 	p->delaysched = 0;
    634 	p->trace = 0;
    635 	kstrdup(&p->user, "*nouser");
    636 	kstrdup(&p->text, "*notext");
    637 	kstrdup(&p->args, "");
    638 	p->nargs = 0;
    639 	p->setargs = 0;
    640 	memset(p->seg, 0, sizeof p->seg);
    641 	p->pid = incref(&pidalloc);
    642 	pidhash(p);
    643 	p->noteid = incref(&noteidalloc);
    644 	if(p->pid==0 || p->noteid==0)
    645 		panic("pidalloc");
    646 	if(p->kstack == 0)
    647 		p->kstack = smalloc(KSTACK);
    648 
    649 	/* sched params */
    650 	p->mp = 0;
    651 	p->wired = 0;
    652 	procpriority(p, PriNormal, 0);
    653 	p->cpu = 0;
    654 	p->lastupdate = msec()*Scaling;
    655 	p->edf = nil;
    656 
    657 	vxnewproc(p);
    658 	return p;
    659 }
    660 
    661 /*
    662  * wire this proc to a machine
    663  */
    664 void
    665 procwired(Proc *p, int bm)
    666 {
    667 	Proc *pp;
    668 	int i;
    669 	char nwired[MAXMACH];
    670 	Mach *wm;
    671 
    672 	if(bm < 0){
    673 		/* pick a machine to wire to */
    674 		memset(nwired, 0, sizeof(nwired));
    675 		p->wired = 0;
    676 		pp = proctab(0);
    677 		for(i=0; i<conf.nproc; i++, pp++){
    678 			wm = pp->wired;
    679 			if(wm && pp->pid)
    680 				nwired[wm->machno]++;
    681 		}
    682 		bm = 0;
    683 		for(i=0; i<conf.nmach; i++)
    684 			if(nwired[i] < nwired[bm])
    685 				bm = i;
    686 	} else {
    687 		/* use the virtual machine requested */
    688 		bm = bm % conf.nmach;
    689 	}
    690 
    691 	p->wired = MACHP(bm);
    692 	p->mp = p->wired;
    693 }
    694 
    695 void
    696 procpriority(Proc *p, int pri, int fixed)
    697 {
    698 	if(pri >= Npriq)
    699 		pri = Npriq - 1;
    700 	else if(pri < 0)
    701 		pri = 0;
    702 	p->basepri = pri;
    703 	p->priority = pri;
    704 	if(fixed){
    705 		p->fixedpri = 1;
    706 	} else {
    707 		p->fixedpri = 0;
    708 	}
    709 }
    710 
    711 void
    712 procinit0(void)		/* bad planning - clashes with devproc.c */
    713 {
    714 	Proc *p;
    715 	int i;
    716 
    717 	procalloc.free = xalloc(conf.nproc*sizeof(Proc));
    718 	if(procalloc.free == nil){
    719 		xsummary();
    720 		panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
    721 	}
    722 	procalloc.arena = procalloc.free;
    723 
    724 	p = procalloc.free;
    725 	for(i=0; i<conf.nproc-1; i++,p++)
    726 		p->qnext = p+1;
    727 	p->qnext = 0;
    728 }
    729 
    730 /*
    731  *  sleep if a condition is not true.  Another process will
    732  *  awaken us after it sets the condition.  When we awaken
    733  *  the condition may no longer be true.
    734  *
    735  *  we lock both the process and the rendezvous to keep r->p
    736  *  and p->r synchronized.
    737  */
    738 void
    739 sleep(Rendez *r, int (*f)(void*), void *arg)
    740 {
    741 	int s;
    742 	void (*pt)(Proc*, int, vlong);
    743 
    744 	s = splhi();
    745 
    746 	if(up->nlocks.ref)
    747 		print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
    748 			up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
    749 	lock(&r->lk);
    750 	lock(&up->rlock);
    751 	if(r->p){
    752 		print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
    753 		dumpstack();
    754 	}
    755 
    756 	/*
    757 	 *  Wakeup only knows there may be something to do by testing
    758 	 *  r->p in order to get something to lock on.
    759 	 *  Flush that information out to memory in case the sleep is
    760 	 *  committed.
    761 	 */
    762 	r->p = up;
    763 
    764 	if((*f)(arg) || up->notepending){
    765 		/*
    766 		 *  if condition happened or a note is pending
    767 		 *  never mind
    768 		 */
    769 		if(traceprocs)
    770 			print("sleep %p %p: already happened\n", m, up);
    771 		r->p = nil;
    772 		unlock(&up->rlock);
    773 		unlock(&r->lk);
    774 	} else {
    775 		/*
    776 		 *  now we are committed to
    777 		 *  change state and call scheduler
    778 		 */
    779 		pt = proctrace;
    780 		if(pt)
    781 			pt(up, SSleep, 0);
    782 		up->state = Wakeme;
    783 		up->r = r;
    784 
    785 		/* statistics */
    786 		m->cs++;
    787 
    788 		procsave(up);
    789 		if(setlabel(&up->sched)) {
    790 			if(traceprocs)
    791 				print("sleep %p %p: awake\n", m, up);
    792 			/*
    793 			 *  here when the process is awakened
    794 			 */
    795 			procrestore(up);
    796 			spllo();
    797 		} else {
    798 			if(traceprocs)
    799 				print("sleep %p %p: sleeping\n", m, up);
    800 			/*
    801 			 *  here to go to sleep (i.e. stop Running)
    802 			 */
    803 			unlock(&up->rlock);
    804 			unlock(&r->lk);
    805 			gotolabel(&m->sched);
    806 		}
    807 	}
    808 
    809 	if(up->notepending) {
    810 		up->notepending = 0;
    811 		splx(s);
    812 		if(up->procctl == Proc_exitme && up->closingfgrp)
    813 			forceclosefgrp();
    814 		error(Eintr);
    815 	}
    816 
    817 	splx(s);
    818 }
    819 
    820 static int
    821 tfn(void *arg)
    822 {
    823 	return up->trend == nil || up->tfn(arg);
    824 }
    825 
    826 void
    827 twakeup(Ureg *ureg, Timer *t)
    828 {
    829 	Proc *p;
    830 	Rendez *trend;
    831 
    832 	p = t->ta;
    833 	trend = p->trend;
    834 	p->trend = 0;
    835 	if(trend)
    836 		wakeup(trend);
    837 }
    838 
    839 void
    840 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
    841 {
    842 	if (up->timer.tt){
    843 		print("tsleep: timer active: mode %d, tf %#p\n", up->timer.tmode, up->timer.tf);
    844 		timerdel(&up->timer);
    845 	}
    846 	up->timer.tns = MS2NS(ms);
    847 	up->timer.tf = twakeup;
    848 	up->timer.tmode = Trelative;
    849 	up->timer.ta = up;
    850 	up->trend = r;
    851 	up->tfn = fn;
    852 	timeradd(&up->timer);
    853 
    854 	if(waserror()){
    855 		timerdel(&up->timer);
    856 		nexterror();
    857 	}
    858 	sleep(r, tfn, arg);
    859 	if (up->timer.tt)
    860 		timerdel(&up->timer);
    861 	up->timer.twhen = 0;
    862 	poperror();
    863 }
    864 
    865 /*
    866  *  Expects that only one process can call wakeup for any given Rendez.
    867  *  We hold both locks to ensure that r->p and p->r remain consistent.
    868  *  Richard Miller has a better solution that doesn't require both to
    869  *  be held simultaneously, but I'm a paranoid - presotto.
    870  */
    871 Proc*
    872 wakeup(Rendez *r)
    873 {
    874 	Proc *p;
    875 	int s;
    876 
    877 	s = splhi();
    878 
    879 	lock(&r->lk);
    880 	p = r->p;
    881 
    882 	if(p != nil){
    883 		lock(&p->rlock);
    884 		if(p->state != Wakeme || p->r != r){
    885 			iprint("%p %p %d\n", p->r, r, p->state);
    886 			panic("wakeup: state");
    887 		}
    888 		r->p = nil;
    889 		p->r = nil;
    890 		ready(p);
    891 		unlock(&p->rlock);
    892 	}
    893 	unlock(&r->lk);
    894 
    895 	splx(s);
    896 
    897 	return p;
    898 }
    899 
    900 /*
    901  *  if waking a sleeping process, this routine must hold both
    902  *  p->rlock and r->lock.  However, it can't know them in
    903  *  the same order as wakeup causing a possible lock ordering
    904  *  deadlock.  We break the deadlock by giving up the p->rlock
    905  *  lock if we can't get the r->lock and retrying.
    906  */
    907 int
    908 postnote(Proc *p, int dolock, char *n, int flag)
    909 {
    910 	int s, ret;
    911 	Rendez *r;
    912 	Proc *d, **l;
    913 
    914 	if(dolock)
    915 		qlock(&p->debug);
    916 
    917 	if(flag != NUser && (p->notify == 0 || p->notified))
    918 		p->nnote = 0;
    919 
    920 	ret = 0;
    921 	if(p->nnote < NNOTE) {
    922 		strcpy(p->note[p->nnote].msg, n);
    923 		p->note[p->nnote++].flag = flag;
    924 		ret = 1;
    925 	}
    926 	p->notepending = 1;
    927 	if(dolock)
    928 		qunlock(&p->debug);
    929 
    930 	/* this loop is to avoid lock ordering problems. */
    931 	for(;;){
    932 		s = splhi();
    933 		lock(&p->rlock);
    934 		r = p->r;
    935 
    936 		/* waiting for a wakeup? */
    937 		if(r == nil)
    938 			break;	/* no */
    939 
    940 		/* try for the second lock */
    941 		if(canlock(&r->lk)){
    942 			if(p->state != Wakeme || r->p != p)
    943 				panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
    944 			p->r = nil;
    945 			r->p = nil;
    946 			ready(p);
    947 			unlock(&r->lk);
    948 			break;
    949 		}
    950 
    951 		/* give other process time to get out of critical section and try again */
    952 		unlock(&p->rlock);
    953 		splx(s);
    954 		sched();
    955 	}
    956 	unlock(&p->rlock);
    957 	splx(s);
    958 
    959 	if(p->state != Rendezvous)
    960 		return ret;
    961 
    962 	/* Try and pull out of a rendezvous */
    963 	lock(&p->rgrp->ref.lk);
    964 	if(p->state == Rendezvous) {
    965 		p->rendval = ~0;
    966 		l = &REND(p->rgrp, p->rendtag);
    967 		for(d = *l; d; d = d->rendhash) {
    968 			if(d == p) {
    969 				*l = p->rendhash;
    970 				break;
    971 			}
    972 			l = &d->rendhash;
    973 		}
    974 		ready(p);
    975 	}
    976 	unlock(&p->rgrp->ref.lk);
    977 	return ret;
    978 }
    979 
    980 /*
    981  * weird thing: keep at most NBROKEN around
    982  */
    983 #define	NBROKEN 4
    984 struct
    985 {
    986 	QLock lk;
    987 	int	n;
    988 	Proc	*p[NBROKEN];
    989 }broken;
    990 
    991 void
    992 addbroken(Proc *p)
    993 {
    994 	qlock(&broken.lk);
    995 	if(broken.n == NBROKEN) {
    996 		ready(broken.p[0]);
    997 		memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
    998 		--broken.n;
    999 	}
   1000 	broken.p[broken.n++] = p;
   1001 	qunlock(&broken.lk);
   1002 
   1003 	p->state = Broken;
   1004 	p->psstate = 0;
   1005 	sched();
   1006 }
   1007 
   1008 void
   1009 unbreak(Proc *p)
   1010 {
   1011 	int b;
   1012 
   1013 	qlock(&broken.lk);
   1014 	for(b=0; b < broken.n; b++)
   1015 		if(broken.p[b] == p) {
   1016 			broken.n--;
   1017 			memmove(&broken.p[b], &broken.p[b+1],
   1018 					sizeof(Proc*)*(NBROKEN-(b+1)));
   1019 			ready(p);
   1020 			break;
   1021 		}
   1022 	qunlock(&broken.lk);
   1023 }
   1024 
   1025 int
   1026 freebroken(void)
   1027 {
   1028 	int i, n;
   1029 
   1030 	qlock(&broken.lk);
   1031 	n = broken.n;
   1032 	for(i=0; i<n; i++) {
   1033 		ready(broken.p[i]);
   1034 		broken.p[i] = 0;
   1035 	}
   1036 	broken.n = 0;
   1037 	qunlock(&broken.lk);
   1038 	return n;
   1039 }
   1040 
   1041 void
   1042 pexit(char *exitstr, int freemem)
   1043 {
   1044 	Proc *p;
   1045 	Segment **s, **es;
   1046 	long utime, stime;
   1047 	Waitq *wq, *f, *next;
   1048 	Fgrp *fgrp;
   1049 	Egrp *egrp;
   1050 	Rgrp *rgrp;
   1051 	Pgrp *pgrp;
   1052 	Chan *dot;
   1053 	void (*pt)(Proc*, int, vlong);
   1054 
   1055 	if(up->syscalltrace)
   1056 		free(up->syscalltrace);
   1057 	up->alarm = 0;
   1058 	if (up->timer.tt)
   1059 		timerdel(&up->timer);
   1060 	pt = proctrace;
   1061 	if(pt)
   1062 		pt(up, SDead, 0);
   1063 
   1064 	/* nil out all the resources under lock (free later) */
   1065 	qlock(&up->debug);
   1066 	fgrp = up->fgrp;
   1067 	up->fgrp = nil;
   1068 	egrp = up->egrp;
   1069 	up->egrp = nil;
   1070 	rgrp = up->rgrp;
   1071 	up->rgrp = nil;
   1072 	pgrp = up->pgrp;
   1073 	up->pgrp = nil;
   1074 	dot = up->dot;
   1075 	up->dot = nil;
   1076 	qunlock(&up->debug);
   1077 
   1078 	if(fgrp)
   1079 		closefgrp(fgrp);
   1080 	if(egrp)
   1081 		closeegrp(egrp);
   1082 	if(rgrp)
   1083 		closergrp(rgrp);
   1084 	if(dot)
   1085 		cclose(dot);
   1086 	if(pgrp)
   1087 		closepgrp(pgrp);
   1088 
   1089 	/*
   1090 	 * if not a kernel process and have a parent,
   1091 	 * do some housekeeping.
   1092 	 */
   1093 	if(up->kp == 0) {
   1094 		p = up->parent;
   1095 		if(p == 0) {
   1096 			if(exitstr == 0)
   1097 				exitstr = "unknown";
   1098 			panic("boot process died: %s", exitstr);
   1099 		}
   1100 
   1101 		while(waserror())
   1102 			;
   1103 
   1104 		wq = smalloc(sizeof(Waitq));
   1105 		poperror();
   1106 
   1107 		wq->w.pid = up->pid;
   1108 		utime = up->time[TUser] + up->time[TCUser];
   1109 		stime = up->time[TSys] + up->time[TCSys];
   1110 		wq->w.time[TUser] = tk2ms(utime);
   1111 		wq->w.time[TSys] = tk2ms(stime);
   1112 		wq->w.time[TReal] = tk2ms(msec() - up->time[TReal]);
   1113 		if(exitstr && exitstr[0])
   1114 			snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
   1115 		else
   1116 			wq->w.msg[0] = '\0';
   1117 
   1118 		lock(&p->exl);
   1119 		/*
   1120 		 * Check that parent is still alive.
   1121 		 */
   1122 		if(p->pid == up->parentpid && p->state != Broken) {
   1123 			p->nchild--;
   1124 			p->time[TCUser] += utime;
   1125 			p->time[TCSys] += stime;
   1126 			/*
   1127 			 * If there would be more than 128 wait records
   1128 			 * processes for my parent, then don't leave a wait
   1129 			 * record behind.  This helps prevent badly written
   1130 			 * daemon processes from accumulating lots of wait
   1131 			 * records.
   1132 		 	 */
   1133 			if(p->nwait < 128) {
   1134 				wq->next = p->waitq;
   1135 				p->waitq = wq;
   1136 				p->nwait++;
   1137 				wq = nil;
   1138 				wakeup(&p->waitr);
   1139 			}
   1140 		}
   1141 		unlock(&p->exl);
   1142 		if(wq)
   1143 			free(wq);
   1144 	}
   1145 
   1146 	if(!freemem)
   1147 		addbroken(up);
   1148 
   1149 	qlock(&up->seglock);
   1150 	es = &up->seg[NSEG];
   1151 	for(s = up->seg; s < es; s++) {
   1152 		if(*s) {
   1153 			putseg(*s);
   1154 			*s = 0;
   1155 		}
   1156 	}
   1157 	qunlock(&up->seglock);
   1158 
   1159 	lock(&up->exl);		/* Prevent my children from leaving waits */
   1160 	pidunhash(up);
   1161 	up->pid = 0;
   1162 	wakeup(&up->waitr);
   1163 	unlock(&up->exl);
   1164 
   1165 	for(f = up->waitq; f; f = next) {
   1166 		next = f->next;
   1167 		free(f);
   1168 	}
   1169 
   1170 	/* release debuggers */
   1171 	qlock(&up->debug);
   1172 	if(up->pdbg) {
   1173 		wakeup(&up->pdbg->sleep);
   1174 		up->pdbg = 0;
   1175 	}
   1176 	qunlock(&up->debug);
   1177 
   1178 	/* Sched must not loop for these locks */
   1179 	lock(&procalloc.lk);
   1180 	lock(&palloc.lk);
   1181 
   1182 	up->state = Moribund;
   1183 	sched();
   1184 	panic("pexit");
   1185 }
   1186 
   1187 int
   1188 haswaitq(void *x)
   1189 {
   1190 	Proc *p;
   1191 
   1192 	p = (Proc *)x;
   1193 	return p->waitq != 0;
   1194 }
   1195 
   1196 ulong
   1197 pwait(Waitmsg *w)
   1198 {
   1199 	ulong cpid;
   1200 	Waitq *wq;
   1201 
   1202 	if(!canqlock(&up->qwaitr))
   1203 		error(Einuse);
   1204 
   1205 	if(waserror()) {
   1206 		qunlock(&up->qwaitr);
   1207 		nexterror();
   1208 	}
   1209 
   1210 	lock(&up->exl);
   1211 	if(up->nchild == 0 && up->waitq == 0) {
   1212 		unlock(&up->exl);
   1213 		error(Enochild);
   1214 	}
   1215 	unlock(&up->exl);
   1216 
   1217 	sleep(&up->waitr, haswaitq, up);
   1218 
   1219 	lock(&up->exl);
   1220 	wq = up->waitq;
   1221 	up->waitq = wq->next;
   1222 	up->nwait--;
   1223 	unlock(&up->exl);
   1224 
   1225 	qunlock(&up->qwaitr);
   1226 	poperror();
   1227 
   1228 	if(w)
   1229 		memmove(w, &wq->w, sizeof(Waitmsg));
   1230 	cpid = wq->w.pid;
   1231 	free(wq);
   1232 	return cpid;
   1233 }
   1234 
   1235 Proc*
   1236 proctab(int i)
   1237 {
   1238 	return &procalloc.arena[i];
   1239 }
   1240 
   1241 void
   1242 dumpaproc(Proc *p)
   1243 {
   1244 	ulong bss;
   1245 	char *s;
   1246 
   1247 	if(p == 0)
   1248 		return;
   1249 
   1250 	bss = 0;
   1251 	if(p->seg[BSEG])
   1252 		bss = p->seg[BSEG]->top;
   1253 
   1254 	s = p->psstate;
   1255 	if(s == 0)
   1256 		s = statename[p->state];
   1257 	print("%3lud:%10s pc %8lux dbgpc %8lux  %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
   1258 		p->pid, p->text, p->pc, dbgpc(p),  s, statename[p->state],
   1259 		p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
   1260 }
   1261 
   1262 void
   1263 procdump(void)
   1264 {
   1265 	int i;
   1266 	Proc *p;
   1267 
   1268 	if(up)
   1269 		print("up %lud\n", up->pid);
   1270 	else
   1271 		print("no current process\n");
   1272 	for(i=0; i<conf.nproc; i++) {
   1273 		p = &procalloc.arena[i];
   1274 		if(p->state == Dead)
   1275 			continue;
   1276 
   1277 		dumpaproc(p);
   1278 	}
   1279 }
   1280 
   1281 /*
   1282  *  wait till all processes have flushed their mmu
   1283  *  state about segement s
   1284  */
   1285 void
   1286 procflushseg(Segment *s)
   1287 {
   1288 	int i, ns, nm, nwait;
   1289 	Proc *p;
   1290 
   1291 	/*
   1292 	 *  tell all processes with this
   1293 	 *  segment to flush their mmu's
   1294 	 */
   1295 	nwait = 0;
   1296 	for(i=0; i<conf.nproc; i++) {
   1297 		p = &procalloc.arena[i];
   1298 		if(p->state == Dead)
   1299 			continue;
   1300 		for(ns = 0; ns < NSEG; ns++)
   1301 			if(p->seg[ns] == s){
   1302 				p->newtlb = 1;
   1303 				for(nm = 0; nm < conf.nmach; nm++){
   1304 					if(MACHP(nm)->proc == p){
   1305 						MACHP(nm)->flushmmu = 1;
   1306 						nwait++;
   1307 					}
   1308 				}
   1309 				break;
   1310 			}
   1311 	}
   1312 
   1313 	if(nwait == 0)
   1314 		return;
   1315 
   1316 	/*
   1317 	 *  wait for all processors to take a clock interrupt
   1318 	 *  and flush their mmu's
   1319 	 */
   1320 	for(nm = 0; nm < conf.nmach; nm++)
   1321 		if(MACHP(nm) != m)
   1322 			while(MACHP(nm)->flushmmu)
   1323 				sched();
   1324 }
   1325 
   1326 void
   1327 scheddump(void)
   1328 {
   1329 	Proc *p;
   1330 	Schedq *rq;
   1331 
   1332 	for(rq = &runq[Nrq-1]; rq >= runq; rq--){
   1333 		if(rq->head == 0)
   1334 			continue;
   1335 		print("rq%ld:", rq-runq);
   1336 		for(p = rq->head; p; p = p->rnext)
   1337 			print(" %lud(%lud)", p->pid, msec() - p->readytime);
   1338 		print("\n");
   1339 		delay(150);
   1340 	}
   1341 	print("nrdy %d\n", nrdy);
   1342 }
   1343 
   1344 void
   1345 kproc(char *name, void (*func)(void *), void *arg)
   1346 {
   1347 	Proc *p;
   1348 	static Pgrp *kpgrp;
   1349 
   1350 	extern int tracekdev;
   1351 	if(tracekdev)
   1352 		iprint("kproc %s\n", name);
   1353 
   1354 	p = newproc();
   1355 	p->psstate = 0;
   1356 	p->procmode = 0640;
   1357 	p->kp = 1;
   1358 	p->noswap = 1;
   1359 
   1360 	if(up){
   1361 		p->fpsave = up->fpsave;
   1362 		p->scallnr = up->scallnr;
   1363 		p->s = up->s;
   1364 		p->slash = up->slash;
   1365 		p->dot = up->dot;
   1366 		if(p->dot)
   1367 			incref(&p->dot->ref);
   1368 
   1369 		memmove(p->note, up->note, sizeof(p->note));
   1370 		p->nnote = up->nnote;
   1371 		p->lastnote = up->lastnote;
   1372 		p->notify = up->notify;
   1373 	}
   1374 
   1375  	p->notified = 0;
   1376  	p->ureg = 0;
   1377  	p->dbgreg = 0;
   1378 	p->nerrlab = 0;
   1379 
   1380 	procpriority(p, PriKproc, 0);
   1381 
   1382 	kprocchild(p, func, arg);
   1383 
   1384 	kstrdup(&p->user, eve);
   1385 	kstrdup(&p->text, name);
   1386 	if(kpgrp == 0)
   1387 		kpgrp = newpgrp();
   1388 	p->pgrp = kpgrp;
   1389 	incref(&kpgrp->ref);
   1390 
   1391 	memset(p->time, 0, sizeof(p->time));
   1392 	p->time[TReal] = msec();
   1393 	ready(p);
   1394 }
   1395 
   1396 /*
   1397  *  called splhi() by notify().  See comment in notify for the
   1398  *  reasoning.
   1399  */
   1400 void
   1401 procctl(Proc *p)
   1402 {
   1403 	char *state;
   1404 	ulong s;
   1405 
   1406 	switch(p->procctl) {
   1407 	case Proc_exitbig:
   1408 		spllo();
   1409 		pexit("Killed: Insufficient physical memory", 1);
   1410 
   1411 	case Proc_exitme:
   1412 		spllo();		/* pexit has locks in it */
   1413 		pexit("Killed", 1);
   1414 
   1415 	case Proc_traceme:
   1416 		if(p->nnote == 0)
   1417 			return;
   1418 		/* No break */
   1419 
   1420 	case Proc_stopme:
   1421 		p->procctl = 0;
   1422 		state = p->psstate;
   1423 		p->psstate = "Stopped";
   1424 		/* free a waiting debugger */
   1425 		s = spllo();
   1426 		qlock(&p->debug);
   1427 		if(p->pdbg) {
   1428 			wakeup(&p->pdbg->sleep);
   1429 			p->pdbg = 0;
   1430 		}
   1431 		qunlock(&p->debug);
   1432 		splhi();
   1433 		p->state = Stopped;
   1434 		sched();
   1435 		p->psstate = state;
   1436 		splx(s);
   1437 		return;
   1438 	}
   1439 }
   1440 
   1441 #include "errstr.h"
   1442 
   1443 void
   1444 error(char *err)
   1445 {
   1446 	spllo();
   1447 
   1448 	assert(up->nerrlab < NERR);
   1449 	kstrcpy(up->errstr, err, ERRMAX);
   1450 	setlabel(&up->errlab[NERR-1]);
   1451 	nexterror();
   1452 }
   1453 
   1454 void
   1455 nexterror(void)
   1456 {
   1457 	gotolabel(&up->errlab[--up->nerrlab]);
   1458 }
   1459 
   1460 void
   1461 exhausted(char *resource)
   1462 {
   1463 	char buf[ERRMAX];
   1464 
   1465 	sprint(buf, "no free %s", resource);
   1466 	iprint("%s\n", buf);
   1467 	error(buf);
   1468 }
   1469 
   1470 void
   1471 killbig(char *why)
   1472 {
   1473 	int i;
   1474 	Segment *s;
   1475 	ulong l, max;
   1476 	Proc *p, *ep, *kp;
   1477 
   1478 	max = 0;
   1479 	kp = 0;
   1480 	ep = procalloc.arena+conf.nproc;
   1481 	for(p = procalloc.arena; p < ep; p++) {
   1482 		if(p->state == Dead || p->kp)
   1483 			continue;
   1484 		l = 0;
   1485 		for(i=1; i<NSEG; i++) {
   1486 			s = p->seg[i];
   1487 			if(s != 0)
   1488 				l += s->top - s->base;
   1489 		}
   1490 		if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
   1491 			kp = p;
   1492 			max = l;
   1493 		}
   1494 	}
   1495 
   1496 	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
   1497 	for(p = procalloc.arena; p < ep; p++) {
   1498 		if(p->state == Dead || p->kp)
   1499 			continue;
   1500 		if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
   1501 			p->procctl = Proc_exitbig;
   1502 	}
   1503 	kp->procctl = Proc_exitbig;
   1504 	for(i = 0; i < NSEG; i++) {
   1505 		s = kp->seg[i];
   1506 		if(s != 0 && canqlock(&s->lk)) {
   1507 			mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
   1508 			qunlock(&s->lk);
   1509 		}
   1510 	}
   1511 }
   1512 
   1513 /*
   1514  *  change ownership to 'new' of all processes owned by 'old'.  Used when
   1515  *  eve changes.
   1516  */
   1517 void
   1518 renameuser(char *old, char *new)
   1519 {
   1520 	Proc *p, *ep;
   1521 
   1522 	ep = procalloc.arena+conf.nproc;
   1523 	for(p = procalloc.arena; p < ep; p++)
   1524 		if(p->user!=nil && strcmp(old, p->user)==0)
   1525 			kstrdup(&p->user, new);
   1526 }
   1527 
   1528 /*
   1529  *  time accounting called by clock() splhi'd
   1530  */
   1531 void
   1532 accounttime(void)
   1533 {
   1534 	Proc *p;
   1535 	ulong n, per;
   1536 	static ulong nrun;
   1537 
   1538 	p = m->proc;
   1539 	if(p) {
   1540 		nrun++;
   1541 		p->time[p->insyscall]++;
   1542 	}
   1543 
   1544 	/* calculate decaying duty cycles */
   1545 	n = perfticks();
   1546 	per = n - m->perf.last;
   1547 	m->perf.last = n;
   1548 	per = (m->perf.period*(HZ-1) + per)/HZ;
   1549 	if(per != 0)
   1550 		m->perf.period = per;
   1551 
   1552 	m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
   1553 	m->perf.inidle = 0;
   1554 
   1555 	m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
   1556 	m->perf.inintr = 0;
   1557 
   1558 	/* only one processor gets to compute system load averages */
   1559 	if(m->machno != 0)
   1560 		return;
   1561 
   1562 	/*
   1563 	 * calculate decaying load average.
   1564 	 * if we decay by (n-1)/n then it takes
   1565 	 * n clock ticks to go from load L to .36 L once
   1566 	 * things quiet down.  it takes about 5 n clock
   1567 	 * ticks to go to zero.  so using HZ means this is
   1568 	 * approximately the load over the last second,
   1569 	 * with a tail lasting about 5 seconds.
   1570 	 */
   1571 	n = nrun;
   1572 	nrun = 0;
   1573 	n = (nrdy+n)*1000;
   1574 	m->load = (m->load*(HZ-1)+n)/HZ;
   1575 }
   1576 
   1577 static void
   1578 pidhash(Proc *p)
   1579 {
   1580 	int h;
   1581 
   1582 	h = p->pid % nelem(procalloc.ht);
   1583 	lock(&procalloc.lk);
   1584 	p->pidhash = procalloc.ht[h];
   1585 	procalloc.ht[h] = p;
   1586 	unlock(&procalloc.lk);
   1587 }
   1588 
   1589 static void
   1590 pidunhash(Proc *p)
   1591 {
   1592 	int h;
   1593 	Proc **l;
   1594 
   1595 	h = p->pid % nelem(procalloc.ht);
   1596 	lock(&procalloc.lk);
   1597 	for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
   1598 		if(*l == p){
   1599 			*l = p->pidhash;
   1600 			break;
   1601 		}
   1602 	unlock(&procalloc.lk);
   1603 }
   1604 
   1605 int
   1606 procindex(ulong pid)
   1607 {
   1608 	Proc *p;
   1609 	int h;
   1610 	int s;
   1611 
   1612 	s = -1;
   1613 	h = pid % nelem(procalloc.ht);
   1614 	lock(&procalloc.lk);
   1615 	for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
   1616 		if(p->pid == pid){
   1617 			s = p - procalloc.arena;
   1618 			break;
   1619 		}
   1620 	unlock(&procalloc.lk);
   1621 	return s;
   1622 }