vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

devaoe.c (43271B)


      1 /*
      2  *	© 2005-8 coraid
      3  *	aoe storage initiator
      4  */
      5 
      6 #include "u.h"
      7 #include "lib.h"
      8 #include "mem.h"
      9 #include "dat.h"
     10 #include "fns.h"
     11 #include "io.h"
     12 #include "ureg.h"
     13 #include "error.h"
     14 #include "netif.h"
     15 #include "etherif.h"
     16 #include "ip/ip.h"
     17 #include "aoe.h"
     18 
     19 #define	WAKEUP(x)	wakeup(&((x)->rend))
     20 #define SLEEP(a,b,c)	sleep(&(a->rend), b, c)
     21 
     22 //#pragma	varargck argpos	eventlog	1
     23 
     24 #define dprint(...)	if(debug) eventlog(__VA_ARGS__); else USED(debug);
     25 #define uprint(...)	snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
     26 
/* Compile-time limits for the driver's tables. */
enum {
	Maxunits	= 0xff,
	Maxframes	= 128,
	Maxmtu		= 100000,
	Ndevlink	= 6,		/* entries in Aoedev.dltab */
	Nea		= 6,		/* ethernet addresses per Devlink (rows of eatab) */
	Nnetlink	= 6,		/* entries in netlinks.nl */
};
     35 
     36 #define TYPE(q)		((ulong)(q).path & 0xf)
     37 #define UNIT(q)		(((ulong)(q).path>>4) & 0xff)
     38 #define L(q)		(((ulong)(q).path>>12) & 0xf)
     39 #define QID(u, t) 	((u)<<4 | (t))
     40 #define Q3(l, u, t)	((l)<<8 | QID(u, t))
     41 #define UP(d)		((d)->flag & Dup)
     42 
     43 #define	Ticks		msec()
     44 #define	Ms2tk(t)	(((t)*HZ)/1000)
     45 #define	Tk2ms(t)	(((t)*1000)/HZ)
     46 
/*
 * File types stored in the low bits of a qid path (see TYPE()),
 * followed by assorted driver constants.
 */
enum {
	Qzero,				/* root of the device tree */
	Qtopdir		= 1,		/* the "aoe" directory */
	Qtopbase,
	Qtopctl		= Qtopbase,	/* top-level "ctl" */
	Qtoplog,			/* top-level "log" */
	Qtopend,

	Qunitdir,			/* per-unit directory */
	Qunitbase,
	Qctl		= Qunitbase,	/* per-unit "ctl" */
	Qdata,
	Qconfig,
	Qident,

	Qdevlinkdir,			/* per-unit "devlink" directory */
	Qdevlinkbase,
	Qdevlink	= Qdevlinkbase,
	Qdevlinkend,

	/* number of files in the top and devlink directories */
	Qtopfiles	= Qtopend-Qtopbase,
	Qdevlinkfiles	= Qdevlinkend-Qdevlinkbase,

	/* event log ring: Nevents slots of Eventlen bytes each */
	Eventlen 	= 256,
	Nevents 	= 64,

	Fread		= 0,
	Fwrite,
	Tfree		= -1,		/* tag value marking an unused Frame */
	Tmgmt,				/* == 0; newtag() never returns it */

	/* round trip bounds, timeouts, in ticks */
	Rtmax		= Ms2tk(320),
	Rtmin		= Ms2tk(20),
	Srbtimeout	= 45*HZ,

	Dbcnt		= 1024,		/* byte count fallen back to after a jumbo failure */

	/* NOTE(review): these look like ATA command opcodes — confirm */
	Crd		= 0x20,
	Crdext		= 0x24,
	Cwr		= 0x30,
	Cwrext		= 0x34,
	Cid		= 0xec,
};
     91 
/* Direction values passed as the write argument of rw(). */
enum {
	Read,
	Write,
};
     96 
/*
 * unified set of flags
 * a Netlink + Aoedev must both be jumbo capable
 * to send jumbograms to that interface.
 */
enum {
	/* sync with ahci.h */
	Dllba 	= 1<<0,
	Dsmart	= 1<<1,
	Dpower	= 1<<2,
	Dnop	= 1<<3,
	Datapi	= 1<<4,
	Datapi16= 1<<5,

	/* aoe specific */
	Dup	= 1<<6,
	Djumbo	= 1<<7,
};
    115 
/* Printable names for the flag bits above, indexed by bit position (used by pflag). */
static char *flagname[] = {
	"llba",
	"smart",
	"power",
	"nop",
	"atapi",
	"atapi16",

	"up",
	"jumbo",
};
    127 
/* One local network interface used to reach AoE targets. */
typedef struct {
	uchar	flag;
	uchar	lostjumbo;	/* bumped by the sweeper when a large frame times out */
	int	datamtu;

	Chan	*cc;		/* nil marks an unused slot (see discover, topctlread) */
	Chan	*dc;		/* frames are written here with bwrite */
	Chan	*mtu;		/* open early to prevent bind issues. */
	char	path[Maxpath];
	uchar	ea[Eaddrlen];	/* copied into the source address of outgoing frames */
} Netlink;
    139 
/* A path from a device to a Netlink, with the target addresses seen on it. */
typedef struct {
	Netlink	*nl;
	int	nea;			/* number of valid rows in eatab */
	ulong	eaidx;			/* round-robin counter used by pickea */
	uchar	eatab[Nea][Eaddrlen];
	int	datamtu;
	ulong	npkt;			/* frames sent over this link */
	ulong	resent;			/* frames retransmitted over this link */
	uchar	flag;

	ulong	rttavg;			/* smoothed round trip time, maintained by rtupdate */
	ulong	mintimer;		/* lower bound applied by rtupdate */
} Devlink;
    153 
/* One queued read or write request; atarw splits it into frames. */
typedef struct Srb Srb;
struct Srb {
	Rendez	rend;		/* the issuer sleeps here in strategy() */
	Srb	*next;		/* link in the device's head/tail queue */
	ulong	ticksent;	/* set when the srb is allocated */
	ulong	len;		/* bytes not yet handed to a frame */
	vlong	sector;		/* next sector to transfer */
	short	write;
	short	nout;		/* frames outstanding for this srb */
	char	*error;		/* set by srberror */
	void	*dp;		/* current position within data */
	void	*data;		/* start of the transfer buffer */
};
    167 
/* One in-flight (or free) request frame of a device. */
typedef struct {
	int	tag;		/* Tfree when the frame is unused */
	ulong	bcnt;		/* bytes requested by this frame */
	ulong	dlen;		/* payload bytes copied after the header by allocfb */
	vlong	lba;
	ulong	ticksent;
	int	nhdr;		/* bytes of hdr in use */
	uchar	hdr[ETHERMINTU];
	void	*dp;		/* payload source for allocfb */
	Devlink	*dl;		/* link chosen by hset */
	Netlink	*nl;
	int	eaidx;		/* index into dl->eatab chosen by hset */
	Srb	*srb;		/* request this frame belongs to, or nil */
} Frame;
    182 
/* Per-target state for one AoE device (major.minor). */
typedef struct Aoedev Aoedev;
struct Aoedev {
	QLock	qlock;
	Aoedev	*next;		/* link in devs.d */

	ulong	vers;		/* compared against qid.vers in unitread */

	int	ndl;		/* number of devlinks in use */
	ulong	dlidx;		/* round-robin counter used by pickdevlink */
	Devlink	*dl;
	Devlink	dltab[Ndevlink];

	ushort	fwver;
	uchar	flag;
	int	nopen;		/* opens of the data file */
	int	major;
	int	minor;
	int	unit;		/* key matched by unit2dev */
	int	lasttag;	/* counter feeding newtag */
	int	nframes;
	Frame	*frames;
	vlong	bsize;		/* device size in bytes as used by rw and unitgen */
	vlong	realbsize;

	uint	maxbcnt;	/* largest byte count placed in one frame */
	uint	maxmtu;
	ulong	lostjumbo;
	ushort	nout;		/* frames outstanding */
	ushort	maxout;		/* limit on nout, adjusted by the sweeper */
	ulong	lastwadj;	/* tick of the last maxout adjustment */
	Srb	*head;		/* queue of waiting requests */
	Srb	*tail;
	Srb	*inprocess;	/* request currently being split into frames */

	char	serial[20+1];
	char	firmware[8+1];
	char	model[40+1];
	int	nconfig;	/* valid bytes in config */
	uchar	config[1024];
	uchar	ident[512];
};
    224 
    225 //#pragma	varargck type	"æ"	Aoedev*
    226 
/*
 * Event log ring: Nevents slots of Eventlen bytes.  The first byte of
 * a slot is the message length (0 means empty); the text follows.
 */
static struct {
	Lock	lk;
	QLock	qlock;
	Rendez	rend;
	char	buf[Eventlen*Nevents];
	char	*rp;		/* next slot to read */
	char	*wp;		/* next slot to write */
} events;
    235 
/* List of known devices; walkers take rwlock for reading. */
static struct {
	RWlock	rwlock;
	int	nd;
	Aoedev	*d;
} devs;
    241 
/* Table of bound network interfaces. */
static struct {
	Lock	lk;
	int	reader[Nnetlink];	/* reader is running. */
	Rendez	rendez[Nnetlink];	/* confirm exit. */
	Netlink	nl[Nnetlink];
} netlinks;
    248 
/* Driver-wide state and shared error strings. */
extern	Dev 	aoedevtab;
static	Ref 	units;
static	Ref	drivevers;
static	int	debug;			/* gates dprint */
static	int	autodiscover	= 1;
static	int	rediscover;		/* when set, the sweeper periodically broadcasts a discover */
	char 	Enotup[] 	= "aoe device is down";
	char	Echange[]	= "media or partition has changed";
    257 
    258 static Srb*
    259 srballoc(ulong sz)
    260 {
    261 	Srb *srb;
    262 
    263 	srb = malloc(sizeof *srb+sz);
    264 	srb->dp = srb->data = srb+1;
    265 	srb->ticksent = Ticks;
    266 	return srb;
    267 }
    268 
    269 static Srb*
    270 srbkalloc(void *db, ulong dummy)
    271 {
    272 	Srb *srb;
    273 
    274 	srb = malloc(sizeof *srb);
    275 	srb->dp = srb->data = db;
    276 	srb->ticksent = Ticks;
    277 	return srb;
    278 }
    279 
/* Release an Srb obtained from srballoc or srbkalloc. */
#define srbfree(srb) free(srb)
    281 
/*
 * Fail srb with message s: record the error, account for one fewer
 * outstanding frame, and wake the sleeper on the srb's Rendez.
 */
static void
srberror(Srb *srb, char *s)
{
	srb->error = s;
	srb->nout--;
	WAKEUP(srb);
}
    289 
    290 static void
    291 frameerror(Aoedev *d, Frame *f, char *s)
    292 {
    293 	Srb *srb;
    294 
    295 	srb = f->srb;
    296 	if(f->tag == Tfree)
    297 		return;
    298 	f->srb = nil;
    299 	f->tag = Tfree;		/* don't get fooled by way-slow responses */
    300 	if(!srb)
    301 		return;
    302 	srberror(srb, s);
    303 	d->nout--;
    304 }
    305 
    306 static char*
    307 unitname(Aoedev *d)
    308 {
    309 	uprint("%d.%d", d->major, d->minor);
    310 	return up->genbuf;
    311 }
    312 
    313 static long
    314 eventlogread(void *a, long n)
    315 {
    316 	int len;
    317 	char *p, *buf;
    318 
    319 	buf = smalloc(Eventlen);
    320 	QLOCK(&events);
    321 	LOCK(&events);
    322 	p = events.rp;
    323 	len = *p;
    324 	if(len == 0){
    325 		n = 0;
    326 		UNLOCK(&events);
    327 	} else {
    328 		if(n > len)
    329 			n = len;
    330 		/* can't move directly into pageable space with events lock held */
    331 		memmove(buf, p+1, n);
    332 		*p = 0;
    333 		events.rp = p += Eventlen;
    334 		if(p >= events.buf + sizeof events.buf)
    335 			events.rp = events.buf;
    336 		UNLOCK(&events);
    337 
    338 		/* the concern here is page faults in memmove below */
    339 		if(waserror()){
    340 			free(buf);
    341 			QUNLOCK(&events);
    342 			nexterror();
    343 		}
    344 		memmove(a, buf, n);
    345 		poperror();
    346 	}
    347 	free(buf);
    348 	QUNLOCK(&events);
    349 	return n;
    350 }
    351 
    352 static int
    353 eventlog(char *fmt, ...)
    354 {
    355 	int dragrp, n;
    356 	char *p;
    357 	va_list arg;
    358 
    359 	LOCK(&events);
    360 	p = events.wp;
    361 	dragrp = *p++;
    362 	va_start(arg, fmt);
    363 	n = vsnprint(p, Eventlen-1, fmt, arg);
    364 	*--p = n;
    365 	p = events.wp += Eventlen;
    366 	if(p >= events.buf + sizeof events.buf)
    367 		p = events.wp = events.buf;
    368 	if(dragrp)
    369 		events.rp = p;
    370 	UNLOCK(&events);
    371 	WAKEUP(&events);
    372 	return n;
    373 }
    374 
    375 static int
    376 eventcount(void)
    377 {
    378 	int n;
    379 
    380 	LOCK(&events);
    381 	if(*events.rp == 0)
    382 		n = 0;
    383 	else if(events.wp < events.rp)
    384 		n = Nevents - (events.rp - events.wp);
    385 	else
    386 		n = events.wp - events.rp;
    387 	UNLOCK(&events);
    388 	return n/Eventlen;
    389 }
    390 
    391 static int
    392 tsince(int tag)
    393 {
    394 	int n;
    395 
    396 	n = Ticks & 0xffff;
    397 	n -= tag & 0xffff;
    398 	if(n < 0)
    399 		n += 1<<16;
    400 	return n;
    401 }
    402 
    403 static int
    404 newtag(Aoedev *d)
    405 {
    406 	int t;
    407 
    408 	do {
    409 		t = ++d->lasttag << 16;
    410 		t |= Ticks & 0xffff;
    411 	} while (t == Tfree || t == Tmgmt);
    412 	return t;
    413 }
    414 
    415 static void
    416 downdev(Aoedev *d, char *err)
    417 {
    418 	Frame *f, *e;
    419 
    420 	d->flag &= ~Dup;
    421 	f = d->frames;
    422 	e = f + d->nframes;
    423 	for(; f < e; f->tag = Tfree, f->srb = nil, f++)
    424 		frameerror(d, f, Enotup);
    425 	d->inprocess = nil;
    426 	eventlog("%æ: removed; %s\n", d, err);
    427 }
    428 
    429 static Block*
    430 allocfb(Frame *f)
    431 {
    432 	int len;
    433 	Block *b;
    434 
    435 	len = f->nhdr + f->dlen;
    436 	if(len < ETHERMINTU)
    437 		len = ETHERMINTU;
    438 	b = allocb(len);
    439 	memmove(b->wp, f->hdr, f->nhdr);
    440 	if(f->dlen)
    441 		memmove(b->wp + f->nhdr, f->dp, f->dlen);
    442 	b->wp += len;
    443 	return b;
    444 }
    445 
    446 static void
    447 putlba(Aoeata *a, vlong lba)
    448 {
    449 	uchar *c;
    450 
    451 	c = a->lba;
    452 	c[0] = lba;
    453 	c[1] = lba >> 8;
    454 	c[2] = lba >> 16;
    455 	c[3] = lba >> 24;
    456 	c[4] = lba >> 32;
    457 	c[5] = lba >> 40;
    458 }
    459 
    460 static Devlink*
    461 pickdevlink(Aoedev *d)
    462 {
    463 	ulong i, n;
    464 	Devlink *l;
    465 
    466 	for(i = 0; i < d->ndl; i++){
    467 		n = d->dlidx++ % d->ndl;
    468 		l = d->dl + n;
    469 		if(l && l->flag & Dup)
    470 			return l;
    471 	}
    472 	return 0;
    473 }
    474 
    475 static int
    476 pickea(Devlink *l)
    477 {
    478 	if(l == 0)
    479 		return -1;
    480 	if(l->nea == 0)
    481 		return -1;
    482 	return l->eaidx++ % l->nea;
    483 }
    484 
    485 static int
    486 hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
    487 {
    488 	int i;
    489 	Devlink *l;
    490 
    491 	if(f->srb)
    492 	if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
    493 		eventlog("%æ: srb timeout\n", d);
    494 		frameerror(d, f, Etimedout);
    495 		return -1;
    496 	}
    497 	l = pickdevlink(d);
    498 	i = pickea(l);
    499 	if(i == -1){
    500 		downdev(d, "resend fails; no netlink/ea");
    501 		return -1;
    502 	}
    503 	memmove(h->dst, l->eatab[i], Eaddrlen);
    504 	memmove(h->src, l->nl->ea, sizeof h->src);
    505 	hnputs(h->type, Aoetype);
    506 	h->verflag = Aoever << 4;
    507 	h->error = 0;
    508 	hnputs(h->major, d->major);
    509 	h->minor = d->minor;
    510 	h->cmd = cmd;
    511 
    512 	hnputl(h->tag, f->tag = newtag(d));
    513 	f->dl = l;
    514 	f->nl = l->nl;
    515 	f->eaidx = i;
    516 	f->ticksent = Ticks;
    517 
    518 	return f->tag;
    519 }
    520 
/*
 * Retransmit frame f.  The header is rebuilt with hset (new tag, newly
 * picked link and address), the byte count is clamped to d->maxbcnt,
 * and a copy of the frame is written to the netlink's data channel.
 * Returns 0 on success, -1 if hset fails or the write raises an error.
 */
static int
resend(Aoedev *d, Frame *f)
{
	ulong n;
	Aoeata *a;

	a = (Aoeata*)f->hdr;
	if(hset(d, f, (Aoehdr*)a, a->cmd) == -1)
		return -1;
	n = f->bcnt;
	if(n > d->maxbcnt){
		n = d->maxbcnt;		/* mtu mismatch (jumbo fail?) */
		if(f->dlen > n)
			f->dlen = n;
	}
	a->scnt = n / Aoesectsz;
	/* count this as both a resend and a packet on the chosen link */
	f->dl->resent++;
	f->dl->npkt++;
	if(waserror())
		/* should remove the netlink */
		return -1;
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
	poperror();
	return 0;
}
    546 
/*
 * Send an AoE config query (ACconfig) for major.minor, addressed to
 * the all-ones ethernet address, on every netlink whose cc is open.
 */
static void
discover(int major, int minor)
{
	Aoehdr *h;
	Block *b;
	Netlink *nl, *e;

	nl = netlinks.nl;
	e = nl + nelem(netlinks.nl);
	for(; nl < e; nl++){
		if(nl->cc == nil)
			continue;
		b = allocb(ETHERMINTU);
		/* free the block if anything raises while the header is built */
		if(waserror()){
			freeb(b);
			nexterror();
		}
		b->wp = b->rp + ETHERMINTU;
		memset(b->rp, 0, ETHERMINTU);
		h = (Aoehdr*)b->rp;
		memset(h->dst, 0xff, sizeof h->dst);
		memmove(h->src, nl->ea, sizeof h->src);
		hnputs(h->type, Aoetype);
		h->verflag = Aoever << 4;
		hnputs(h->major, major);
		h->minor = minor;
		h->cmd = ACconfig;
		poperror();
		/* the error guard is popped before the block is handed to bwrite */
		devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
	}
}
    578 
    579 /*
    580  * Check all frames on device and resend any frames that have been
    581  * outstanding for 200% of the device round trip time average.
    582  */
    583 static void
    584 aoesweepproc(void *dummy)
    585 {
    586 	ulong i, tx, timeout, nbc;
    587 	vlong starttick;
    588 	enum { Nms = 100, Nbcms = 30*1000, };
    589 	uchar *ea;
    590 	Aoeata *a;
    591 	Aoedev *d;
    592 	Devlink *l;
    593 	Frame *f, *e;
    594 
    595 	nbc = Nbcms/Nms;
    596 loop:
    597 	if(nbc-- == 0){
    598 		if(rediscover && !waserror()){
    599 			discover(0xffff, 0xff);
    600 			poperror();
    601 		}
    602 		nbc = Nbcms/Nms;
    603 	}
    604 	starttick = Ticks;
    605 	RLOCK(&devs);
    606 	for(d = devs.d; d; d = d->next){
    607 		if(!CANQLOCK(d))
    608 			continue;
    609 		if(!UP(d)){
    610 			QUNLOCK(d);
    611 			continue;
    612 		}
    613 		tx = 0;
    614 		f = d->frames;
    615 		e = f + d->nframes;
    616 		for (; f < e; f++){
    617 			if(f->tag == Tfree)
    618 				continue;
    619 			l = f->dl;
    620 			timeout = l->rttavg << 1;
    621 			i = tsince(f->tag);
    622 			if(i < timeout)
    623 				continue;
    624 			if(d->nout == d->maxout){
    625 				if(d->maxout > 1)
    626 					d->maxout--;
    627 				d->lastwadj = Ticks;
    628 			}
    629 			a = (Aoeata*)f->hdr;
    630 			if(a->scnt > Dbcnt / Aoesectsz &&
    631 			   ++f->nl->lostjumbo > (d->nframes << 1)){
    632 				ea = f->dl->eatab[f->eaidx];
    633 				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
    634 					d, f->nl->path, ea, f->lba);
    635 				d->maxbcnt = Dbcnt;
    636 				d->flag &= ~Djumbo;
    637 			}
    638 			resend(d, f);
    639 			if(tx++ == 0){
    640 				if((l->rttavg <<= 1) > Rtmax)
    641 					l->rttavg = Rtmax;
    642 				eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
    643 			}
    644 		}
    645 		if(d->nout == d->maxout && d->maxout < d->nframes &&
    646 		   TK2MS(Ticks-d->lastwadj) > 10*1000){
    647 			d->maxout++;
    648 			d->lastwadj = Ticks;
    649 		}
    650 		QUNLOCK(d);
    651 	}
    652 	RUNLOCK(&devs);
    653 	i = Nms - TK2MS(Ticks - starttick);
    654 	if(i > 0)
    655 		tsleep(&up->sleep, return0, 0, i);
    656 	goto loop;
    657 }
    658 
    659 static int
    660 fmtaoe(Fmt *f)
    661 {
    662 	char buf[16];
    663 	Aoedev *d;
    664 
    665 	d = va_arg(f->args, Aoedev*);
    666 	snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
    667 	return fmtstrcpy(f, buf);
    668 }
    669 
    670 static void netbind(char *path);
    671 
/*
 * Bind the interfaces named by the "aoeif" configuration variable.
 *
 * Currently a no-op: the getconf lookup is commented out and the
 * unconditional if(1) returns before the loop, so nothing below the
 * return runs.  The loop would turn each "etherN" or "#lN" word into
 * "#lN/etherN" and pass it to netbind, ignoring errors.
 */
static void
aoecfg(void)
{
	int n, i;
	char *p, *f[32], buf[24];

	if(1)
//	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
		return;
	/* goo! */
	for(i = 0; i < n; i++){
		p = f[i];
		if(strncmp(p, "ether", 5) == 0)
			snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
		else if(strncmp(p, "#l", 2) == 0)
			snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
		else
			continue;
		if(!waserror()){
			netbind(buf);
			poperror();
		}
	}
}
    696 
/*
 * One-time driver setup: install the 'æ' format verb, reset the event
 * ring pointers, start the "aoesweep" kernel process and run aoecfg.
 * A caller that finds the lock held returns immediately without
 * waiting for the setup to finish.
 */
static void
aoeinit(void)
{
	static int init;
	static QLock l;

	if(!canqlock(&l))
		return;
	if(init == 0){
		fmtinstall(L'æ', fmtaoe);
		events.rp = events.wp = events.buf;
		kproc("aoesweep", aoesweepproc, nil);
		aoecfg();
		init = 1;
	}
	qunlock(&l);
}
    714 
/*
 * Attach to the device.  A non-empty spec raises Enonexist.  The
 * driver is initialized on first use and the returned channel's qid
 * is the root directory (Qzero).
 */
static Chan*
aoeattach(char *spec)
{
	Chan *c;

	if(*spec)
		error(Enonexist);
	aoeinit();
	c = devattach(L'æ', spec);
	mkqid(&c->qid, Qzero, 0, QTDIR);
	return c;
}
    727 
    728 static Aoedev*
    729 unitseq(ulong unit)
    730 {
    731 	int i;
    732 	Aoedev *d;
    733 
    734 	i = 0;
    735 	RLOCK(&devs);
    736 	for(d = devs.d; d; d = d->next)
    737 		if(i++ == unit)
    738 			break;
    739 	RUNLOCK(&devs);
    740 	return d;
    741 }
    742 
    743 static Aoedev*
    744 unit2dev(ulong unit)
    745 {
    746 	Aoedev *d;
    747 
    748 	RLOCK(&devs);
    749 	for(d = devs.d; d; d = d->next)
    750 		if(d->unit == unit){
    751 			RUNLOCK(&devs);
    752 			return d;
    753 		}
    754 	RUNLOCK(&devs);
    755 	error("unit lookup failure");
    756 	return nil;
    757 }
    758 
/*
 * Fill dp with the directory entry for file `type` of the unit named
 * by c's qid.  Sizes are reported only while the device is up.
 * Returns 1 on success, -1 for a type that is not a unit file.
 */
static int
unitgen(Chan *c, ulong type, Dir *dp)
{
	int perm, t;
	ulong vers;
	vlong size;
	char *p;
	Aoedev *d;
	Qid q;

	d = unit2dev(UNIT(c->qid));
	/* defaults, overridden per file below */
	perm = 0644;
	size = 0;
	vers = d->vers;
	t = QTFILE;

	switch(type){
	default:
		return -1;
	case Qctl:
		p = "ctl";
		break;
	case Qdata:
		p = "data";
		perm = 0640;
		if(UP(d))
			size = d->bsize;
		break;
	case Qconfig:
		p = "config";
		if(UP(d))
			size = d->nconfig;
		break;
	case Qident:
		p = "ident";
		if(UP(d))
			size = sizeof d->ident;
		break;
	case Qdevlinkdir:
		p = "devlink";
		t = QTDIR;
		perm = 0555;
		break;
	}
	mkqid(&q, QID(UNIT(c->qid), type), vers, t);
	devdir(c, q, p, size, eve, perm, dp);
	return 1;
}
    807 
/*
 * Fill d with the directory entry for a top-level file ("ctl" or
 * "log").  The log's size is the current event count.
 * Returns 1 on success, -1 for any other type.
 */
static int
topgen(Chan *c, ulong type, Dir *d)
{
	int perm;
	vlong size;
	char *p;
	Qid q;

	perm = 0444;
	size = 0;
	switch(type){
	default:
		return -1;
	case Qtopctl:
		p = "ctl";
		perm = 0644;
		break;
	case Qtoplog:
		p = "log";
		size = eventcount();
		break;
	}
	mkqid(&q, type, 0, QTFILE);
	devdir(c, q, p, size, eve, perm, d);
	return 1;
}
    834 
    835 static int
    836 aoegen(Chan *c, char *d0, Dirtab *d1, int d2, int s, Dir *dp)
    837 {
    838 	int i;
    839 	Aoedev *d;
    840 	Qid q;
    841 
    842 	if(c->qid.path == 0){
    843 		switch(s){
    844 		case DEVDOTDOT:
    845 			q.path = 0;
    846 			q.type = QTDIR;
    847 			devdir(c, q, "#æ", 0, eve, 0555, dp);
    848 			break;
    849 		case 0:
    850 			q.path = Qtopdir;
    851 			q.type = QTDIR;
    852 			devdir(c, q, "aoe", 0, eve, 0555, dp);
    853 			break;
    854 		default:
    855 			return -1;
    856 		}
    857 		return 1;
    858 	}
    859 
    860 	switch(TYPE(c->qid)){
    861 	default:
    862 		return -1;
    863 	case Qtopdir:
    864 		if(s == DEVDOTDOT){
    865 			mkqid(&q, Qzero, 0, QTDIR);
    866 			devdir(c, q, "aoe", 0, eve, 0555, dp);
    867 			return 1;
    868 		}
    869 		if(s < Qtopfiles)
    870 			return topgen(c, Qtopbase + s, dp);
    871 		s -= Qtopfiles;
    872 		if((d = unitseq(s)) == 0)
    873 			return -1;
    874 		mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
    875 		devdir(c, q, unitname(d), 0, eve, 0555, dp);
    876 		return 1;
    877 	case Qtopctl:
    878 	case Qtoplog:
    879 		return topgen(c, TYPE(c->qid), dp);
    880 	case Qunitdir:
    881 		if(s == DEVDOTDOT){
    882 			mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
    883 			uprint("%uld", UNIT(c->qid));
    884 			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
    885 			return 1;
    886 		}
    887 		return unitgen(c, Qunitbase+s, dp);
    888 	case Qctl:
    889 	case Qdata:
    890 	case Qconfig:
    891 	case Qident:
    892 		return unitgen(c, TYPE(c->qid), dp);
    893 	case Qdevlinkdir:
    894 		i = UNIT(c->qid);
    895 		if(s == DEVDOTDOT){
    896 			mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
    897 			devdir(c, q, "devlink", 0, eve, 0555, dp);
    898 			return 1;
    899 		}
    900 		if(i >= units.ref)
    901 			return -1;
    902 		d = unit2dev(i);
    903 		if(s >= d->ndl)
    904 			return -1;
    905 		uprint("%d", s);
    906 		mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
    907 		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
    908 		return 1;
    909 	case Qdevlink:
    910 		uprint("%d", s);
    911 		mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
    912 		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
    913 		return 1;
    914 	}
    915 }
    916 
/* Walk entry point: delegates to devwalk with aoegen as the generator. */
static Walkqid*
aoewalk(Chan *c, Chan *nc, char **name, int nname)
{
	return devwalk(c, nc, name, nname, nil, 0, aoegen);
}
    922 
/* Stat entry point: delegates to devstat with aoegen as the generator. */
static int
aoestat(Chan *c, uchar *db, int n)
{
	return devstat(c, db, n, nil, 0, aoegen);
}
    928 
/*
 * Open a file.  Everything except a data file goes straight to
 * devopen.  Opening a data file requires the device to be up (else
 * Enotup is raised) and increments the device's open count, all
 * under the device lock.
 */
static Chan*
aoeopen(Chan *c, int omode)
{
	Aoedev *d;

	if(TYPE(c->qid) != Qdata)
		return devopen(c, omode, 0, 0, aoegen);

	d = unit2dev(UNIT(c->qid));
	QLOCK(d);
	/* release the device lock if anything below raises */
	if(waserror()){
		QUNLOCK(d);
		nexterror();
	}
	if(!UP(d))
		error(Enotup);
	c = devopen(c, omode, 0, 0, aoegen);
	d->nopen++;
	poperror();
	QUNLOCK(d);
	return c;
}
    951 
/*
 * Close a file.  Only open data files matter: the device's open count
 * is decremented and, when it reaches zero, a discover is sent for
 * the unit.  An error raised by discover is absorbed by the waserror
 * guard so the lock is still released.
 */
static void
aoeclose(Chan *c)
{
	Aoedev *d;

	if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
		return;

	d = unit2dev(UNIT(c->qid));
	QLOCK(d);
	if(--d->nopen == 0 && !waserror()){
		discover(d->major, d->minor);
		poperror();
	}
	QUNLOCK(d);
}
    968 
/*
 * Issue the next piece of d->inprocess in frame f.
 *
 * At most d->maxbcnt bytes are taken from the request; the frame
 * header is built, the request's position, remaining length and
 * sector are advanced, and the frame is written to the netlink's data
 * channel.  When the request has been fully handed out,
 * d->inprocess is cleared.  Returns without sending if hset fails.
 */
static void
atarw(Aoedev *d, Frame *f)
{
	ulong bcnt;
	char extbit, writebit;
	Aoeata *ah;
	Srb *srb;

	/* command bits; cleared below when not applicable */
	extbit = 0x4;
	writebit = 0x10;

	srb = d->inprocess;
	bcnt = d->maxbcnt;
	if(bcnt > srb->len)
		bcnt = srb->len;
	f->nhdr = Szaoeata;
	memset(f->hdr, 0, f->nhdr);
	ah = (Aoeata*)f->hdr;
	if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
		return;
	f->dp = srb->dp;
	f->bcnt = bcnt;
	f->lba = srb->sector;
	f->srb = srb;

	ah->scnt = bcnt / Aoesectsz;
	putlba(ah, f->lba);
	if(d->flag & Dllba)
		ah->aflag |= AAFext;
	else {
		extbit = 0;
		ah->lba[3] &= 0x0f;
		ah->lba[3] |= 0xe0;	/* LBA bit+obsolete 0xa0 */
	}
	/* only writes carry payload in the outgoing frame */
	if(srb->write){
		ah->aflag |= AAFwrite;
		f->dlen = bcnt;
	}else{
		writebit = 0;
		f->dlen = 0;
	}
	ah->cmdstat = 0x20 | writebit | extbit;

	/* mark tracking fields and load out */
	srb->nout++;
	srb->dp = (uchar*)srb->dp + bcnt;
	srb->len -= bcnt;
	srb->sector += bcnt / Aoesectsz;
	if(srb->len == 0)
		d->inprocess = nil;
	d->nout++;
	f->dl->npkt++;
	/* if the write raises, free the frame and drop the in-process request */
	if(waserror()){
		f->tag = Tfree;
		d->inprocess = nil;
		nexterror();
	}
	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
	poperror();
}
   1029 
   1030 static char*
   1031 aoeerror(Aoehdr *h)
   1032 {
   1033 	int n;
   1034 	static char *errs[] = {
   1035 		"aoe protocol error: unknown",
   1036 		"aoe protocol error: bad command code",
   1037 		"aoe protocol error: bad argument param",
   1038 		"aoe protocol error: device unavailable",
   1039 		"aoe protocol error: config string present",
   1040 		"aoe protocol error: unsupported version",
   1041 	};
   1042 
   1043 	if((h->verflag & AFerr) == 0)
   1044 		return 0;
   1045 	n = h->error;
   1046 	if(n > nelem(errs))
   1047 		n = 0;
   1048 	return errs[n];
   1049 }
   1050 
/*
 * Fold a round trip sample into link l's smoothed average.
 *
 * A negative rtt is handled by magnitude: it is clamped to
 * [Rtmin, Rtmax] and also pulls mintimer halfway toward itself.  A
 * non-negative rtt is clamped to [mintimer, Rtmax].  The average
 * then moves a quarter of the way toward the sample.
 */
static void
rtupdate(Devlink *l, int rtt)
{
	int n;

	n = rtt;
	if(rtt < 0){
		n = -rtt;
		if(n < Rtmin)
			n = Rtmin;
		else if(n > Rtmax)
			n = Rtmax;
		l->mintimer += (n - l->mintimer) >> 1;
	} else if(n < l->mintimer)
		n = l->mintimer;
	else if(n > Rtmax)
		n = Rtmax;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
	n -= l->rttavg;
	l->rttavg += n >> 2;
}
   1073 
   1074 static int
   1075 srbready(void *v)
   1076 {
   1077 	Srb *s;
   1078 
   1079 	s = v;
   1080 	return s->error || (!s->nout && !s->len);
   1081 }
   1082 
   1083 static Frame*
   1084 getframe(Aoedev *d, int tag)
   1085 {
   1086 	Frame *f, *e;
   1087 
   1088 	f = d->frames;
   1089 	e = f + d->nframes;
   1090 	for(; f < e; f++)
   1091 		if(f->tag == tag)
   1092 			return f;
   1093 	return nil;
   1094 }
   1095 
   1096 static Frame*
   1097 freeframe(Aoedev *d)
   1098 {
   1099 	if(d->nout < d->maxout)
   1100 		return getframe(d, Tfree);
   1101 	return nil;
   1102 }
   1103 
   1104 static void
   1105 work(Aoedev *d)
   1106 {
   1107 	Frame *f;
   1108 
   1109 	while(f = freeframe(d)) {
   1110 		if(d->inprocess == nil){
   1111 			if(d->head == nil)
   1112 				return;
   1113 			d->inprocess = d->head;
   1114 			d->head = d->head->next;
   1115 			if(d->head == nil)
   1116 				d->tail = nil;
   1117 		}
   1118 		atarw(d, f);
   1119 	}
   1120 }
   1121 
/*
 * Queue srb on device d, start as much work as the window allows, and
 * sleep until the request is ready (see srbready).
 */
static void
strategy(Aoedev *d, Srb *srb)
{
	QLOCK(d);
	if(waserror()){
		QUNLOCK(d);
		nexterror();
	}
	/* append to the tail of the request queue */
	srb->next = nil;
	if(d->tail)
		d->tail->next = srb;
	d->tail = srb;
	if(d->head == nil)
		d->head = srb;
	work(d);
	poperror();
	QUNLOCK(d);

	/*
	 * an error raised during the sleep lands back in this loop,
	 * which re-arms the handler and sleeps again.
	 */
	while(waserror())
		;
	SLEEP(srb, srbready, srb);
	poperror();
}
   1145 
/* True when there is no current process, or when a lies above up->pmmu.uzero+USTKTOP. */
#define iskaddr(a)	(!up || (uintptr)(a) > up->pmmu.uzero+USTKTOP)
   1147 
   1148 static long
   1149 rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
   1150 {
   1151 	long n, nlen, copy;
   1152 	enum { Srbsz = 1<<19, };
   1153 	Srb *srb;
   1154 
   1155 	if((off|len) & (Aoesectsz-1))
   1156 		error("offset and length must be sector multiple.\n");
   1157 	if(off >= d->bsize)
   1158 		return 0;
   1159 	if(off + len > d->bsize)
   1160 		len = d->bsize - off;
   1161 	copy = 0;
   1162 	if(iskaddr(db)){
   1163 panic("iskaddr %p %p\n", db);
   1164 		srb = srbkalloc(db, len);
   1165 		copy = 1;
   1166 	}else
   1167 		srb = srballoc(Srbsz <= len? Srbsz: len);
   1168 	if(waserror()){
   1169 		srbfree(srb);
   1170 		nexterror();
   1171 	}
   1172 	srb->write = write;
   1173 	for(nlen = len; nlen; nlen -= n){
   1174 		if(!UP(d))
   1175 			error(Eio);
   1176 		srb->sector = off / Aoesectsz;
   1177 		srb->dp = srb->data;
   1178 		n = nlen;
   1179 		if(n > Srbsz)
   1180 			n = Srbsz;
   1181 		srb->len = n;
   1182 		if(write && !copy)
   1183 			memmove(srb->data, db, n);
   1184 		strategy(d, srb);
   1185 		if(srb->error)
   1186 			error(srb->error);
   1187 		if(!write && !copy)
   1188 			memmove(db, srb->data, n);
   1189 		db += n;
   1190 		off += n;
   1191 	}
   1192 	poperror();
   1193 	srbfree(srb);
   1194 	return len;
   1195 }
   1196 
   1197 static long
   1198 readmem(ulong off, void *dst, long n, void *src, long size)
   1199 {
   1200 	if(off >= size)
   1201 		return 0;
   1202 	if(off + n > size)
   1203 		n = size - off;
   1204 	memmove(dst, (uchar*)src + off, n);
   1205 	return n;
   1206 }
   1207 
   1208 static char*
   1209 pflag(char *s, char *e, uchar f)
   1210 {
   1211 	uchar i;
   1212 
   1213 	for(i = 0; i < nelem(flagname); i++)
   1214 		if(f & 1 << i)
   1215 			s = seprint(s, e, "%s ", flagname[i]);
   1216 	return seprint(s, e, "\n");
   1217 }
   1218 
   1219 static int
   1220 pstat(Aoedev *d, char *db, int len, int off)
   1221 {
   1222 	int i;
   1223 	char *state, *s, *p, *e;
   1224 
   1225 	s = p = malloc(1024);
   1226 	e = p + 1024;
   1227 
   1228 	state = "down";
   1229 	if(UP(d))
   1230 		state = "up";
   1231 
   1232 	p = seprint(p, e,
   1233 		"state: %s\n"	"nopen: %d\n"	"nout: %d\n"
   1234 		"nmaxout: %d\n"	"nframes: %d\n"	"maxbcnt: %d [maxmtu %d]\n"
   1235 		"fw: %.4ux\n"
   1236 		"model: %s\n"	"serial: %s\n"	"firmware: %s\n",
   1237 		state,		d->nopen,	d->nout,
   1238 		d->maxout, 	d->nframes,	d->maxbcnt, d->maxmtu,
   1239 		d->fwver,
   1240 		d->model, 	d->serial, 	d->firmware);
   1241 	p = seprint(p, e, "flag: ");
   1242 	p = pflag(p, e, d->flag);
   1243 
   1244 	if(p - s < len)
   1245 		len = p - s;
   1246 	i = readstr(off, db, len, s);
   1247 	free(s);
   1248 	return i;
   1249 }
   1250 
/*
 * Read from one of a unit's files.  Raises Echange if the device's
 * version no longer matches the one in the channel's qid, Ebadarg
 * for a file type that is not handled here, and Enotup when config or
 * ident is read while the device is down.
 */
static long
unitread(Chan *c, void *db, long len, vlong off)
{
	Aoedev *d;

	d = unit2dev(UNIT(c->qid));
	if(d->vers != c->qid.vers)
		error(Echange);
	switch(TYPE(c->qid)){
	default:
		error(Ebadarg);
	case Qctl:
		return pstat(d, db, len, off);
	case Qdata:
		return rw(d, Read, db, len, off);
	case Qconfig:
		if(!UP(d))
			error(Enotup);
		return readmem(off, db, len, d->config, d->nconfig);
	case Qident:
		if(!UP(d))
			error(Enotup);
		return readmem(off, db, len, d->ident, sizeof d->ident);
	}
}
   1276 
   1277 static int
   1278 devlinkread(Chan *c, void *db, int len, int off)
   1279 {
   1280 	int i;
   1281 	char *s, *p, *e;
   1282 	Aoedev *d;
   1283 	Devlink *l;
   1284 
   1285 	d = unit2dev(UNIT(c->qid));
   1286 	i = L(c->qid);
   1287 	if(i >= d->ndl)
   1288 		return 0;
   1289 	l = d->dl + i;
   1290 
   1291 	s = p = malloc(1024);
   1292 	e = s + 1024;
   1293 
   1294 	p = seprint(p, e, "addr: ");
   1295 	for(i = 0; i < l->nea; i++)
   1296 		p = seprint(p, e, "%E ", l->eatab[i]);
   1297 	p = seprint(p, e, "\n");
   1298 	p = seprint(p, e, "npkt: %uld\n", l->npkt);
   1299 	p = seprint(p, e, "resent: %uld\n", l->resent);
   1300 	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
   1301 	p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
   1302 	p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
   1303 
   1304 	p = seprint(p, e, "nl path: %s\n", l->nl->path);
   1305 	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
   1306 	p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->flag);
   1307 	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
   1308 	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
   1309 
   1310 	if(p - s < len)
   1311 		len = p - s;
   1312 	i = readstr(off, db, len, s);
   1313 	free(s);
   1314 	return i;
   1315 }
   1316 
   1317 static long
   1318 topctlread(Chan *d0, void *db, int len, int off)
   1319 {
   1320 	int i;
   1321 	char *s, *p, *e;
   1322 	Netlink *n;
   1323 
   1324 	s = p = malloc(1024);
   1325 	e = s + 1024;
   1326 
   1327 	p = seprint(p, e, "debug: %d\n", debug);
   1328 	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
   1329 	p = seprint(p, e, "rediscover: %d\n", rediscover);
   1330 
   1331 	for(i = 0; i < Nnetlink; i++){
   1332 		n = netlinks.nl+i;
   1333 		if(n->cc == 0)
   1334 			continue;
   1335 		p = seprint(p, e, "if%d path: %s\n", i, n->path);
   1336 		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
   1337 		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
   1338 		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
   1339 		p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
   1340 	}
   1341 
   1342 	if(p - s < len)
   1343 		len = p - s;
   1344 	i = readstr(off, db, len, s);
   1345 	free(s);
   1346 	return i;
   1347 }
   1348 
   1349 static long
   1350 aoeread(Chan *c, void *db, long n, vlong off)
   1351 {
   1352 	switch(TYPE(c->qid)){
   1353 	default:
   1354 		error(Eperm);
   1355 	case Qzero:
   1356 	case Qtopdir:
   1357 	case Qunitdir:
   1358 	case Qdevlinkdir:
   1359 		return devdirread(c, db, n, 0, 0, aoegen);
   1360 	case Qtopctl:
   1361 		return topctlread(c, db, n, off);
   1362 	case Qtoplog:
   1363 		return eventlogread(db, n);
   1364 	case Qctl:
   1365 	case Qdata:
   1366 	case Qconfig:
   1367 	case Qident:
   1368 		return unitread(c, db, n, off);
   1369 	case Qdevlink:
   1370 		return devlinkread(c, db, n, off);
   1371 	}
   1372 }
   1373 
   1374 static long
   1375 configwrite(Aoedev *d, void *db, long len)
   1376 {
   1377 	char *s;
   1378 	Aoeqc *ch;
   1379 	Frame *f;
   1380 	Srb *srb;
   1381 
   1382 	if(!UP(d))
   1383 		error(Enotup);
   1384 	if(len > sizeof d->config)
   1385 		error(Etoobig);
   1386 	srb = srballoc(len);
   1387 	s = malloc(len);
   1388 	memmove(s, db, len);
   1389 	if(waserror()){
   1390 		srbfree(srb);
   1391 		free(s);
   1392 		nexterror();
   1393 	}
   1394 	for (;;) {
   1395 		QLOCK(d);
   1396 		if(waserror()){
   1397 			QUNLOCK(d);
   1398 			nexterror();
   1399 		}
   1400 		f = freeframe(d);
   1401 		if(f != nil)
   1402 			break;
   1403 		poperror();
   1404 		QUNLOCK(d);
   1405 		if(waserror())
   1406 			nexterror();
   1407 		tsleep(&up->sleep, return0, 0, 100);
   1408 		poperror();
   1409 	}
   1410 	f->nhdr = Szaoeqc;
   1411 	memset(f->hdr, 0, f->nhdr);
   1412 	ch = (Aoeqc*)f->hdr;
   1413 	if(hset(d, f, (Aoehdr*)ch, ACconfig) == -1)
   1414 		return 0;
   1415 	f->srb = srb;
   1416 	f->dp = s;
   1417 	ch->verccmd = AQCfset;
   1418 	hnputs(ch->cslen, len);
   1419 	d->nout++;
   1420 	srb->nout++;
   1421 	f->dl->npkt++;
   1422 	f->dlen = len;
   1423 	/*
   1424 	 * these refer to qlock & waserror in the above for loop.
   1425 	 * there's still the first waserror outstanding.
   1426 	 */
   1427 	poperror();
   1428 	QUNLOCK(d);
   1429 
   1430 	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
   1431 	SLEEP(srb, srbready, srb);
   1432 	if(srb->error)
   1433 		error(srb->error);
   1434 
   1435 	QLOCK(d);
   1436 	if(waserror()){
   1437 		QUNLOCK(d);
   1438 		nexterror();
   1439 	}
   1440 	memmove(d->config, s, len);
   1441 	d->nconfig = len;
   1442 	poperror();
   1443 	QUNLOCK(d);
   1444 
   1445 	poperror();			/* pop first waserror */
   1446 
   1447 	srbfree(srb);
   1448 	memmove(db, s, len);
   1449 	free(s);
   1450 	return len;
   1451 }
   1452 
   1453 static int
   1454 getmtu(Chan *m)
   1455 {
   1456 	int n, mtu;
   1457 	char buf[36];
   1458 
   1459 	mtu = 1514;
   1460 	if(m == nil || waserror())
   1461 		return mtu;
   1462 	n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
   1463 	poperror();
   1464 	if(n > 12){
   1465 		buf[n] = 0;
   1466 		mtu = strtoul(buf + 12, 0, 0);
   1467 	}
   1468 	return mtu;
   1469 }
   1470 
   1471 static int
   1472 devmaxdata(Aoedev *d)
   1473 {
   1474 	int i, m, mtu;
   1475 	Devlink *l;
   1476 	Netlink *n;
   1477 
   1478 	mtu = 100000;
   1479 	for(i = 0; i < d->ndl; i++){
   1480 		l = d->dl + i;
   1481 		n = l->nl;
   1482 		if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
   1483 			continue;
   1484 		m = getmtu(n->mtu);
   1485 		if(m > l->datamtu)
   1486 			m = l->datamtu;
   1487 		if(m < mtu)
   1488 			mtu = m;
   1489 	}
   1490 	if(mtu == 100000)
   1491 		mtu = 1514;
   1492 	mtu -= Szaoeata;
   1493 	mtu -= mtu % Aoesectsz;
   1494 	return mtu;
   1495 }
   1496 
   1497 static int
   1498 toggle(char *s, int init)
   1499 {
   1500 	if(s == nil)
   1501 		return init ^ 1;
   1502 	return strcmp(s, "on") == 0;
   1503 }
   1504 
   1505 static void ataident(Aoedev*);
   1506 
   1507 static long
   1508 unitctlwrite(Aoedev *d, void *db, long n)
   1509 {
   1510 	uint maxbcnt, m;
   1511 	uvlong bsize;
   1512 	enum {
   1513 		Failio,
   1514 		Ident,
   1515 		Jumbo,
   1516 		Maxbno,
   1517 		Mtu,
   1518 		Setsize,
   1519 	};
   1520 	Cmdbuf *cb;
   1521 	Cmdtab *ct;
   1522 	static Cmdtab cmds[] = {
   1523 		{Failio, 	"failio", 	1 },
   1524 		{Ident, 	"identify", 	1 },
   1525 		{Jumbo, 	"jumbo", 	0 },
   1526 		{Maxbno,	"maxbno",	0 },
   1527 		{Mtu,		"mtu",		0 },
   1528 		{Setsize, 	"setsize", 	0 },
   1529 	};
   1530 
   1531 	cb = parsecmd(db, n);
   1532 	QLOCK(d);
   1533 	if(waserror()){
   1534 		QUNLOCK(d);
   1535 		free(cb);
   1536 		nexterror();
   1537 	}
   1538 	ct = lookupcmd(cb, cmds, nelem(cmds));
   1539 	switch(ct->index){
   1540 	case Failio:
   1541 		downdev(d, "i/o failure");
   1542 		break;
   1543 	case Ident:
   1544 		ataident(d);
   1545 		break;
   1546 	case Jumbo:
   1547 		m = 0;
   1548 		if(d->flag & Djumbo)
   1549 			m = 1;
   1550 		toggle(cb->f[1], m);
   1551 		if(m)
   1552 			d->flag |= Djumbo;
   1553 		else
   1554 			d->flag &= ~Djumbo;
   1555 		break;
   1556 	case Maxbno:
   1557 	case Mtu:
   1558 		maxbcnt = devmaxdata(d);
   1559 		if(cb->nf > 2)
   1560 			error(Ecmdargs);
   1561 		if(cb->nf == 2){
   1562 			m = strtoul(cb->f[1], 0, 0);
   1563 			if(ct->index == Maxbno)
   1564 				m *= Aoesectsz;
   1565 			else{
   1566 				m -= Szaoeata;
   1567 				m &= ~(Aoesectsz-1);
   1568 			}
   1569 			if(m == 0 || m > maxbcnt)
   1570 				cmderror(cb, "invalid mtu");
   1571 			maxbcnt = m;
   1572 			d->maxmtu = m;
   1573 		} else
   1574 			d->maxmtu = Maxmtu;
   1575 		d->maxbcnt = maxbcnt;
   1576 		break;
   1577 	case Setsize:
   1578 		bsize = d->realbsize;
   1579 		if(cb->nf > 2)
   1580 			error(Ecmdargs);
   1581 		if(cb->nf == 2){
   1582 			bsize = strtoull(cb->f[1], 0, 0);
   1583 			if(bsize % Aoesectsz)
   1584 				cmderror(cb, "disk size must be sector aligned");
   1585 		}
   1586 		d->bsize = bsize;
   1587 		break;
   1588 	default:
   1589 		cmderror(cb, "unknown aoe control message");
   1590 	}
   1591 	poperror();
   1592 	QUNLOCK(d);
   1593 	free(cb);
   1594 	return n;
   1595 }
   1596 
   1597 static long
   1598 unitwrite(Chan *c, void *db, long n, vlong off)
   1599 {
   1600 	long rv;
   1601 	char *buf;
   1602 	Aoedev *d;
   1603 
   1604 	d = unit2dev(UNIT(c->qid));
   1605 	switch(TYPE(c->qid)){
   1606 	default:
   1607 		error(Ebadarg);
   1608 	case Qctl:
   1609 		return unitctlwrite(d, db, n);
   1610 	case Qident:
   1611 		error(Eperm);
   1612 	case Qdata:
   1613 		return rw(d, Write, db, n, off);
   1614 	case Qconfig:
   1615 		if(off + n > sizeof d->config)
   1616 			error(Etoobig);
   1617 		buf = malloc(sizeof d->config);
   1618 		if(waserror()){
   1619 			free(buf);
   1620 			nexterror();
   1621 		}
   1622 		memmove(buf, d->config, d->nconfig);
   1623 		memmove(buf + off, db, n);
   1624 		rv = configwrite(d, buf, n + off);
   1625 		poperror();
   1626 		free(buf);
   1627 		return rv;
   1628 	}
   1629 }
   1630 
   1631 static Netlink*
   1632 addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
   1633 {
   1634 	Netlink *nl, *e;
   1635 
   1636 	LOCK(&netlinks);
   1637 	if(waserror()){
   1638 		UNLOCK(&netlinks);
   1639 		nexterror();
   1640 	}
   1641 	nl = netlinks.nl;
   1642 	e = nl + nelem(netlinks.nl);
   1643 	for(; nl < e && nl->cc; nl++)
   1644 		continue;
   1645 	if(nl == e)
   1646 		error("out of netlink structures");
   1647 	nl->cc = cc;
   1648 	nl->dc = dc;
   1649 	nl->mtu = mtu;
   1650 	strncpy(nl->path, path, sizeof nl->path);
   1651 	memmove(nl->ea, ea, sizeof nl->ea);
   1652 	poperror();
   1653 	nl->flag |= Dup;
   1654 	UNLOCK(&netlinks);
   1655 	return nl;
   1656 }
   1657 
   1658 static int
   1659 newunit(void)
   1660 {
   1661 	int x;
   1662 
   1663 	LOCK(&units);
   1664 	if(units.ref == Maxunits)
   1665 		x = -1;
   1666 	else
   1667 		x = units.ref++;
   1668 	UNLOCK(&units);
   1669 	return x;
   1670 }
   1671 
   1672 static int
   1673 dropunit(void)
   1674 {
   1675 	int x;
   1676 
   1677 	LOCK(&units);
   1678 	x = --units.ref;
   1679 	UNLOCK(&units);
   1680 	return x;
   1681 }
   1682 
   1683 /*
   1684  * always allocate max frames.  maxout may change.
   1685  */
   1686 static Aoedev*
   1687 newdev(long major, long minor, int n)
   1688 {
   1689 	Aoedev *d;
   1690 	Frame *f, *e;
   1691 
   1692 	d = malloc(sizeof *d);
   1693 	f = malloc(sizeof *f*Maxframes);
   1694 	if(!d || !f) {
   1695 		free(d);
   1696 		free(f);
   1697 		error("aoe device allocation failure");
   1698 	}
   1699 	d->nframes = n;
   1700 	d->frames = f;
   1701 	for (e = f + Maxframes; f < e; f++)
   1702 		f->tag = Tfree;
   1703 	d->maxout = n;
   1704 	d->major = major;
   1705 	d->minor = minor;
   1706 	d->maxbcnt = Dbcnt;
   1707 	d->flag = Djumbo;
   1708 	d->maxmtu = Maxmtu;
   1709 	d->unit = newunit();		/* bzzt.  inaccurate if units removed */
   1710 	if(d->unit == -1){
   1711 		free(d);
   1712 		free(d->frames);
   1713 		error("too many units");
   1714 	}
   1715 	d->dl = d->dltab;
   1716 	return d;
   1717 }
   1718 
   1719 static Aoedev*
   1720 mm2dev(int major, int minor)
   1721 {
   1722 	Aoedev *d;
   1723 
   1724 	RLOCK(&devs);
   1725 	for(d = devs.d; d; d = d->next)
   1726 		if(d->major == major && d->minor == minor){
   1727 			RUNLOCK(&devs);
   1728 			return d;
   1729 		}
   1730 	RUNLOCK(&devs);
   1731 	eventlog("mm2dev: %d.%d not found\n", major, minor);
   1732 	return nil;
   1733 }
   1734 
   1735 /* Find the device in our list.  If not known, add it */
   1736 static Aoedev*
   1737 getdev(long major, long minor, int n)
   1738 {
   1739 	Aoedev *d;
   1740 
   1741 	if(major == 0xffff || minor == 0xff)
   1742 		return 0;
   1743 	WLOCK(&devs);
   1744 	if(waserror()){
   1745 		WUNLOCK(&devs);
   1746 		nexterror();
   1747 	}
   1748 	for(d = devs.d; d; d = d->next)
   1749 		if(d->major == major && d->minor == minor)
   1750 			break;
   1751 	if(d == nil) {
   1752 		d = newdev(major, minor, n);
   1753 		d->next = devs.d;
   1754 		devs.d = d;
   1755 	}
   1756 	poperror();
   1757 	WUNLOCK(&devs);
   1758 	return d;
   1759 }
   1760 
   1761 static ushort
   1762 gbit16(void *a)
   1763 {
   1764 	uchar *i;
   1765 
   1766 	i = a;
   1767 	return i[1] << 8 | i[0];
   1768 }
   1769 
   1770 static ulong
   1771 gbit32(void *a)
   1772 {
   1773 	ulong j;
   1774 	uchar *i;
   1775 
   1776 	i = a;
   1777 	j  = i[3] << 24;
   1778 	j |= i[2] << 16;
   1779 	j |= i[1] << 8;
   1780 	j |= i[0];
   1781 	return j;
   1782 }
   1783 
   1784 static uvlong
   1785 gbit64(void *a)
   1786 {
   1787 	uchar *i;
   1788 
   1789 	i = a;
   1790 	return (uvlong)gbit32(i+4) << 32 | gbit32(a);
   1791 }
   1792 
   1793 static void
   1794 ataident(Aoedev *d)
   1795 {
   1796 	Aoeata *a;
   1797 	Block *b;
   1798 	Frame *f;
   1799 
   1800 	f = freeframe(d);
   1801 	if(f == nil)
   1802 		return;
   1803 	f->nhdr = Szaoeata;
   1804 	memset(f->hdr, 0, f->nhdr);
   1805 	a = (Aoeata*)f->hdr;
   1806 	if(hset(d, f, (Aoehdr*)a, ACata) == -1)
   1807 		return;
   1808 	f->srb = srbkalloc(0, 0);
   1809 	a->cmdstat = Cid;	/* ata 6, page 110 */
   1810 	a->scnt = 1;
   1811 	a->lba[3] = 0xa0;
   1812 	d->nout++;
   1813 	f->dl->npkt++;
   1814 	f->bcnt = 512;
   1815 	f->dlen = 0;
   1816 	b = allocfb(f);
   1817 	devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
   1818 }
   1819 
   1820 static int
   1821 newdlea(Devlink *l, uchar *ea)
   1822 {
   1823 	int i;
   1824 	uchar *t;
   1825 
   1826 	for(i = 0; i < Nea; i++){
   1827 		t = l->eatab[i];
   1828 		if(i == l->nea){
   1829 			memmove(t, ea, Eaddrlen);
   1830 			return l->nea++;
   1831 		}
   1832 		if(memcmp(t, ea, Eaddrlen) == 0)
   1833 			return i;
   1834 	}
   1835 	return -1;
   1836 }
   1837 
   1838 static Devlink*
   1839 newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
   1840 {
   1841 	int i;
   1842 	Devlink *l;
   1843 
   1844 	for(i = 0; i < Ndevlink; i++){
   1845 		l = d->dl + i;
   1846 		if(i == d->ndl){
   1847 			d->ndl++;
   1848 			newdlea(l, c->src);
   1849 			l->datamtu = c->scnt*Aoesectsz;
   1850 			l->nl = n;
   1851 			l->flag |= Dup;
   1852 			l->mintimer = Rtmin;
   1853 			l->rttavg = Rtmax;
   1854 			return l;
   1855 		}
   1856 		if(l->nl == n){
   1857 			newdlea(l, c->src);
   1858 			l->datamtu = c->scnt*Aoesectsz;
   1859 			l->flag |= Dup;
   1860 			return l;
   1861 		}
   1862 	}
   1863 	eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
   1864 	return 0;
   1865 }
   1866 
   1867 static void
   1868 errrsp(Block *b, char *s)
   1869 {
   1870 	int n;
   1871 	Aoedev *d;
   1872 	Aoehdr *h;
   1873 	Frame *f;
   1874 
   1875 	h = (Aoehdr*)b->rp;
   1876 	n = nhgetl(h->tag);
   1877 	if(n == Tmgmt || n == Tfree)
   1878 		return;
   1879 	d = mm2dev(nhgets(h->major), h->minor);
   1880 	if(d == 0)
   1881 		return;
   1882 	if(f = getframe(d, n))
   1883 		frameerror(d, f, s);
   1884 }
   1885 
   1886 static void
   1887 qcfgrsp(Block *b, Netlink *nl)
   1888 {
   1889 	int major, cmd, cslen, blen;
   1890 	unsigned n;
   1891 	Aoedev *d;
   1892 	Aoeqc *ch;
   1893 	Devlink *l;
   1894 	Frame *f;
   1895 
   1896 	ch = (Aoeqc*)b->rp;
   1897 	major = nhgets(ch->major);
   1898 	n = nhgetl(ch->tag);
   1899 	if(n != Tmgmt){
   1900 		d = mm2dev(major, ch->minor);
   1901 		if(d == nil)
   1902 			return;
   1903 		QLOCK(d);
   1904 		f = getframe(d, n);
   1905 		if(f == nil){
   1906 			QUNLOCK(d);
   1907 			eventlog("%æ: unknown response tag %ux\n", d, n);
   1908 			return;
   1909 		}
   1910 		cslen = nhgets(ch->cslen);
   1911 		blen = BLEN(b) - Szaoeqc;
   1912 		if(cslen < blen)
   1913 			eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
   1914 				d, n, cslen, blen);
   1915 		if(cslen > blen){
   1916 			eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
   1917 				d, n, cslen, blen);
   1918 			cslen = blen;
   1919 		}
   1920 		memmove(f->dp, ch + 1, cslen);
   1921 		f->srb->nout--;
   1922 		WAKEUP(f->srb);
   1923 		d->nout--;
   1924 		f->srb = nil;
   1925 		f->tag = Tfree;
   1926 		QUNLOCK(d);
   1927 		return;
   1928 	}
   1929 
   1930 	cmd = ch->verccmd & 0xf;
   1931 	if(cmd != 0){
   1932 		eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
   1933 		return;
   1934 	}
   1935 	n = nhgets(ch->bufcnt);
   1936 	if(n > Maxframes)
   1937 		n = Maxframes;
   1938 
   1939 	if(waserror()){
   1940 		eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
   1941 		return;
   1942 	}
   1943 	d = getdev(major, ch->minor, n);
   1944 	poperror();
   1945 	if(d == 0)
   1946 		return;
   1947 
   1948 	QLOCK(d);
   1949 	*up->errstr = 0;
   1950 	if(waserror()){
   1951 		QUNLOCK(d);
   1952 		eventlog("%æ: %s\n", d, up->errstr);
   1953 		nexterror();
   1954 	}
   1955 
   1956 	l = newdevlink(d, nl, ch);		/* add this interface. */
   1957 
   1958 	d->fwver = nhgets(ch->fwver);
   1959 	n = nhgets(ch->cslen);
   1960 	if(n > sizeof d->config)
   1961 		n = sizeof d->config;
   1962 	d->nconfig = n;
   1963 	memmove(d->config, ch + 1, n);
   1964 
   1965 	/* manually set mtu may be reset lower if conditions warrant */
   1966 	if(l){
   1967 		n = devmaxdata(d);
   1968 		if(!(d->flag & Djumbo))
   1969 			n = Dbcnt;
   1970 		if(n > d->maxmtu)
   1971 			n = d->maxmtu;
   1972 		if(n != d->maxbcnt){
   1973 			eventlog("%æ: setting %d byte mtu on %s:%E\n",
   1974 				d, n, nl->path, nl->ea);
   1975 			d->maxbcnt = n;
   1976 		}
   1977 	}
   1978 	if(d->nopen == 0)
   1979 		ataident(d);
   1980 	poperror();
   1981 	QUNLOCK(d);
   1982 }
   1983 
   1984 static void
   1985 idmove(char *p, ushort *a, unsigned n)
   1986 {
   1987 	int i;
   1988 	char *op, *e;
   1989 
   1990 	op = p;
   1991 	for(i = 0; i < n / 2; i++){
   1992 		*p++ = a[i] >> 8;
   1993 		*p++ = a[i];
   1994 	}
   1995 	*p = 0;
   1996 	while(p > op && *--p == ' ')
   1997 		*p = 0;
   1998 	e = p;
   1999 	p = op;
   2000 	while(*p == ' ')
   2001 		p++;
   2002 	memmove(op, p, n - (e - p));
   2003 }
   2004 
   2005 static vlong
   2006 aoeidentify(Aoedev *d, ushort *id)
   2007 {
   2008 	int i;
   2009 	vlong s;
   2010 
   2011 	d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
   2012 
   2013 	i = gbit16(id+83) | gbit16(id+86);
   2014 	if(i & (1<<10)){
   2015 		d->flag |= Dllba;
   2016 		s = gbit64(id+100);
   2017 	}else
   2018 		s = gbit32(id+60);
   2019 
   2020 	i = gbit16(id+83);
   2021 	if((i>>14) == 1) {
   2022 		if(i & (1<<3))
   2023 			d->flag  |= Dpower;
   2024 		i = gbit16(id+82);
   2025 		if(i & 1)
   2026 			d->flag  |= Dsmart;
   2027 		if(i & (1<<14))
   2028 			d->flag  |= Dnop;
   2029 	}
   2030 //	eventlog("%æ up\n", d);
   2031 	d->flag |= Dup;
   2032 	memmove(d->ident, id, sizeof d->ident);
   2033 	return s;
   2034 }
   2035 
   2036 static void
   2037 newvers(Aoedev *d)
   2038 {
   2039 	LOCK(&drivevers);
   2040 	d->vers = drivevers.ref++;
   2041 	UNLOCK(&drivevers);
   2042 }
   2043 
   2044 static int
   2045 identify(Aoedev *d, ushort *id)
   2046 {
   2047 	vlong osectors, s;
   2048 	uchar oserial[21];
   2049 
   2050 	s = aoeidentify(d, id);
   2051 	if(s == -1)
   2052 		return -1;
   2053 	osectors = d->realbsize;
   2054 	memmove(oserial, d->serial, sizeof d->serial);
   2055 
   2056 	idmove(d->serial, id+10, 20);
   2057 	idmove(d->firmware, id+23, 8);
   2058 	idmove(d->model, id+27, 40);
   2059 
   2060 	s *= Aoesectsz;
   2061 	if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
   2062 		d->bsize = s;
   2063 		d->realbsize = s;
   2064 //		d->mediachange = 1;
   2065 		newvers(d);
   2066 	}
   2067 	return 0;
   2068 }
   2069 
   2070 static void
   2071 atarsp(Block *b)
   2072 {
   2073 	unsigned n;
   2074 	short major;
   2075 	Aoeata *ahin, *ahout;
   2076 	Aoedev *d;
   2077 	Frame *f;
   2078 	Srb *srb;
   2079 
   2080 	ahin = (Aoeata*)b->rp;
   2081 	major = nhgets(ahin->major);
   2082 	d = mm2dev(major, ahin->minor);
   2083 	if(d == nil)
   2084 		return;
   2085 	QLOCK(d);
   2086 	if(waserror()){
   2087 		QUNLOCK(d);
   2088 		nexterror();
   2089 	}
   2090 	n = nhgetl(ahin->tag);
   2091 	f = getframe(d, n);
   2092 	if(f == nil){
   2093 		dprint("%æ: unexpected response; tag %ux\n", d, n);
   2094 		goto bail;
   2095 	}
   2096 	rtupdate(f->dl, tsince(f->tag));
   2097 	ahout = (Aoeata*)f->hdr;
   2098 	srb = f->srb;
   2099 
   2100 	if(ahin->cmdstat & 0xa9){
   2101 		eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
   2102 			d, ahout->cmdstat, ahin->cmdstat);
   2103 		if(srb)
   2104 			srb->error = Eio;
   2105 	} else {
   2106 		n = ahout->scnt * Aoesectsz;
   2107 		switch(ahout->cmdstat){
   2108 		case Crd:
   2109 		case Crdext:
   2110 			if(BLEN(b) - Szaoeata < n){
   2111 				eventlog("%æ: runt read blen %ld expect %d\n",
   2112 					d, BLEN(b), n);
   2113 				goto bail;
   2114 			}
   2115 			memmove(f->dp, b->rp + Szaoeata, n);
   2116 		case Cwr:
   2117 		case Cwrext:
   2118 			if(n > Dbcnt)
   2119 				f->nl->lostjumbo = 0;
   2120 			if(f->bcnt -= n){
   2121 				f->lba += n / Aoesectsz;
   2122 				f->dp = (uchar*)f->dp + n;
   2123 				resend(d, f);
   2124 				goto bail;
   2125 			}
   2126 			break;
   2127 		case Cid:
   2128 			if(BLEN(b) - Szaoeata < 512){
   2129 				eventlog("%æ: runt identify blen %ld expect %d\n",
   2130 					d, BLEN(b), n);
   2131 				goto bail;
   2132 			}
   2133 			identify(d, (ushort*)(b->rp + Szaoeata));
   2134 			break;
   2135 		default:
   2136 			eventlog("%æ: unknown ata command %.2ux \n",
   2137 				d, ahout->cmdstat);
   2138 		}
   2139 	}
   2140 
   2141 	if(srb && --srb->nout == 0 && srb->len == 0)
   2142 		WAKEUP(srb);
   2143 	f->srb = nil;
   2144 	f->tag = Tfree;
   2145 	d->nout--;
   2146 
   2147 	work(d);
   2148 bail:
   2149 	poperror();
   2150 	QUNLOCK(d);
   2151 }
   2152 
   2153 static void
   2154 netrdaoeproc(void *v)
   2155 {
   2156 	int idx;
   2157 	char name[Maxpath+1], *s;
   2158 	Aoehdr *h;
   2159 	Block *b;
   2160 	Netlink *nl;
   2161 
   2162 	nl = (Netlink*)v;
   2163 	idx = nl - netlinks.nl;
   2164 	netlinks.reader[idx] = 1;
   2165 	kstrcpy(name, nl->path, Maxpath);
   2166 
   2167 	if(waserror()){
   2168 		eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
   2169 		netlinks.reader[idx] = 0;
   2170 		wakeup(netlinks.rendez + idx);
   2171 		pexit(up->errstr, 1);
   2172 	}
   2173 	if(autodiscover)
   2174 		discover(0xffff, 0xff);
   2175 	for (;;) {
   2176 		if(!(nl->flag & Dup))
   2177 			error("netlink is down");
   2178 		if(nl->dc == nil)
   2179 			panic("netrdaoe: nl->dc == nil");
   2180 		b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
   2181 		if(b == nil)
   2182 			error("network read");
   2183 		h = (Aoehdr*)b->rp;
   2184 		if(h->verflag & AFrsp)
   2185 			if(s = aoeerror(h)){
   2186 				eventlog("%s: %s\n", nl->path, s);
   2187 				errrsp(b, s);
   2188 			}else if(h->cmd == ACata)
   2189 				atarsp(b);
   2190 			else if(h->cmd == ACconfig)
   2191 				qcfgrsp(b, nl);
   2192 			else if((h->cmd & 0xf0) == 0){
   2193 				eventlog("%s: unknown cmd %d\n",
   2194 					nl->path, h->cmd);
   2195 				errrsp(b, "unknown command");
   2196 			}
   2197 		freeb(b);
   2198 	}
   2199 }
   2200 
   2201 static void
   2202 getaddr(char *path, uchar *ea)
   2203 {
   2204 	int n;
   2205 	char buf[2*Eaddrlen+1];
   2206 	Chan *c;
   2207 
   2208 	uprint("%s/addr", path);
   2209 	c = namec(up->genbuf, Aopen, OREAD, 0);
   2210 	if(waserror()) {
   2211 		cclose(c);
   2212 		nexterror();
   2213 	}
   2214 	if(c == nil)
   2215 		panic("æ: getaddr: c == nil");
   2216 	n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
   2217 	poperror();
   2218 	cclose(c);
   2219 	buf[n] = 0;
   2220 	if(parseether(ea, buf) < 0)
   2221 		error("parseether failure");
   2222 }
   2223 
   2224 static void
   2225 netbind(char *path)
   2226 {
   2227 	char addr[Maxpath];
   2228 	uchar ea[2*Eaddrlen+1];
   2229 	Chan *dc, *cc, *mtu;
   2230 	Netlink *nl;
   2231 
   2232 	snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
   2233 	dc = chandial(addr, nil, nil, &cc);
   2234 	snprint(addr, sizeof addr, "%s/mtu", path);
   2235 	if(waserror())
   2236 		mtu = nil;
   2237 	else {
   2238 		mtu = namec(addr, Aopen, OREAD, 0);
   2239 		poperror();
   2240 	}
   2241 
   2242 	if(waserror()){
   2243 		cclose(dc);
   2244 		cclose(cc);
   2245 		if(mtu)
   2246 			cclose(mtu);
   2247 		nexterror();
   2248 	}
   2249 	if(dc == nil  || cc == nil)
   2250 		error(Enonexist);
   2251 	getaddr(path, ea);
   2252 	nl = addnet(path, cc, dc, mtu, ea);
   2253 	snprint(addr, sizeof addr, "netrdaoe@%s", path);
   2254 	kproc(addr, netrdaoeproc, nl);
   2255 	poperror();
   2256 }
   2257 
   2258 static int
   2259 unbound(void *v)
   2260 {
   2261 	return *(int*)v != 0;
   2262 }
   2263 
   2264 static void
   2265 netunbind(char *path)
   2266 {
   2267 	int i, idx;
   2268 	Aoedev *d, *p, *next;
   2269 	Chan *dc, *cc;
   2270 	Devlink *l;
   2271 	Frame *f;
   2272 	Netlink *n, *e;
   2273 
   2274 	n = netlinks.nl;
   2275 	e = n + nelem(netlinks.nl);
   2276 
   2277 	LOCK(&netlinks);
   2278 	for(; n < e; n++)
   2279 		if(n->dc && strcmp(n->path, path) == 0)
   2280 			break;
   2281 	UNLOCK(&netlinks);
   2282 	if(n == e)
   2283 		error("device not bound");
   2284 
   2285 	/*
   2286 	 * hunt down devices using this interface; disable
   2287 	 * this also terminates the reader.
   2288 	 */
   2289 	idx = n - netlinks.nl;
   2290 	WLOCK(&devs);
   2291 	for(d = devs.d; d; d = d->next){
   2292 		QLOCK(d);
   2293 		for(i = 0; i < d->ndl; i++){
   2294 			l = d->dl + i;
   2295 			if(l->nl == n)
   2296 				l->flag &= ~Dup;
   2297 		}
   2298 		QUNLOCK(d);
   2299 	}
   2300 	n->flag &= ~Dup;
   2301 	WUNLOCK(&devs);
   2302 
   2303 	/* confirm reader is down. */
   2304 	while(waserror())
   2305 		;
   2306 	sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
   2307 	poperror();
   2308 
   2309 	/* reschedule packets. */
   2310 	WLOCK(&devs);
   2311 	for(d = devs.d; d; d = d->next){
   2312 		QLOCK(d);
   2313 		for(i = 0; i < d->nframes; i++){
   2314 			f = d->frames + i;
   2315 			if(f->tag != Tfree && f->nl == n)
   2316 				resend(d, f);
   2317 		}
   2318 		QUNLOCK(d);
   2319 	}
   2320 	WUNLOCK(&devs);
   2321 
   2322 	/* squeeze devlink pool.  (we assert nobody is using them now) */
   2323 	WLOCK(&devs);
   2324 	for(d = devs.d; d; d = d->next){
   2325 		QLOCK(d);
   2326 		for(i = 0; i < d->ndl; i++){
   2327 			l = d->dl + i;
   2328 			if(l->nl == n)
   2329 				memmove(l, l + 1, sizeof *l * (--d->ndl - i));
   2330 		}
   2331 		QUNLOCK(d);
   2332 	}
   2333 	WUNLOCK(&devs);
   2334 
   2335 	/* close device link. */
   2336 	LOCK(&netlinks);
   2337 	dc = n->dc;
   2338 	cc = n->cc;
   2339 	if(n->mtu)
   2340 		cclose(n->mtu);
   2341 	memset(n, 0, sizeof *n);
   2342 	UNLOCK(&netlinks);
   2343 
   2344 	cclose(dc);
   2345 	cclose(cc);
   2346 
   2347 	/* squeeze orphan devices */
   2348 	WLOCK(&devs);
   2349 	for(p = d = devs.d; d; d = next){
   2350 		next = d->next;
   2351 		if(d->ndl > 0){
   2352 			p = d;
   2353 			continue;
   2354 		}
   2355 		QLOCK(d);
   2356 		downdev(d, "orphan");
   2357 		QUNLOCK(d);
   2358 		if(p != devs.d)
   2359 			p->next = next;
   2360 		else{
   2361 			devs.d = next;
   2362 			p = devs.d;
   2363 		}
   2364 		free(d->frames);
   2365 		free(d);
   2366 		dropunit();
   2367 	}
   2368 	WUNLOCK(&devs);
   2369 }
   2370 
   2371 static void
   2372 strtoss(char *f, ushort *shelf, ushort *slot)
   2373 {
   2374 	ulong sh;
   2375 	char *s;
   2376 
   2377 	*shelf = 0xffff;
   2378 	*slot = 0xff;
   2379 	if(!f)
   2380 		return;
   2381 	*shelf = sh = strtol(f, &s, 0);
   2382 	if(s == f || sh > 0xffff)
   2383 		error("bad shelf");
   2384 	f = s;
   2385 	if(*f++ == '.'){
   2386 		*slot = strtol(f, &s, 0);
   2387 		if(s == f || *slot > 0xff)
   2388 			error("bad shelf");
   2389 	}else
   2390 		*slot = 0xff;
   2391 }
   2392 
   2393 static void
   2394 discoverstr(char *f)
   2395 {
   2396 	ushort shelf, slot;
   2397 
   2398 	strtoss(f, &shelf, &slot);
   2399 	discover(shelf, slot);
   2400 }
   2401 
   2402 static void
   2403 removedev(Aoedev *d)
   2404 {
   2405 	int i;
   2406 	Aoedev *p;
   2407 
   2408 	WLOCK(&devs);
   2409 	p = 0;
   2410 	if(d != devs.d)
   2411 	for(p = devs.d; p; p = p->next)
   2412 		if(p->next == d)
   2413 			break;
   2414 	QLOCK(d);
   2415 	d->flag &= ~Dup;
   2416 	newvers(d);
   2417 	d->ndl = 0;
   2418 	QUNLOCK(d);
   2419 	for(i = 0; i < d->nframes; i++)
   2420 		frameerror(d, d->frames+i, Enotup);
   2421 
   2422 	if(p)
   2423 		p->next = d->next;
   2424 	else
   2425 		devs.d = d->next;
   2426 	free(d->frames);
   2427 	free(d);
   2428 	dropunit();
   2429 	WUNLOCK(&devs);
   2430 }
   2431 
   2432 
   2433 static void
   2434 aoeremove(Chan *c)
   2435 {
   2436 	switch(TYPE(c->qid)){
   2437 	default:
   2438 	case Qzero:
   2439 	case Qtopdir:
   2440 	case Qtoplog:
   2441 	case Qtopctl:
   2442 	case Qctl:
   2443 	case Qdata:
   2444 	case Qconfig:
   2445 	case Qident:
   2446 		error(Eperm);
   2447 	case Qunitdir:
   2448 		removedev(unit2dev(UNIT(c->qid)));
   2449 		break;
   2450 	}
   2451 }
   2452 
   2453 static void
   2454 removestr(char *f)
   2455 {
   2456 	ushort shelf, slot;
   2457 	Aoedev *d;
   2458 
   2459 	strtoss(f, &shelf, &slot);
   2460 	WLOCK(&devs);
   2461 	for(d = devs.d; d; d = d->next)
   2462 		if(shelf == d->major && slot == d->minor){
   2463 			WUNLOCK(&devs);	/* BOTCH */
   2464 			removedev(d);
   2465 			return;
   2466 		}
   2467 	WUNLOCK(&devs);
   2468 	error("device not bound");
   2469 }
   2470 
   2471 static long
   2472 topctlwrite(void *db, long n)
   2473 {
   2474 	enum {
   2475 		Autodiscover,
   2476 		Bind,
   2477 		Debug,
   2478 		Discover,
   2479 		Closewait,
   2480 		Rediscover,
   2481 		Remove,
   2482 		Unbind,
   2483 	};
   2484 	char *f;
   2485 	Cmdbuf *cb;
   2486 	Cmdtab *ct;
   2487 	static Cmdtab cmds[] = {
   2488 		{ Autodiscover,	"autodiscover",	0	},
   2489 		{ Bind, 	"bind", 	2	},
   2490 		{ Debug, 	"debug", 	0	},
   2491 		{ Discover, 	"discover", 	0	},
   2492 		{ Rediscover,	"rediscover",	0	},
   2493 		{ Remove,	"remove",	2	},
   2494 		{ Unbind,	"unbind",	2	},
   2495 	};
   2496 
   2497 	cb = parsecmd(db, n);
   2498 	if(waserror()){
   2499 		free(cb);
   2500 		nexterror();
   2501 	}
   2502 	ct = lookupcmd(cb, cmds, nelem(cmds));
   2503 	f = cb->f[1];
   2504 	switch(ct->index){
   2505 	case Autodiscover:
   2506 		autodiscover = toggle(f, autodiscover);
   2507 		break;
   2508 	case Bind:
   2509 		netbind(f);
   2510 		break;
   2511 	case Debug:
   2512 		debug = toggle(f, debug);
   2513 		break;
   2514 	case Discover:
   2515 		discoverstr(f);
   2516 		break;
   2517 	case Rediscover:
   2518 		rediscover = toggle(f, rediscover);
   2519 		break;
   2520 	case Remove:
   2521 		removestr(f);	/* depricated */
   2522 		break;
   2523 	case Unbind:
   2524 		netunbind(f);
   2525 		break;
   2526 	default:
   2527 		cmderror(cb, "unknown aoe control message");
   2528 	}
   2529 	poperror();
   2530 	free(cb);
   2531 	return n;
   2532 }
   2533 
   2534 static long
   2535 aoewrite(Chan *c, void *db, long n, vlong off)
   2536 {
   2537 	switch(TYPE(c->qid)){
   2538 	default:
   2539 	case Qzero:
   2540 	case Qtopdir:
   2541 	case Qunitdir:
   2542 	case Qtoplog:
   2543 		error(Eperm);
   2544 	case Qtopctl:
   2545 		return topctlwrite(db, n);
   2546 	case Qctl:
   2547 	case Qdata:
   2548 	case Qconfig:
   2549 	case Qident:
   2550 		return unitwrite(c, db, n, off);
   2551 	}
   2552 }
   2553 
/*
 * device table entry for the aoe driver: device character 'æ',
 * name "aoe".  the aoe* entries are this driver's own routines;
 * the dev* entries are not defined in this part of the file
 * (presumably the kernel's generic defaults -- confirm).
 */
Dev aoedevtab = {
	L'æ',
	"aoe",

	devreset,
	devinit,
	devshutdown,
	aoeattach,
	aoewalk,
	aoestat,
	aoeopen,
	devcreate,
	aoeclose,
	aoeread,
	devbread,
	aoewrite,
	devbwrite,
	aoeremove,
	devwstat,
	devpower,
	devconfig,
};