vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

tcp.c (66246B)


      1 #include	"u.h"
      2 #include	"lib.h"
      3 #include	"mem.h"
      4 #include	"dat.h"
      5 #include	"fns.h"
      6 #include	"error.h"
      7 
      8 #include	"ip.h"
      9 
     10 enum
     11 {
     12 	QMAX		= 64*1024-1,
     13 	IP_TCPPROTO	= 6,
     14 
     15 	TCP4_IPLEN	= 8,
     16 	TCP4_PHDRSIZE	= 12,
     17 	TCP4_HDRSIZE	= 20,
     18 	TCP4_TCBPHDRSZ	= 40,
     19 	TCP4_PKT	= TCP4_IPLEN+TCP4_PHDRSIZE,
     20 
     21 	TCP6_IPLEN	= 0,
     22 	TCP6_PHDRSIZE	= 40,
     23 	TCP6_HDRSIZE	= 20,
     24 	TCP6_TCBPHDRSZ	= 60,
     25 	TCP6_PKT	= TCP6_IPLEN+TCP6_PHDRSIZE,
     26 
     27 	TcptimerOFF	= 0,
     28 	TcptimerON	= 1,
     29 	TcptimerDONE	= 2,
     30 	MAX_TIME 	= (1<<20),	/* Forever */
     31 	TCP_ACK		= 50,		/* Timed ack sequence in ms */
     32 	MAXBACKMS	= 9*60*1000,	/* longest backoff time (ms) before hangup */
     33 
     34 	URG		= 0x20,		/* Data marked urgent */
     35 	ACK		= 0x10,		/* Acknowledge is valid */
     36 	PSH		= 0x08,		/* Whole data pipe is pushed */
     37 	RST		= 0x04,		/* Reset connection */
     38 	SYN		= 0x02,		/* Pkt. is synchronise */
     39 	FIN		= 0x01,		/* Start close down */
     40 
     41 	EOLOPT		= 0,
     42 	NOOPOPT		= 1,
     43 	MSSOPT		= 2,
     44 	MSS_LENGTH	= 4,		/* Mean segment size */
     45 	WSOPT		= 3,
     46 	WS_LENGTH	= 3,		/* Bits to scale window size by */
     47 	MSL2		= 10,
     48 	MSPTICK		= 50,		/* Milliseconds per timer tick */
     49 	DEF_MSS		= 1460,		/* Default mean segment */
     50 	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
     51 	DEF_RTT		= 500,		/* Default round trip */
     52 	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
     53 	TCP_LISTEN	= 0,		/* Listen connection */
     54 	TCP_CONNECT	= 1,		/* Outgoing connection */
     55 	SYNACK_RXTIMER	= 250,		/* ms between SYNACK retransmits */
     56 
     57 	TCPREXMTTHRESH	= 3,		/* dupack threshhold for rxt */
     58 
     59 	FORCE		= 1,
     60 	CLONE		= 2,
     61 	RETRAN		= 4,
     62 	ACTIVE		= 8,
     63 	SYNACK		= 16,
     64 
     65 	LOGAGAIN	= 3,
     66 	LOGDGAIN	= 2,
     67 
     68 	Closed		= 0,		/* Connection states */
     69 	Listen,
     70 	Syn_sent,
     71 	Syn_received,
     72 	Established,
     73 	Finwait1,
     74 	Finwait2,
     75 	Close_wait,
     76 	Closing,
     77 	Last_ack,
     78 	Time_wait,
     79 
     80 	Maxlimbo	= 1000,		/* maximum procs waiting for response to SYN ACK */
     81 	NLHT		= 256,		/* hash table size, must be a power of 2 */
     82 	LHTMASK		= NLHT-1,
     83 
     84 	HaveWS		= 1<<8,
     85 };
     86 
     87 /* Must correspond to the enumeration above */
     88 char *tcpstates[] =
     89 {
     90 	"Closed", 	"Listen", 	"Syn_sent", "Syn_received",
     91 	"Established", 	"Finwait1",	"Finwait2", "Close_wait",
     92 	"Closing", 	"Last_ack", 	"Time_wait"
     93 };
     94 
     95 typedef struct Tcptimer Tcptimer;
     96 struct Tcptimer
     97 {
     98 	Tcptimer	*next;
     99 	Tcptimer	*prev;
    100 	Tcptimer	*readynext;
    101 	int	state;
    102 	int	start;
    103 	int	count;
    104 	void	(*func)(void*);
    105 	void	*arg;
    106 };
    107 
    108 /*
    109  *  v4 and v6 pseudo headers used for
    110  *  checksuming tcp
    111  */
    112 typedef struct Tcp4hdr Tcp4hdr;
    113 struct Tcp4hdr
    114 {
    115 	uchar	vihl;		/* Version and header length */
    116 	uchar	tos;		/* Type of service */
    117 	uchar	length[2];	/* packet length */
    118 	uchar	id[2];		/* Identification */
    119 	uchar	frag[2];	/* Fragment information */
    120 	uchar	Unused;
    121 	uchar	proto;
    122 	uchar	tcplen[2];
    123 	uchar	tcpsrc[4];
    124 	uchar	tcpdst[4];
    125 	uchar	tcpsport[2];
    126 	uchar	tcpdport[2];
    127 	uchar	tcpseq[4];
    128 	uchar	tcpack[4];
    129 	uchar	tcpflag[2];
    130 	uchar	tcpwin[2];
    131 	uchar	tcpcksum[2];
    132 	uchar	tcpurg[2];
    133 	/* Options segment */
    134 	uchar	tcpopt[1];
    135 };
    136 
    137 typedef struct Tcp6hdr Tcp6hdr;
    138 struct Tcp6hdr
    139 {
    140 	uchar	vcf[4];
    141 	uchar	ploadlen[2];
    142 	uchar	proto;
    143 	uchar	ttl;
    144 	uchar	tcpsrc[IPaddrlen];
    145 	uchar	tcpdst[IPaddrlen];
    146 	uchar	tcpsport[2];
    147 	uchar	tcpdport[2];
    148 	uchar	tcpseq[4];
    149 	uchar	tcpack[4];
    150 	uchar	tcpflag[2];
    151 	uchar	tcpwin[2];
    152 	uchar	tcpcksum[2];
    153 	uchar	tcpurg[2];
    154 	/* Options segment */
    155 	uchar	tcpopt[1];
    156 };
    157 
    158 /*
    159  *  this represents the control info
    160  *  for a single packet.  It is derived from
    161  *  a packet in ntohtcp{4,6}() and stuck into
    162  *  a packet in htontcp{4,6}().
    163  */
    164 typedef struct Tcp Tcp;
    165 struct	Tcp
    166 {
    167 	ushort	source;
    168 	ushort	dest;
    169 	ulong	seq;
    170 	ulong	ack;
    171 	uchar	flags;
    172 	ushort	ws;	/* window scale option (if not zero) */
    173 	ulong	wnd;
    174 	ushort	urg;
    175 	ushort	mss;	/* max segment size option (if not zero) */
    176 	ushort	len;	/* size of data */
    177 };
    178 
    179 /*
    180  *  this header is malloc'd to thread together fragments
    181  *  waiting to be coalesced
    182  */
    183 typedef struct Reseq Reseq;
    184 struct Reseq
    185 {
    186 	Reseq	*next;
    187 	Tcp	seg;
    188 	Block	*bp;
    189 	ushort	length;
    190 };
    191 
    192 /*
    193  *  the QLOCK in the Conv locks this structure
    194  */
    195 typedef struct Tcpctl Tcpctl;
    196 struct Tcpctl
    197 {
    198 	uchar	state;			/* Connection state */
    199 	uchar	type;			/* Listening or active connection */
    200 	uchar	code;			/* Icmp code */
    201 	struct {
    202 		ulong	una;		/* Unacked data pointer */
    203 		ulong	nxt;		/* Next sequence expected */
    204 		ulong	ptr;		/* Data pointer */
    205 		ulong	wnd;		/* Tcp send window */
    206 		ulong	urg;		/* Urgent data pointer */
    207 		ulong	wl2;
    208 		int	scale;		/* how much to right shift window in xmitted packets */
    209 		/* to implement tahoe and reno TCP */
    210 		ulong	dupacks;	/* number of duplicate acks rcvd */
    211 		int	recovery;	/* loss recovery flag */
    212 		ulong	rxt;		/* right window marker for recovery */
    213 	} snd;
    214 	struct {
    215 		ulong	nxt;		/* Receive pointer to next uchar slot */
    216 		ulong	wnd;		/* Receive window incoming */
    217 		ulong	urg;		/* Urgent pointer */
    218 		int	blocked;
    219 		int	una;		/* unacked data segs */
    220 		int	scale;		/* how much to left shift window in rcved packets */
    221 	} rcv;
    222 	ulong	iss;			/* Initial sequence number */
    223 	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
    224 	ulong	cwind;			/* Congestion window */
    225 	int	scale;			/* desired snd.scale */
    226 	ushort	ssthresh;		/* Slow start threshold */
    227 	int	resent;			/* Bytes just resent */
    228 	int	irs;			/* Initial received squence */
    229 	ushort	mss;			/* Mean segment size */
    230 	int	rerecv;			/* Overlap of data rerecevived */
    231 	ulong	window;			/* Recevive window */
    232 	uchar	backoff;		/* Exponential backoff counter */
    233 	int	backedoff;		/* ms we've backed off for rexmits */
    234 	uchar	flags;			/* State flags */
    235 	Reseq	*reseq;			/* Resequencing queue */
    236 	Tcptimer	timer;			/* Activity timer */
    237 	Tcptimer	acktimer;		/* Acknowledge timer */
    238 	Tcptimer	rtt_timer;		/* Round trip timer */
    239 	Tcptimer	katimer;		/* keep alive timer */
    240 	ulong	rttseq;			/* Round trip sequence */
    241 	int	srtt;			/* Shortened round trip */
    242 	int	mdev;			/* Mean deviation of round trip */
    243 	int	kacounter;		/* count down for keep alive */
    244 	uint	sndsyntime;		/* time syn sent */
    245 	ulong	time;			/* time Finwait2 or Syn_received was sent */
    246 	int	nochecksum;		/* non-zero means don't send checksums */
    247 	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
    248 
    249 	union {
    250 		Tcp4hdr	tcp4hdr;
    251 		Tcp6hdr	tcp6hdr;
    252 	} protohdr;		/* prototype header */
    253 };
    254 
    255 /*
    256  *  New calls are put in limbo rather than having a conversation structure
    257  *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
    258  *  any real Conv structures mucking things up.  Calls in limbo rexmit their
    259  *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
    260  *
    261  *  In particular they aren't on a listener's queue so that they don't figure
    262  *  in the input queue limit.
    263  *
    264  *  If 1/2 of a T3 was attacking SYN packets, we'ld have a permanent queue
    265  *  of 70000 limbo'd calls.  Not great for a linear list but doable.  Therefore
    266  *  there is no hashing of this list.
    267  */
    268 typedef struct Limbo Limbo;
    269 struct Limbo
    270 {
    271 	Limbo	*next;
    272 
    273 	uchar	laddr[IPaddrlen];
    274 	uchar	raddr[IPaddrlen];
    275 	ushort	lport;
    276 	ushort	rport;
    277 	ulong	irs;		/* initial received sequence */
    278 	ulong	iss;		/* initial sent sequence */
    279 	ushort	mss;		/* mss from the other end */
    280 	ushort	rcvscale;	/* how much to scale rcvd windows */
    281 	ushort	sndscale;	/* how much to scale sent windows */
    282 	ulong	lastsend;	/* last time we sent a synack */
    283 	uchar	version;	/* v4 or v6 */
    284 	uchar	rexmits;	/* number of retransmissions */
    285 };
    286 
    287 int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
    288 ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
    289 
    290 enum {
    291 	/* MIB stats */
    292 	MaxConn,
    293 	ActiveOpens,
    294 	PassiveOpens,
    295 	EstabResets,
    296 	CurrEstab,
    297 	InSegs,
    298 	OutSegs,
    299 	RetransSegs,
    300 	RetransTimeouts,
    301 	InErrs,
    302 	OutRsts,
    303 
    304 	/* non-MIB stats */
    305 	CsumErrs,
    306 	HlenErrs,
    307 	LenErrs,
    308 	OutOfOrder,
    309 
    310 	Nstats
    311 };
    312 
    313 static char *statnames[] =
    314 {
    315 [MaxConn]	"MaxConn",
    316 [ActiveOpens]	"ActiveOpens",
    317 [PassiveOpens]	"PassiveOpens",
    318 [EstabResets]	"EstabResets",
    319 [CurrEstab]	"CurrEstab",
    320 [InSegs]	"InSegs",
    321 [OutSegs]	"OutSegs",
    322 [RetransSegs]	"RetransSegs",
    323 [RetransTimeouts]	"RetransTimeouts",
    324 [InErrs]	"InErrs",
    325 [OutRsts]	"OutRsts",
    326 [CsumErrs]	"CsumErrs",
    327 [HlenErrs]	"HlenErrs",
    328 [LenErrs]	"LenErrs",
    329 [OutOfOrder]	"OutOfOrder",
    330 };
    331 
    332 typedef struct Tcppriv Tcppriv;
    333 struct Tcppriv
    334 {
    335 	/* List of active timers */
    336 	QLock 	tl;
    337 	Tcptimer *timers;
    338 
    339 	/* hash table for matching conversations */
    340 	Ipht	ht;
    341 
    342 	/* calls in limbo waiting for an ACK to our SYN ACK */
    343 	int	nlimbo;
    344 	Limbo	*lht[NLHT];
    345 
    346 	/* for keeping track of tcpackproc */
    347 	QLock	apl;
    348 	int	ackprocstarted;
    349 
    350 	ulong	stats[Nstats];
    351 };
    352 
    353 /*
    354  *  Setting tcpporthogdefense to non-zero enables Dong Lin's
    355  *  solution to hijacked systems staking out port's as a form
    356  *  of DoS attack.
    357  *
    358  *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
    359  *  that number gets acked by the other end, we shut down the connection.
    360  *  Look for tcpporthogdefense in the code.
    361  */
    362 int tcpporthogdefense = 0;
    363 
    364 int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
    365 void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
    366 void	localclose(Conv*, char*);
    367 void	procsyn(Conv*, Tcp*);
    368 void	tcpiput(Proto*, Ipifc*, Block*);
    369 void	tcpoutput(Conv*);
    370 int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
    371 void	tcpstart(Conv*, int);
    372 void	tcptimeout(void*);
    373 void	tcpsndsyn(Conv*, Tcpctl*);
    374 void	tcprcvwin(Conv*);
    375 void	tcpacktimer(void*);
    376 void	tcpkeepalive(void*);
    377 void	tcpsetkacounter(Tcpctl*);
    378 void	tcprxmit(Conv*);
    379 void	tcpsettimer(Tcpctl*);
    380 void	tcpsynackrtt(Conv*);
    381 void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
    382 
    383 static void limborexmit(Proto*);
    384 static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
    385 
    386 void
    387 tcpsetstate(Conv *s, uchar newstate)
    388 {
    389 	Tcpctl *tcb;
    390 	uchar oldstate;
    391 	Tcppriv *tpriv;
    392 
    393 	tpriv = s->p->priv;
    394 
    395 	tcb = (Tcpctl*)s->ptcl;
    396 
    397 	oldstate = tcb->state;
    398 	if(oldstate == newstate)
    399 		return;
    400 
    401 	if(oldstate == Established)
    402 		tpriv->stats[CurrEstab]--;
    403 	if(newstate == Established)
    404 		tpriv->stats[CurrEstab]++;
    405 
    406 	/**
    407 	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
    408 		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
    409 	**/
    410 
    411 	switch(newstate) {
    412 	case Closed:
    413 		qclose(s->rq);
    414 		qclose(s->wq);
    415 		qclose(s->eq);
    416 		break;
    417 
    418 	case Close_wait:		/* Remote closes */
    419 		qhangup(s->rq, nil);
    420 		break;
    421 	}
    422 
    423 	tcb->state = newstate;
    424 
    425 	if(oldstate == Syn_sent && newstate != Closed)
    426 		Fsconnected(s, nil);
    427 }
    428 
    429 static char*
    430 tcpconnect(Conv *c, char **argv, int argc)
    431 {
    432 	char *e;
    433 	Tcpctl *tcb;
    434 
    435 	tcb = (Tcpctl*)(c->ptcl);
    436 	if(tcb->state != Closed)
    437 		return Econinuse;
    438 
    439 	e = Fsstdconnect(c, argv, argc);
    440 	if(e != nil)
    441 		return e;
    442 	tcpstart(c, TCP_CONNECT);
    443 
    444 	return nil;
    445 }
    446 
    447 static int
    448 tcpstate(Conv *c, char *state, int n)
    449 {
    450 	Tcpctl *s;
    451 
    452 	s = (Tcpctl*)(c->ptcl);
    453 
    454 	return snprint(state, n,
    455 		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
    456 		tcpstates[s->state],
    457 		c->rq ? qlen(c->rq) : 0,
    458 		c->wq ? qlen(c->wq) : 0,
    459 		s->srtt, s->mdev,
    460 		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
    461 		s->timer.start, s->timer.count, s->rerecv,
    462 		s->katimer.start, s->katimer.count);
    463 }
    464 
    465 static int
    466 tcpinuse(Conv *c)
    467 {
    468 	Tcpctl *s;
    469 
    470 	s = (Tcpctl*)(c->ptcl);
    471 	return s->state != Closed;
    472 }
    473 
    474 static char*
    475 tcpannounce(Conv *c, char **argv, int argc)
    476 {
    477 	char *e;
    478 	Tcpctl *tcb;
    479 
    480 	tcb = (Tcpctl*)(c->ptcl);
    481 	if(tcb->state != Closed)
    482 		return Econinuse;
    483 
    484 	e = Fsstdannounce(c, argv, argc);
    485 	if(e != nil)
    486 		return e;
    487 	tcpstart(c, TCP_LISTEN);
    488 	Fsconnected(c, nil);
    489 
    490 	return nil;
    491 }
    492 
    493 /*
    494  *  tcpclose is always called with the q locked
    495  */
    496 static void
    497 tcpclose(Conv *c)
    498 {
    499 	Tcpctl *tcb;
    500 
    501 	tcb = (Tcpctl*)c->ptcl;
    502 
    503 	qhangup(c->rq, nil);
    504 	qhangup(c->wq, nil);
    505 	qhangup(c->eq, nil);
    506 	qflush(c->rq);
    507 
    508 	switch(tcb->state) {
    509 	case Listen:
    510 		/*
    511 		 *  reset any incoming calls to this listener
    512 		 */
    513 		Fsconnected(c, "Hangup");
    514 
    515 		localclose(c, nil);
    516 		break;
    517 	case Closed:
    518 	case Syn_sent:
    519 		localclose(c, nil);
    520 		break;
    521 	case Syn_received:
    522 	case Established:
    523 		tcb->flgcnt++;
    524 		tcb->snd.nxt++;
    525 		tcpsetstate(c, Finwait1);
    526 		tcpoutput(c);
    527 		break;
    528 	case Close_wait:
    529 		tcb->flgcnt++;
    530 		tcb->snd.nxt++;
    531 		tcpsetstate(c, Last_ack);
    532 		tcpoutput(c);
    533 		break;
    534 	}
    535 }
    536 
    537 void
    538 tcpkick(void *x)
    539 {
    540 	Conv *s = x;
    541 	Tcpctl *tcb;
    542 
    543 	tcb = (Tcpctl*)s->ptcl;
    544 
    545 	if(waserror()){
    546 		QUNLOCK(s);
    547 		nexterror();
    548 	}
    549 	QLOCK(s);
    550 
    551 	switch(tcb->state) {
    552 	case Syn_sent:
    553 	case Syn_received:
    554 	case Established:
    555 	case Close_wait:
    556 		/*
    557 		 * Push data
    558 		 */
    559 		tcprcvwin(s);
    560 		tcpoutput(s);
    561 		break;
    562 	default:
    563 		localclose(s, "Hangup");
    564 		break;
    565 	}
    566 
    567 	QUNLOCK(s);
    568 	poperror();
    569 }
    570 
    571 void
    572 tcprcvwin(Conv *s)				/* Call with tcb locked */
    573 {
    574 	int w;
    575 	Tcpctl *tcb;
    576 
    577 	tcb = (Tcpctl*)s->ptcl;
    578 	w = tcb->window - qlen(s->rq);
    579 	if(w < 0)
    580 		w = 0;
    581 	tcb->rcv.wnd = w;
    582 	if(w == 0)
    583 		tcb->rcv.blocked = 1;
    584 }
    585 
    586 void
    587 tcpacktimer(void *v)
    588 {
    589 	Tcpctl *tcb;
    590 	Conv *s;
    591 
    592 	s = v;
    593 	tcb = (Tcpctl*)s->ptcl;
    594 
    595 	if(waserror()){
    596 		QUNLOCK(s);
    597 		nexterror();
    598 	}
    599 	QLOCK(s);
    600 	if(tcb->state != Closed){
    601 		tcb->flags |= FORCE;
    602 		tcprcvwin(s);
    603 		tcpoutput(s);
    604 	}
    605 	QUNLOCK(s);
    606 	poperror();
    607 }
    608 
    609 static void
    610 tcpcreate(Conv *c)
    611 {
    612 	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
    613 	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
    614 }
    615 
    616 static void
    617 timerstate(Tcppriv *priv, Tcptimer *t, int newstate)
    618 {
    619 	if(newstate != TcptimerON){
    620 		if(t->state == TcptimerON){
    621 			/* unchain */
    622 			if(priv->timers == t){
    623 				priv->timers = t->next;
    624 				if(t->prev != nil)
    625 					panic("timerstate1");
    626 			}
    627 			if(t->next)
    628 				t->next->prev = t->prev;
    629 			if(t->prev)
    630 				t->prev->next = t->next;
    631 			t->next = t->prev = nil;
    632 		}
    633 	} else {
    634 		if(t->state != TcptimerON){
    635 			/* chain */
    636 			if(t->prev != nil || t->next != nil)
    637 				panic("timerstate2");
    638 			t->prev = nil;
    639 			t->next = priv->timers;
    640 			if(t->next)
    641 				t->next->prev = t;
    642 			priv->timers = t;
    643 		}
    644 	}
    645 	t->state = newstate;
    646 }
    647 
    648 void
    649 tcpackproc(void *a)
    650 {
    651 	Tcptimer *t, *tp, *timeo;
    652 	Proto *tcp;
    653 	Tcppriv *priv;
    654 	int loop;
    655 
    656 	tcp = a;
    657 	priv = tcp->priv;
    658 
    659 	for(;;) {
    660 		tsleep(&up->sleep, return0, 0, MSPTICK);
    661 
    662 		qlock(&priv->tl);
    663 		timeo = nil;
    664 		loop = 0;
    665 		for(t = priv->timers; t != nil; t = tp) {
    666 			if(loop++ > 10000)
    667 				panic("tcpackproc1");
    668 			tp = t->next;
    669  			if(t->state == TcptimerON) {
    670 				t->count--;
    671 				if(t->count == 0) {
    672 					timerstate(priv, t, TcptimerDONE);
    673 					t->readynext = timeo;
    674 					timeo = t;
    675 				}
    676 			}
    677 		}
    678 		qunlock(&priv->tl);
    679 
    680 		loop = 0;
    681 		for(t = timeo; t != nil; t = t->readynext) {
    682 			if(loop++ > 10000)
    683 				panic("tcpackproc2");
    684 			if(t->state == TcptimerDONE && t->func != nil && !waserror()){
    685 				(*t->func)(t->arg);
    686 				poperror();
    687 			}
    688 		}
    689 
    690 		limborexmit(tcp);
    691 	}
    692 }
    693 
    694 void
    695 tcpgo(Tcppriv *priv, Tcptimer *t)
    696 {
    697 	if(t == nil || t->start == 0)
    698 		return;
    699 
    700 	qlock(&priv->tl);
    701 	t->count = t->start;
    702 	timerstate(priv, t, TcptimerON);
    703 	qunlock(&priv->tl);
    704 }
    705 
    706 void
    707 tcphalt(Tcppriv *priv, Tcptimer *t)
    708 {
    709 	if(t == nil)
    710 		return;
    711 
    712 	qlock(&priv->tl);
    713 	timerstate(priv, t, TcptimerOFF);
    714 	qunlock(&priv->tl);
    715 }
    716 
    717 int
    718 backoff(int n)
    719 {
    720 	return 1 << n;
    721 }
    722 
    723 void
    724 localclose(Conv *s, char *reason)	/* called with tcb locked */
    725 {
    726 	Tcpctl *tcb;
    727 	Reseq *rp,*rp1;
    728 	Tcppriv *tpriv;
    729 
    730 	tpriv = s->p->priv;
    731 	tcb = (Tcpctl*)s->ptcl;
    732 
    733 	iphtrem(&tpriv->ht, s);
    734 
    735 	tcphalt(tpriv, &tcb->timer);
    736 	tcphalt(tpriv, &tcb->rtt_timer);
    737 	tcphalt(tpriv, &tcb->acktimer);
    738 	tcphalt(tpriv, &tcb->katimer);
    739 
    740 	/* Flush reassembly queue; nothing more can arrive */
    741 	for(rp = tcb->reseq; rp != nil; rp = rp1) {
    742 		rp1 = rp->next;
    743 		freeblist(rp->bp);
    744 		free(rp);
    745 	}
    746 	tcb->reseq = nil;
    747 
    748 	if(tcb->state == Syn_sent)
    749 		Fsconnected(s, reason);
    750 	if(s->state == Announced)
    751 		wakeup(&s->listenr);
    752 
    753 	qhangup(s->rq, reason);
    754 	qhangup(s->wq, reason);
    755 
    756 	tcpsetstate(s, Closed);
    757 }
    758 
    759 /* mtu (- TCP + IP hdr len) of 1st hop */
    760 int
    761 tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
    762 {
    763 	Ipifc *ifc;
    764 	int mtu;
    765 
    766 	ifc = findipifc(tcp->f, addr, 0);
    767 	switch(version){
    768 	default:
    769 	case V4:
    770 		mtu = DEF_MSS;
    771 		if(ifc != nil)
    772 			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
    773 		break;
    774 	case V6:
    775 		mtu = DEF_MSS6;
    776 		if(ifc != nil)
    777 			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
    778 		break;
    779 	}
    780 	if(ifc != nil){
    781 		if(ifc->mbps > 1000)
    782 			*scale = HaveWS | 4;
    783 		else if(ifc->mbps > 100)
    784 			*scale = HaveWS | 3;
    785 		else if(ifc->mbps > 10)
    786 			*scale = HaveWS | 1;
    787 		else
    788 			*scale = HaveWS | 0;
    789 	} else
    790 		*scale = HaveWS | 0;
    791 
    792 	return mtu;
    793 }
    794 
    795 void
    796 inittcpctl(Conv *s, int mode)
    797 {
    798 	Tcpctl *tcb;
    799 	Tcp4hdr* h4;
    800 	Tcp6hdr* h6;
    801 	int mss;
    802 
    803 	tcb = (Tcpctl*)s->ptcl;
    804 
    805 	memset(tcb, 0, sizeof(Tcpctl));
    806 
    807 	tcb->ssthresh = 65535;
    808 	tcb->srtt = tcp_irtt<<LOGAGAIN;
    809 	tcb->mdev = 0;
    810 
    811 	/* setup timers */
    812 	tcb->timer.start = tcp_irtt / MSPTICK;
    813 	tcb->timer.func = tcptimeout;
    814 	tcb->timer.arg = s;
    815 	tcb->rtt_timer.start = MAX_TIME;
    816 	tcb->acktimer.start = TCP_ACK / MSPTICK;
    817 	tcb->acktimer.func = tcpacktimer;
    818 	tcb->acktimer.arg = s;
    819 	tcb->katimer.start = DEF_KAT / MSPTICK;
    820 	tcb->katimer.func = tcpkeepalive;
    821 	tcb->katimer.arg = s;
    822 
    823 	mss = DEF_MSS;
    824 
    825 	/* create a prototype(pseudo) header */
    826 	if(mode != TCP_LISTEN){
    827 		if(ipcmp(s->laddr, IPnoaddr) == 0)
    828 			findlocalip(s->p->f, s->laddr, s->raddr);
    829 
    830 		switch(s->ipversion){
    831 		case V4:
    832 			h4 = &tcb->protohdr.tcp4hdr;
    833 			memset(h4, 0, sizeof(*h4));
    834 			h4->proto = IP_TCPPROTO;
    835 			hnputs(h4->tcpsport, s->lport);
    836 			hnputs(h4->tcpdport, s->rport);
    837 			v6tov4(h4->tcpsrc, s->laddr);
    838 			v6tov4(h4->tcpdst, s->raddr);
    839 			break;
    840 		case V6:
    841 			h6 = &tcb->protohdr.tcp6hdr;
    842 			memset(h6, 0, sizeof(*h6));
    843 			h6->proto = IP_TCPPROTO;
    844 			hnputs(h6->tcpsport, s->lport);
    845 			hnputs(h6->tcpdport, s->rport);
    846 			ipmove(h6->tcpsrc, s->laddr);
    847 			ipmove(h6->tcpdst, s->raddr);
    848 			mss = DEF_MSS6;
    849 			break;
    850 		default:
    851 			panic("inittcpctl: version %d", s->ipversion);
    852 		}
    853 	}
    854 
    855 	tcb->mss = tcb->cwind = mss;
    856 
    857 	/* default is no window scaling */
    858 	tcb->window = QMAX;
    859 	tcb->rcv.wnd = QMAX;
    860 	tcb->rcv.scale = 0;
    861 	tcb->snd.scale = 0;
    862 	qsetlimit(s->rq, QMAX);
    863 }
    864 
    865 /*
    866  *  called with s QLOCKed
    867  */
    868 void
    869 tcpstart(Conv *s, int mode)
    870 {
    871 	Tcpctl *tcb;
    872 	Tcppriv *tpriv;
    873 	char kpname[KNAMELEN];
    874 
    875 	tpriv = s->p->priv;
    876 
    877 	if(tpriv->ackprocstarted == 0){
    878 		qlock(&tpriv->apl);
    879 		if(tpriv->ackprocstarted == 0){
    880 			sprint(kpname, "#I%dtcpack", s->p->f->dev);
    881 			kproc(kpname, tcpackproc, s->p);
    882 			tpriv->ackprocstarted = 1;
    883 		}
    884 		qunlock(&tpriv->apl);
    885 	}
    886 
    887 	tcb = (Tcpctl*)s->ptcl;
    888 
    889 	inittcpctl(s, mode);
    890 
    891 	iphtadd(&tpriv->ht, s);
    892 	switch(mode) {
    893 	case TCP_LISTEN:
    894 		tpriv->stats[PassiveOpens]++;
    895 		tcb->flags |= CLONE;
    896 		tcpsetstate(s, Listen);
    897 		break;
    898 
    899 	case TCP_CONNECT:
    900 		tpriv->stats[ActiveOpens]++;
    901 		tcb->flags |= ACTIVE;
    902 		tcpsndsyn(s, tcb);
    903 		tcpsetstate(s, Syn_sent);
    904 		tcpoutput(s);
    905 		break;
    906 	}
    907 }
    908 
    909 static char*
    910 tcpflag(ushort flag)
    911 {
    912 	static char buf[128];
    913 
    914 	sprint(buf, "%d", flag>>10);	/* Head len */
    915 	if(flag & URG)
    916 		strcat(buf, " URG");
    917 	if(flag & ACK)
    918 		strcat(buf, " ACK");
    919 	if(flag & PSH)
    920 		strcat(buf, " PSH");
    921 	if(flag & RST)
    922 		strcat(buf, " RST");
    923 	if(flag & SYN)
    924 		strcat(buf, " SYN");
    925 	if(flag & FIN)
    926 		strcat(buf, " FIN");
    927 
    928 	return buf;
    929 }
    930 
    931 Block *
    932 htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
    933 {
    934 	int dlen;
    935 	Tcp6hdr *h;
    936 	ushort csum;
    937 	ushort hdrlen, optpad = 0;
    938 	uchar *opt;
    939 
    940 	hdrlen = TCP6_HDRSIZE;
    941 	if(tcph->flags & SYN){
    942 		if(tcph->mss)
    943 			hdrlen += MSS_LENGTH;
    944 		if(tcph->ws)
    945 			hdrlen += WS_LENGTH;
    946 		optpad = hdrlen & 3;
    947 		if(optpad)
    948 			optpad = 4 - optpad;
    949 		hdrlen += optpad;
    950 	}
    951 
    952 	if(data) {
    953 		dlen = blocklen(data);
    954 		data = padblock(data, hdrlen + TCP6_PKT);
    955 		if(data == nil)
    956 			return nil;
    957 	}
    958 	else {
    959 		dlen = 0;
    960 		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
    961 		if(data == nil)
    962 			return nil;
    963 		data->wp += hdrlen + TCP6_PKT;
    964 	}
    965 
    966 	/* copy in pseudo ip header plus port numbers */
    967 	h = (Tcp6hdr *)(data->rp);
    968 	memmove(h, ph, TCP6_TCBPHDRSZ);
    969 
    970 	/* compose pseudo tcp header, do cksum calculation */
    971 	hnputl(h->vcf, hdrlen + dlen);
    972 	h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
    973 	h->ttl = ph->proto;
    974 
    975 	/* copy in variable bits */
    976 	hnputl(h->tcpseq, tcph->seq);
    977 	hnputl(h->tcpack, tcph->ack);
    978 	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
    979 	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
    980 	hnputs(h->tcpurg, tcph->urg);
    981 
    982 	if(tcph->flags & SYN){
    983 		opt = h->tcpopt;
    984 		if(tcph->mss != 0){
    985 			*opt++ = MSSOPT;
    986 			*opt++ = MSS_LENGTH;
    987 			hnputs(opt, tcph->mss);
    988 			opt += 2;
    989 		}
    990 		if(tcph->ws != 0){
    991 			*opt++ = WSOPT;
    992 			*opt++ = WS_LENGTH;
    993 			*opt++ = tcph->ws;
    994 		}
    995 		while(optpad-- > 0)
    996 			*opt++ = NOOPOPT;
    997 	}
    998 
    999 	if(tcb != nil && tcb->nochecksum){
   1000 		h->tcpcksum[0] = h->tcpcksum[1] = 0;
   1001 	} else {
   1002 		csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
   1003 		hnputs(h->tcpcksum, csum);
   1004 	}
   1005 
   1006 	/* move from pseudo header back to normal ip header */
   1007 	memset(h->vcf, 0, 4);
   1008 	h->vcf[0] = IP_VER6;
   1009 	hnputs(h->ploadlen, hdrlen+dlen);
   1010 	h->proto = ph->proto;
   1011 
   1012 	return data;
   1013 }
   1014 
   1015 Block *
   1016 htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
   1017 {
   1018 	int dlen;
   1019 	Tcp4hdr *h;
   1020 	ushort csum;
   1021 	ushort hdrlen, optpad = 0;
   1022 	uchar *opt;
   1023 
   1024 	hdrlen = TCP4_HDRSIZE;
   1025 	if(tcph->flags & SYN){
   1026 		if(tcph->mss)
   1027 			hdrlen += MSS_LENGTH;
   1028 		if(tcph->ws)
   1029 			hdrlen += WS_LENGTH;
   1030 		optpad = hdrlen & 3;
   1031 		if(optpad)
   1032 			optpad = 4 - optpad;
   1033 		hdrlen += optpad;
   1034 	}
   1035 
   1036 	if(data) {
   1037 		dlen = blocklen(data);
   1038 		data = padblock(data, hdrlen + TCP4_PKT);
   1039 		if(data == nil)
   1040 			return nil;
   1041 	}
   1042 	else {
   1043 		dlen = 0;
   1044 		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
   1045 		if(data == nil)
   1046 			return nil;
   1047 		data->wp += hdrlen + TCP4_PKT;
   1048 	}
   1049 
   1050 	/* copy in pseudo ip header plus port numbers */
   1051 	h = (Tcp4hdr *)(data->rp);
   1052 	memmove(h, ph, TCP4_TCBPHDRSZ);
   1053 
   1054 	/* copy in variable bits */
   1055 	hnputs(h->tcplen, hdrlen + dlen);
   1056 	hnputl(h->tcpseq, tcph->seq);
   1057 	hnputl(h->tcpack, tcph->ack);
   1058 	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
   1059 	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
   1060 	hnputs(h->tcpurg, tcph->urg);
   1061 
   1062 	if(tcph->flags & SYN){
   1063 		opt = h->tcpopt;
   1064 		if(tcph->mss != 0){
   1065 			*opt++ = MSSOPT;
   1066 			*opt++ = MSS_LENGTH;
   1067 			hnputs(opt, tcph->mss);
   1068 			opt += 2;
   1069 		}
   1070 		if(tcph->ws != 0){
   1071 			*opt++ = WSOPT;
   1072 			*opt++ = WS_LENGTH;
   1073 			*opt++ = tcph->ws;
   1074 		}
   1075 		while(optpad-- > 0)
   1076 			*opt++ = NOOPOPT;
   1077 	}
   1078 
   1079 	if(tcb != nil && tcb->nochecksum){
   1080 		h->tcpcksum[0] = h->tcpcksum[1] = 0;
   1081 	} else {
   1082 		csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
   1083 		hnputs(h->tcpcksum, csum);
   1084 	}
   1085 
   1086 	return data;
   1087 }
   1088 
   1089 int
   1090 ntohtcp6(Tcp *tcph, Block **bpp)
   1091 {
   1092 	Tcp6hdr *h;
   1093 	uchar *optr;
   1094 	ushort hdrlen;
   1095 	ushort optlen;
   1096 	int n;
   1097 
   1098 	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
   1099 	if(*bpp == nil)
   1100 		return -1;
   1101 
   1102 	h = (Tcp6hdr *)((*bpp)->rp);
   1103 	tcph->source = nhgets(h->tcpsport);
   1104 	tcph->dest = nhgets(h->tcpdport);
   1105 	tcph->seq = nhgetl(h->tcpseq);
   1106 	tcph->ack = nhgetl(h->tcpack);
   1107 	hdrlen = (h->tcpflag[0]>>2) & ~3;
   1108 	if(hdrlen < TCP6_HDRSIZE) {
   1109 		freeblist(*bpp);
   1110 		return -1;
   1111 	}
   1112 
   1113 	tcph->flags = h->tcpflag[1];
   1114 	tcph->wnd = nhgets(h->tcpwin);
   1115 	tcph->urg = nhgets(h->tcpurg);
   1116 	tcph->mss = 0;
   1117 	tcph->ws = 0;
   1118 	tcph->len = nhgets(h->ploadlen) - hdrlen;
   1119 
   1120 	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
   1121 	if(*bpp == nil)
   1122 		return -1;
   1123 
   1124 	optr = h->tcpopt;
   1125 	n = hdrlen - TCP6_HDRSIZE;
   1126 	while(n > 0 && *optr != EOLOPT) {
   1127 		if(*optr == NOOPOPT) {
   1128 			n--;
   1129 			optr++;
   1130 			continue;
   1131 		}
   1132 		optlen = optr[1];
   1133 		if(optlen < 2 || optlen > n)
   1134 			break;
   1135 		switch(*optr) {
   1136 		case MSSOPT:
   1137 			if(optlen == MSS_LENGTH)
   1138 				tcph->mss = nhgets(optr+2);
   1139 			break;
   1140 		case WSOPT:
   1141 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
   1142 				tcph->ws = HaveWS | *(optr+2);
   1143 			break;
   1144 		}
   1145 		n -= optlen;
   1146 		optr += optlen;
   1147 	}
   1148 	return hdrlen;
   1149 }
   1150 
   1151 int
   1152 ntohtcp4(Tcp *tcph, Block **bpp)
   1153 {
   1154 	Tcp4hdr *h;
   1155 	uchar *optr;
   1156 	ushort hdrlen;
   1157 	ushort optlen;
   1158 	int n;
   1159 
   1160 	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
   1161 	if(*bpp == nil)
   1162 		return -1;
   1163 
   1164 	h = (Tcp4hdr *)((*bpp)->rp);
   1165 	tcph->source = nhgets(h->tcpsport);
   1166 	tcph->dest = nhgets(h->tcpdport);
   1167 	tcph->seq = nhgetl(h->tcpseq);
   1168 	tcph->ack = nhgetl(h->tcpack);
   1169 
   1170 	hdrlen = (h->tcpflag[0]>>2) & ~3;
   1171 	if(hdrlen < TCP4_HDRSIZE) {
   1172 		freeblist(*bpp);
   1173 		return -1;
   1174 	}
   1175 
   1176 	tcph->flags = h->tcpflag[1];
   1177 	tcph->wnd = nhgets(h->tcpwin);
   1178 	tcph->urg = nhgets(h->tcpurg);
   1179 	tcph->mss = 0;
   1180 	tcph->ws = 0;
   1181 	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
   1182 
   1183 	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
   1184 	if(*bpp == nil)
   1185 		return -1;
   1186 
   1187 	optr = h->tcpopt;
   1188 	n = hdrlen - TCP4_HDRSIZE;
   1189 	while(n > 0 && *optr != EOLOPT) {
   1190 		if(*optr == NOOPOPT) {
   1191 			n--;
   1192 			optr++;
   1193 			continue;
   1194 		}
   1195 		optlen = optr[1];
   1196 		if(optlen < 2 || optlen > n)
   1197 			break;
   1198 		switch(*optr) {
   1199 		case MSSOPT:
   1200 			if(optlen == MSS_LENGTH)
   1201 				tcph->mss = nhgets(optr+2);
   1202 			break;
   1203 		case WSOPT:
   1204 			if(optlen == WS_LENGTH && *(optr+2) <= 14)
   1205 				tcph->ws = HaveWS | *(optr+2);
   1206 			break;
   1207 		}
   1208 		n -= optlen;
   1209 		optr += optlen;
   1210 	}
   1211 	return hdrlen;
   1212 }
   1213 
   1214 /*
   1215  *  For outgiing calls, generate an initial sequence
   1216  *  number and put a SYN on the send queue
   1217  */
   1218 void
   1219 tcpsndsyn(Conv *s, Tcpctl *tcb)
   1220 {
   1221 	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
   1222 	tcb->rttseq = tcb->iss;
   1223 	tcb->snd.wl2 = tcb->iss;
   1224 	tcb->snd.una = tcb->iss;
   1225 	tcb->snd.ptr = tcb->rttseq;
   1226 	tcb->snd.nxt = tcb->rttseq;
   1227 	tcb->flgcnt++;
   1228 	tcb->flags |= FORCE;
   1229 	tcb->sndsyntime = NOW;
   1230 
   1231 	/* set desired mss and scale */
   1232 	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
   1233 }
   1234 
   1235 void
   1236 sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
   1237 {
   1238 	Block *hbp;
   1239 	uchar rflags;
   1240 	Tcppriv *tpriv;
   1241 	Tcp4hdr ph4;
   1242 	Tcp6hdr ph6;
   1243 
   1244 	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
   1245 
   1246 	tpriv = tcp->priv;
   1247 
   1248 	if(seg->flags & RST)
   1249 		return;
   1250 
   1251 	/* make pseudo header */
   1252 	switch(version) {
   1253 	case V4:
   1254 		memset(&ph4, 0, sizeof(ph4));
   1255 		ph4.vihl = IP_VER4;
   1256 		v6tov4(ph4.tcpsrc, dest);
   1257 		v6tov4(ph4.tcpdst, source);
   1258 		ph4.proto = IP_TCPPROTO;
   1259 		hnputs(ph4.tcplen, TCP4_HDRSIZE);
   1260 		hnputs(ph4.tcpsport, seg->dest);
   1261 		hnputs(ph4.tcpdport, seg->source);
   1262 		break;
   1263 	case V6:
   1264 		memset(&ph6, 0, sizeof(ph6));
   1265 		ph6.vcf[0] = IP_VER6;
   1266 		ipmove(ph6.tcpsrc, dest);
   1267 		ipmove(ph6.tcpdst, source);
   1268 		ph6.proto = IP_TCPPROTO;
   1269 		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
   1270 		hnputs(ph6.tcpsport, seg->dest);
   1271 		hnputs(ph6.tcpdport, seg->source);
   1272 		break;
   1273 	default:
   1274 		panic("sndrst: version %d", version);
   1275 	}
   1276 
   1277 	tpriv->stats[OutRsts]++;
   1278 	rflags = RST;
   1279 
   1280 	/* convince the other end that this reset is in band */
   1281 	if(seg->flags & ACK) {
   1282 		seg->seq = seg->ack;
   1283 		seg->ack = 0;
   1284 	}
   1285 	else {
   1286 		rflags |= ACK;
   1287 		seg->ack = seg->seq;
   1288 		seg->seq = 0;
   1289 		if(seg->flags & SYN)
   1290 			seg->ack++;
   1291 		seg->ack += length;
   1292 		if(seg->flags & FIN)
   1293 			seg->ack++;
   1294 	}
   1295 	seg->flags = rflags;
   1296 	seg->wnd = 0;
   1297 	seg->urg = 0;
   1298 	seg->mss = 0;
   1299 	seg->ws = 0;
   1300 	switch(version) {
   1301 	case V4:
   1302 		hbp = htontcp4(seg, nil, &ph4, nil);
   1303 		if(hbp == nil)
   1304 			return;
   1305 		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
   1306 		break;
   1307 	case V6:
   1308 		hbp = htontcp6(seg, nil, &ph6, nil);
   1309 		if(hbp == nil)
   1310 			return;
   1311 		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
   1312 		break;
   1313 	default:
   1314 		panic("sndrst2: version %d", version);
   1315 	}
   1316 }
   1317 
   1318 /*
   1319  *  send a reset to the remote side and close the conversation
   1320  *  called with s QLOCKed
   1321  */
   1322 char*
   1323 tcphangup(Conv *s)
   1324 {
   1325 	Tcp seg;
   1326 	Tcpctl *tcb;
   1327 	Block *hbp;
   1328 
   1329 	tcb = (Tcpctl*)s->ptcl;
   1330 	if(waserror())
   1331 		return commonerror();
   1332 	if(ipcmp(s->raddr, IPnoaddr) != 0) {
   1333 		if(!waserror()){
   1334 			seg.flags = RST | ACK;
   1335 			seg.ack = tcb->rcv.nxt;
   1336 			tcb->rcv.una = 0;
   1337 			seg.seq = tcb->snd.ptr;
   1338 			seg.wnd = 0;
   1339 			seg.urg = 0;
   1340 			seg.mss = 0;
   1341 			seg.ws = 0;
   1342 			switch(s->ipversion) {
   1343 			case V4:
   1344 				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
   1345 				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
   1346 				ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
   1347 				break;
   1348 			case V6:
   1349 				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
   1350 				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
   1351 				ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
   1352 				break;
   1353 			default:
   1354 				panic("tcphangup: version %d", s->ipversion);
   1355 			}
   1356 			poperror();
   1357 		}
   1358 	}
   1359 	localclose(s, nil);
   1360 	poperror();
   1361 	return nil;
   1362 }
   1363 
   1364 /*
   1365  *  (re)send a SYN ACK
   1366  */
   1367 int
   1368 sndsynack(Proto *tcp, Limbo *lp)
   1369 {
   1370 	Block *hbp;
   1371 	Tcp4hdr ph4;
   1372 	Tcp6hdr ph6;
   1373 	Tcp seg;
   1374 	int scale;
   1375 
   1376 	/* make pseudo header */
   1377 	switch(lp->version) {
   1378 	case V4:
   1379 		memset(&ph4, 0, sizeof(ph4));
   1380 		ph4.vihl = IP_VER4;
   1381 		v6tov4(ph4.tcpsrc, lp->laddr);
   1382 		v6tov4(ph4.tcpdst, lp->raddr);
   1383 		ph4.proto = IP_TCPPROTO;
   1384 		hnputs(ph4.tcplen, TCP4_HDRSIZE);
   1385 		hnputs(ph4.tcpsport, lp->lport);
   1386 		hnputs(ph4.tcpdport, lp->rport);
   1387 		break;
   1388 	case V6:
   1389 		memset(&ph6, 0, sizeof(ph6));
   1390 		ph6.vcf[0] = IP_VER6;
   1391 		ipmove(ph6.tcpsrc, lp->laddr);
   1392 		ipmove(ph6.tcpdst, lp->raddr);
   1393 		ph6.proto = IP_TCPPROTO;
   1394 		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
   1395 		hnputs(ph6.tcpsport, lp->lport);
   1396 		hnputs(ph6.tcpdport, lp->rport);
   1397 		break;
   1398 	default:
   1399 		panic("sndrst: version %d", lp->version);
   1400 	}
   1401 
   1402 	seg.seq = lp->iss;
   1403 	seg.ack = lp->irs+1;
   1404 	seg.flags = SYN|ACK;
   1405 	seg.urg = 0;
   1406 	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
   1407 	seg.wnd = QMAX;
   1408 
   1409 	/* if the other side set scale, we should too */
   1410 	if(lp->rcvscale){
   1411 		seg.ws = scale;
   1412 		lp->sndscale = scale;
   1413 	} else {
   1414 		seg.ws = 0;
   1415 		lp->sndscale = 0;
   1416 	}
   1417 
   1418 	switch(lp->version) {
   1419 	case V4:
   1420 		hbp = htontcp4(&seg, nil, &ph4, nil);
   1421 		if(hbp == nil)
   1422 			return -1;
   1423 		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
   1424 		break;
   1425 	case V6:
   1426 		hbp = htontcp6(&seg, nil, &ph6, nil);
   1427 		if(hbp == nil)
   1428 			return -1;
   1429 		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
   1430 		break;
   1431 	default:
   1432 		panic("sndsnack: version %d", lp->version);
   1433 	}
   1434 	lp->lastsend = NOW;
   1435 	return 0;
   1436 }
   1437 
   1438 #define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + p )&LHTMASK )
   1439 
   1440 /*
   1441  *  put a call into limbo and respond with a SYN ACK
   1442  *
   1443  *  called with proto locked
   1444  */
   1445 static void
   1446 limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
   1447 {
   1448 	Limbo *lp, **l;
   1449 	Tcppriv *tpriv;
   1450 	int h;
   1451 
   1452 	tpriv = s->p->priv;
   1453 	h = hashipa(source, seg->source);
   1454 
   1455 	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
   1456 		lp = *l;
   1457 		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
   1458 			continue;
   1459 		if(ipcmp(lp->raddr, source) != 0)
   1460 			continue;
   1461 		if(ipcmp(lp->laddr, dest) != 0)
   1462 			continue;
   1463 
   1464 		/* each new SYN restarts the retransmits */
   1465 		lp->irs = seg->seq;
   1466 		break;
   1467 	}
   1468 	lp = *l;
   1469 	if(lp == nil){
   1470 		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
   1471 			lp = tpriv->lht[h];
   1472 			tpriv->lht[h] = lp->next;
   1473 			lp->next = nil;
   1474 		} else {
   1475 			lp = malloc(sizeof(*lp));
   1476 			if(lp == nil)
   1477 				return;
   1478 			tpriv->nlimbo++;
   1479 		}
   1480 		*l = lp;
   1481 		lp->version = version;
   1482 		ipmove(lp->laddr, dest);
   1483 		ipmove(lp->raddr, source);
   1484 		lp->lport = seg->dest;
   1485 		lp->rport = seg->source;
   1486 		lp->mss = seg->mss;
   1487 		lp->rcvscale = seg->ws;
   1488 		lp->irs = seg->seq;
   1489 		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
   1490 	}
   1491 
   1492 	if(sndsynack(s->p, lp) < 0){
   1493 		*l = lp->next;
   1494 		tpriv->nlimbo--;
   1495 		free(lp);
   1496 	}
   1497 }
   1498 
   1499 /*
   1500  *  resend SYN ACK's once every SYNACK_RXTIMER ms.
   1501  */
   1502 static void
   1503 limborexmit(Proto *tcp)
   1504 {
   1505 	Tcppriv *tpriv;
   1506 	Limbo **l, *lp;
   1507 	int h;
   1508 	int seen;
   1509 	ulong now;
   1510 
   1511 	tpriv = tcp->priv;
   1512 
   1513 	if(!CANQLOCK(tcp))
   1514 		return;
   1515 	seen = 0;
   1516 	now = NOW;
   1517 	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
   1518 		for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
   1519 			lp = *l;
   1520 			seen++;
   1521 			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER)
   1522 				continue;
   1523 
   1524 			/* time it out after 1 second */
   1525 			if(++(lp->rexmits) > 5){
   1526 				tpriv->nlimbo--;
   1527 				*l = lp->next;
   1528 				free(lp);
   1529 				continue;
   1530 			}
   1531 
   1532 			/* if we're being attacked, don't bother resending SYN ACK's */
   1533 			if(tpriv->nlimbo > 100)
   1534 				continue;
   1535 
   1536 			if(sndsynack(tcp, lp) < 0){
   1537 				tpriv->nlimbo--;
   1538 				*l = lp->next;
   1539 				free(lp);
   1540 				continue;
   1541 			}
   1542 
   1543 			l = &lp->next;
   1544 		}
   1545 	}
   1546 	QUNLOCK(tcp);
   1547 }
   1548 
   1549 /*
   1550  *  lookup call in limbo.  if found, throw it out.
   1551  *
   1552  *  called with proto locked
   1553  */
   1554 static void
   1555 limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
   1556 {
   1557 	Limbo *lp, **l;
   1558 	int h;
   1559 	Tcppriv *tpriv;
   1560 
   1561 	tpriv = s->p->priv;
   1562 
   1563 	/* find a call in limbo */
   1564 	h = hashipa(src, segp->source);
   1565 	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
   1566 		lp = *l;
   1567 		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
   1568 			continue;
   1569 		if(ipcmp(lp->laddr, dst) != 0)
   1570 			continue;
   1571 		if(ipcmp(lp->raddr, src) != 0)
   1572 			continue;
   1573 
   1574 		/* RST can only follow the SYN */
   1575 		if(segp->seq == lp->irs+1){
   1576 			tpriv->nlimbo--;
   1577 			*l = lp->next;
   1578 			free(lp);
   1579 		}
   1580 		break;
   1581 	}
   1582 }
   1583 
   1584 /*
   1585  *  come here when we finally get an ACK to our SYN-ACK.
   1586  *  lookup call in limbo.  if found, create a new conversation
   1587  *
   1588  *  called with proto locked
   1589  */
   1590 static Conv*
   1591 tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
   1592 {
   1593 	Conv *new;
   1594 	Tcpctl *tcb;
   1595 	Tcppriv *tpriv;
   1596 	Tcp4hdr *h4;
   1597 	Tcp6hdr *h6;
   1598 	Limbo *lp, **l;
   1599 	int h;
   1600 
   1601 	/* unless it's just an ack, it can't be someone coming out of limbo */
   1602 	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
   1603 		return nil;
   1604 
   1605 	tpriv = s->p->priv;
   1606 
   1607 	/* find a call in limbo */
   1608 	h = hashipa(src, segp->source);
   1609 	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
   1610 		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
   1611 			src, segp->source, lp->raddr, lp->rport,
   1612 			dst, segp->dest, lp->laddr, lp->lport,
   1613 			version, lp->version
   1614  		);
   1615 
   1616 		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
   1617 			continue;
   1618 		if(ipcmp(lp->laddr, dst) != 0)
   1619 			continue;
   1620 		if(ipcmp(lp->raddr, src) != 0)
   1621 			continue;
   1622 
   1623 		/* we're assuming no data with the initial SYN */
   1624 		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
   1625 			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
   1626 				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
   1627 			lp = nil;
   1628 		} else {
   1629 			tpriv->nlimbo--;
   1630 			*l = lp->next;
   1631 		}
   1632 		break;
   1633 	}
   1634 	if(lp == nil)
   1635 		return nil;
   1636 
   1637 	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
   1638 	if(new == nil)
   1639 		return nil;
   1640 
   1641 	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
   1642 	tcb = (Tcpctl*)new->ptcl;
   1643 	tcb->flags &= ~CLONE;
   1644 	tcb->timer.arg = new;
   1645 	tcb->timer.state = TcptimerOFF;
   1646 	tcb->acktimer.arg = new;
   1647 	tcb->acktimer.state = TcptimerOFF;
   1648 	tcb->katimer.arg = new;
   1649 	tcb->katimer.state = TcptimerOFF;
   1650 	tcb->rtt_timer.arg = new;
   1651 	tcb->rtt_timer.state = TcptimerOFF;
   1652 
   1653 	tcb->irs = lp->irs;
   1654 	tcb->rcv.nxt = tcb->irs+1;
   1655 	tcb->rcv.urg = tcb->rcv.nxt;
   1656 
   1657 	tcb->iss = lp->iss;
   1658 	tcb->rttseq = tcb->iss;
   1659 	tcb->snd.wl2 = tcb->iss;
   1660 	tcb->snd.una = tcb->iss+1;
   1661 	tcb->snd.ptr = tcb->iss+1;
   1662 	tcb->snd.nxt = tcb->iss+1;
   1663 	tcb->flgcnt = 0;
   1664 	tcb->flags |= SYNACK;
   1665 
   1666 	/* our sending max segment size cannot be bigger than what he asked for */
   1667 	if(lp->mss != 0 && lp->mss < tcb->mss)
   1668 		tcb->mss = lp->mss;
   1669 
   1670 	/* window scaling */
   1671 	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
   1672 
   1673 	/* the congestion window always starts out as a single segment */
   1674 	tcb->snd.wnd = segp->wnd;
   1675 	tcb->cwind = tcb->mss;
   1676 
   1677 	/* set initial round trip time */
   1678 	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
   1679 	tcpsynackrtt(new);
   1680 
   1681 	free(lp);
   1682 
   1683 	/* set up proto header */
   1684 	switch(version){
   1685 	case V4:
   1686 		h4 = &tcb->protohdr.tcp4hdr;
   1687 		memset(h4, 0, sizeof(*h4));
   1688 		h4->proto = IP_TCPPROTO;
   1689 		hnputs(h4->tcpsport, new->lport);
   1690 		hnputs(h4->tcpdport, new->rport);
   1691 		v6tov4(h4->tcpsrc, dst);
   1692 		v6tov4(h4->tcpdst, src);
   1693 		break;
   1694 	case V6:
   1695 		h6 = &tcb->protohdr.tcp6hdr;
   1696 		memset(h6, 0, sizeof(*h6));
   1697 		h6->proto = IP_TCPPROTO;
   1698 		hnputs(h6->tcpsport, new->lport);
   1699 		hnputs(h6->tcpdport, new->rport);
   1700 		ipmove(h6->tcpsrc, dst);
   1701 		ipmove(h6->tcpdst, src);
   1702 		break;
   1703 	default:
   1704 		panic("tcpincoming: version %d", new->ipversion);
   1705 	}
   1706 
   1707 	tcpsetstate(new, Established);
   1708 
   1709 	iphtadd(&tpriv->ht, new);
   1710 
   1711 	return new;
   1712 }
   1713 
   1714 int
   1715 seq_within(ulong x, ulong low, ulong high)
   1716 {
   1717 	if(low <= high){
   1718 		if(low <= x && x <= high)
   1719 			return 1;
   1720 	}
   1721 	else {
   1722 		if(x >= low || x <= high)
   1723 			return 1;
   1724 	}
   1725 	return 0;
   1726 }
   1727 
   1728 int
   1729 seq_lt(ulong x, ulong y)
   1730 {
   1731 	return (int)(x-y) < 0;
   1732 }
   1733 
   1734 int
   1735 seq_le(ulong x, ulong y)
   1736 {
   1737 	return (int)(x-y) <= 0;
   1738 }
   1739 
   1740 int
   1741 seq_gt(ulong x, ulong y)
   1742 {
   1743 	return (int)(x-y) > 0;
   1744 }
   1745 
   1746 int
   1747 seq_ge(ulong x, ulong y)
   1748 {
   1749 	return (int)(x-y) >= 0;
   1750 }
   1751 
   1752 /*
   1753  *  use the time between the first SYN and it's ack as the
   1754  *  initial round trip time
   1755  */
   1756 void
   1757 tcpsynackrtt(Conv *s)
   1758 {
   1759 	Tcpctl *tcb;
   1760 	int delta;
   1761 	Tcppriv *tpriv;
   1762 
   1763 	tcb = (Tcpctl*)s->ptcl;
   1764 	tpriv = s->p->priv;
   1765 
   1766 	delta = NOW - tcb->sndsyntime;
   1767 	tcb->srtt = delta<<LOGAGAIN;
   1768 	tcb->mdev = delta<<LOGDGAIN;
   1769 
   1770 	/* halt round trip timer */
   1771 	tcphalt(tpriv, &tcb->rtt_timer);
   1772 }
   1773 
   1774 void
   1775 update(Conv *s, Tcp *seg)
   1776 {
   1777 	int rtt, delta;
   1778 	Tcpctl *tcb;
   1779 	ulong acked;
   1780 	ulong expand;
   1781 	Tcppriv *tpriv;
   1782 
   1783 	tpriv = s->p->priv;
   1784 	tcb = (Tcpctl*)s->ptcl;
   1785 
   1786 	/* if everything has been acked, force output(?) */
   1787 	if(seq_gt(seg->ack, tcb->snd.nxt)) {
   1788 		tcb->flags |= FORCE;
   1789 		return;
   1790 	}
   1791 
   1792 	/* added by Dong Lin for fast retransmission */
   1793 	if(seg->ack == tcb->snd.una
   1794 	&& tcb->snd.una != tcb->snd.nxt
   1795 	&& seg->len == 0
   1796 	&& seg->wnd == tcb->snd.wnd) {
   1797 
   1798 		/* this is a pure ack w/o window update */
   1799 		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
   1800 			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
   1801 
   1802 		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
   1803 			/*
   1804 			 *  tahoe tcp rxt the packet, half sshthresh,
   1805  			 *  and set cwnd to one packet
   1806 			 */
   1807 			tcb->snd.recovery = 1;
   1808 			tcb->snd.rxt = tcb->snd.nxt;
   1809 			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
   1810 			tcprxmit(s);
   1811 		} else {
   1812 			/* do reno tcp here. */
   1813 		}
   1814 	}
   1815 
   1816 	/*
   1817 	 *  update window
   1818 	 */
   1819 	if(seq_gt(seg->ack, tcb->snd.wl2)
   1820 	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
   1821 		tcb->snd.wnd = seg->wnd;
   1822 		tcb->snd.wl2 = seg->ack;
   1823 	}
   1824 
   1825 	if(!seq_gt(seg->ack, tcb->snd.una)){
   1826 		/*
   1827 		 *  don't let us hangup if sending into a closed window and
   1828 		 *  we're still getting acks
   1829 		 */
   1830 		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
   1831 			tcb->backedoff = MAXBACKMS/4;
   1832 		}
   1833 		return;
   1834 	}
   1835 
   1836 	/*
   1837 	 *  any positive ack turns off fast rxt,
   1838 	 *  (should we do new-reno on partial acks?)
   1839 	 */
   1840 	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
   1841 		tcb->snd.dupacks = 0;
   1842 		tcb->snd.recovery = 0;
   1843 	} else
   1844 		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
   1845 
   1846 	/* Compute the new send window size */
   1847 	acked = seg->ack - tcb->snd.una;
   1848 
   1849 	/* avoid slow start and timers for SYN acks */
   1850 	if((tcb->flags & SYNACK) == 0) {
   1851 		tcb->flags |= SYNACK;
   1852 		acked--;
   1853 		tcb->flgcnt--;
   1854 		goto done;
   1855 	}
   1856 
   1857 	/* slow start as long as we're not recovering from lost packets */
   1858 	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
   1859 		if(tcb->cwind < tcb->ssthresh) {
   1860 			expand = tcb->mss;
   1861 			if(acked < expand)
   1862 				expand = acked;
   1863 		}
   1864 		else
   1865 			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
   1866 
   1867 		if(tcb->cwind + expand < tcb->cwind)
   1868 			expand = tcb->snd.wnd - tcb->cwind;
   1869 		if(tcb->cwind + expand > tcb->snd.wnd)
   1870 			expand = tcb->snd.wnd - tcb->cwind;
   1871 		tcb->cwind += expand;
   1872 	}
   1873 
   1874 	/* Adjust the timers according to the round trip time */
   1875 	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
   1876 		tcphalt(tpriv, &tcb->rtt_timer);
   1877 		if((tcb->flags&RETRAN) == 0) {
   1878 			tcb->backoff = 0;
   1879 			tcb->backedoff = 0;
   1880 			rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
   1881 			if(rtt == 0)
   1882 				rtt = 1;	/* otherwise all close systems will rexmit in 0 time */
   1883 			rtt *= MSPTICK;
   1884 			if(tcb->srtt == 0) {
   1885 				tcb->srtt = rtt << LOGAGAIN;
   1886 				tcb->mdev = rtt << LOGDGAIN;
   1887 			} else {
   1888 				delta = rtt - (tcb->srtt>>LOGAGAIN);
   1889 				tcb->srtt += delta;
   1890 				if(tcb->srtt <= 0)
   1891 					tcb->srtt = 1;
   1892 
   1893 				delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
   1894 				tcb->mdev += delta;
   1895 				if(tcb->mdev <= 0)
   1896 					tcb->mdev = 1;
   1897 			}
   1898 			tcpsettimer(tcb);
   1899 		}
   1900 	}
   1901 
   1902 done:
   1903 	if(qdiscard(s->wq, acked) < acked)
   1904 		tcb->flgcnt--;
   1905 
   1906 	tcb->snd.una = seg->ack;
   1907 	if(seq_gt(seg->ack, tcb->snd.urg))
   1908 		tcb->snd.urg = seg->ack;
   1909 
   1910 	if(tcb->snd.una != tcb->snd.nxt)
   1911 		tcpgo(tpriv, &tcb->timer);
   1912 	else
   1913 		tcphalt(tpriv, &tcb->timer);
   1914 
   1915 	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
   1916 		tcb->snd.ptr = tcb->snd.una;
   1917 
   1918 	tcb->flags &= ~RETRAN;
   1919 	tcb->backoff = 0;
   1920 	tcb->backedoff = 0;
   1921 }
   1922 
   1923 void
   1924 tcpiput(Proto *tcp, Ipifc* _, Block *bp)
   1925 {
   1926 	Tcp seg;
   1927 	Tcp4hdr *h4;
   1928 	Tcp6hdr *h6;
   1929 	int hdrlen;
   1930 	Tcpctl *tcb;
   1931 	ushort length, csum;
   1932 	uchar source[IPaddrlen], dest[IPaddrlen];
   1933 	Conv *s;
   1934 	Fs *f;
   1935 	Tcppriv *tpriv;
   1936 	uchar version;
   1937 
   1938 	f = tcp->f;
   1939 	tpriv = tcp->priv;
   1940 
   1941 	tpriv->stats[InSegs]++;
   1942 
   1943 	h4 = (Tcp4hdr*)(bp->rp);
   1944 	h6 = (Tcp6hdr*)(bp->rp);
   1945 
   1946 	if((h4->vihl&0xF0)==IP_VER4) {
   1947 		version = V4;
   1948 		length = nhgets(h4->length);
   1949 		v4tov6(dest, h4->tcpdst);
   1950 		v4tov6(source, h4->tcpsrc);
   1951 
   1952 		h4->Unused = 0;
   1953 		hnputs(h4->tcplen, length-TCP4_PKT);
   1954 		if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
   1955 			ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
   1956 			tpriv->stats[CsumErrs]++;
   1957 			tpriv->stats[InErrs]++;
   1958 			netlog(f, Logtcp, "bad tcp proto cksum\n");
   1959 			freeblist(bp);
   1960 			return;
   1961 		}
   1962 
   1963 		hdrlen = ntohtcp4(&seg, &bp);
   1964 		if(hdrlen < 0){
   1965 			tpriv->stats[HlenErrs]++;
   1966 			tpriv->stats[InErrs]++;
   1967 			netlog(f, Logtcp, "bad tcp hdr len\n");
   1968 			return;
   1969 		}
   1970 
   1971 		/* trim the packet to the size claimed by the datagram */
   1972 		length -= hdrlen+TCP4_PKT;
   1973 		bp = trimblock(bp, hdrlen+TCP4_PKT, length);
   1974 		if(bp == nil){
   1975 			tpriv->stats[LenErrs]++;
   1976 			tpriv->stats[InErrs]++;
   1977 			netlog(f, Logtcp, "tcp len < 0 after trim\n");
   1978 			return;
   1979 		}
   1980 	}
   1981 	else {
   1982 		int ttl = h6->ttl;
   1983 		int proto = h6->proto;
   1984 
   1985 		version = V6;
   1986 		length = nhgets(h6->ploadlen);
   1987 		ipmove(dest, h6->tcpdst);
   1988 		ipmove(source, h6->tcpsrc);
   1989 
   1990 		h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
   1991 		h6->ttl = proto;
   1992 		hnputl(h6->vcf, length);
   1993 		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
   1994 		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
   1995 			tpriv->stats[CsumErrs]++;
   1996 			tpriv->stats[InErrs]++;
   1997 			netlog(f, Logtcp,
   1998 			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
   1999 				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
   2000 			freeblist(bp);
   2001 			return;
   2002 		}
   2003 		h6->ttl = ttl;
   2004 		h6->proto = proto;
   2005 		hnputs(h6->ploadlen, length);
   2006 
   2007 		hdrlen = ntohtcp6(&seg, &bp);
   2008 		if(hdrlen < 0){
   2009 			tpriv->stats[HlenErrs]++;
   2010 			tpriv->stats[InErrs]++;
   2011 			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
   2012 			return;
   2013 		}
   2014 
   2015 		/* trim the packet to the size claimed by the datagram */
   2016 		length -= hdrlen;
   2017 		bp = trimblock(bp, hdrlen+TCP6_PKT, length);
   2018 		if(bp == nil){
   2019 			tpriv->stats[LenErrs]++;
   2020 			tpriv->stats[InErrs]++;
   2021 			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
   2022 			return;
   2023 		}
   2024 	}
   2025 
   2026 	/* lock protocol while searching for a conversation */
   2027 	QLOCK(tcp);
   2028 
   2029 	/* Look for a matching conversation */
   2030 	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
   2031 	if(s == nil){
   2032 		netlog(f, Logtcp, "iphtlook failed\n");
   2033 reset:
   2034 		QUNLOCK(tcp);
   2035 		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
   2036 		freeblist(bp);
   2037 		return;
   2038 	}
   2039 
   2040 	/* if it's a listener, look for the right flags and get a new conv */
   2041 	tcb = (Tcpctl*)s->ptcl;
   2042 	if(tcb->state == Listen){
   2043 		if(seg.flags & RST){
   2044 			limborst(s, &seg, source, dest, version);
   2045 			QUNLOCK(tcp);
   2046 			freeblist(bp);
   2047 			return;
   2048 		}
   2049 
   2050 		/* if this is a new SYN, put the call into limbo */
   2051 		if((seg.flags & SYN) && (seg.flags & ACK) == 0){
   2052 			limbo(s, source, dest, &seg, version);
   2053 			QUNLOCK(tcp);
   2054 			freeblist(bp);
   2055 			return;
   2056 		}
   2057 
   2058 		/*
   2059 		 *  if there's a matching call in limbo, tcpincoming will
   2060 		 *  return it in state Syn_received
   2061 		 */
   2062 		s = tcpincoming(s, &seg, source, dest, version);
   2063 		if(s == nil)
   2064 			goto reset;
   2065 	}
   2066 
   2067 	/* The rest of the input state machine is run with the control block
   2068 	 * locked and implements the state machine directly out of the RFC.
   2069 	 * Out-of-band data is ignored - it was always a bad idea.
   2070 	 */
   2071 	tcb = (Tcpctl*)s->ptcl;
   2072 	if(waserror()){
   2073 		QUNLOCK(s);
   2074 		nexterror();
   2075 	}
   2076 	QLOCK(s);
   2077 	QUNLOCK(tcp);
   2078 
   2079 	/* fix up window */
   2080 	seg.wnd <<= tcb->rcv.scale;
   2081 
   2082 	/* every input packet in puts off the keep alive time out */
   2083 	tcpsetkacounter(tcb);
   2084 
   2085 	switch(tcb->state) {
   2086 	case Closed:
   2087 		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
   2088 		goto raise;
   2089 	case Syn_sent:
   2090 		if(seg.flags & ACK) {
   2091 			if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
   2092 				sndrst(tcp, source, dest, length, &seg, version,
   2093 					 "bad seq in Syn_sent");
   2094 				goto raise;
   2095 			}
   2096 		}
   2097 		if(seg.flags & RST) {
   2098 			if(seg.flags & ACK)
   2099 				localclose(s, Econrefused);
   2100 			goto raise;
   2101 		}
   2102 
   2103 		if(seg.flags & SYN) {
   2104 			procsyn(s, &seg);
   2105 			if(seg.flags & ACK){
   2106 				update(s, &seg);
   2107 				tcpsynackrtt(s);
   2108 				tcpsetstate(s, Established);
   2109 				tcpsetscale(s, tcb, seg.ws, tcb->scale);
   2110 			}
   2111 			else {
   2112 				tcb->time = NOW;
   2113 				tcpsetstate(s, Syn_received);	/* DLP - shouldn't this be a reset? */
   2114 			}
   2115 
   2116 			if(length != 0 || (seg.flags & FIN))
   2117 				break;
   2118 
   2119 			freeblist(bp);
   2120 			goto output;
   2121 		}
   2122 		else
   2123 			freeblist(bp);
   2124 
   2125 		QUNLOCK(s);
   2126 		poperror();
   2127 		return;
   2128 	case Syn_received:
   2129 		/* doesn't matter if it's the correct ack, we're just trying to set timing */
   2130 		if(seg.flags & ACK)
   2131 			tcpsynackrtt(s);
   2132 		break;
   2133 	}
   2134 
   2135 	/*
   2136 	 *  One DOS attack is to open connections to us and then forget about them,
   2137 	 *  thereby tying up a conv at no long term cost to the attacker.
   2138 	 *  This is an attempt to defeat these stateless DOS attacks.  See
   2139 	 *  corresponding code in tcpsendka().
   2140 	 */
   2141 	if(tcb->state != Syn_received && (seg.flags & RST) == 0){
   2142 		if(tcpporthogdefense
   2143 		&& seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
   2144 			print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
   2145 				source, seg.source, dest, seg.dest, seg.flags,
   2146 				tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
   2147 			localclose(s, "stateless hog");
   2148 		}
   2149 	}
   2150 
   2151 	/* Cut the data to fit the receive window */
   2152 	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
   2153 		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
   2154 		update(s, &seg);
   2155 		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
   2156 			tcphalt(tpriv, &tcb->rtt_timer);
   2157 			tcphalt(tpriv, &tcb->acktimer);
   2158 			tcphalt(tpriv, &tcb->katimer);
   2159 			tcpsetstate(s, Time_wait);
   2160 			tcb->timer.start = MSL2*(1000 / MSPTICK);
   2161 			tcpgo(tpriv, &tcb->timer);
   2162 		}
   2163 		if(!(seg.flags & RST)) {
   2164 			tcb->flags |= FORCE;
   2165 			goto output;
   2166 		}
   2167 		QUNLOCK(s);
   2168 		poperror();
   2169 		return;
   2170 	}
   2171 
   2172 	/* Cannot accept so answer with a rst */
   2173 	if(length && tcb->state == Closed) {
   2174 		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
   2175 		goto raise;
   2176 	}
   2177 
   2178 	/* The segment is beyond the current receive pointer so
   2179 	 * queue the data in the resequence queue
   2180 	 */
   2181 	if(seg.seq != tcb->rcv.nxt)
   2182 	if(length != 0 || (seg.flags & (SYN|FIN))) {
   2183 		update(s, &seg);
   2184 		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
   2185 			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
   2186 		tcb->flags |= FORCE;
   2187 		goto output;
   2188 	}
   2189 
   2190 	/*
   2191 	 *  keep looping till we've processed this packet plus any
   2192 	 *  adjacent packets in the resequence queue
   2193 	 */
   2194 	for(;;) {
   2195 		if(seg.flags & RST) {
   2196 			if(tcb->state == Established) {
   2197 				tpriv->stats[EstabResets]++;
   2198 				if(tcb->rcv.nxt != seg.seq)
   2199 					print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
   2200 			}
   2201 			localclose(s, Econrefused);
   2202 			goto raise;
   2203 		}
   2204 
   2205 		if((seg.flags&ACK) == 0)
   2206 			goto raise;
   2207 
   2208 		switch(tcb->state) {
   2209 		case Syn_received:
   2210 			if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
   2211 				sndrst(tcp, source, dest, length, &seg, version,
   2212 					"bad seq in Syn_received");
   2213 				goto raise;
   2214 			}
   2215 			update(s, &seg);
   2216 			tcpsetstate(s, Established);
   2217 		case Established:
   2218 		case Close_wait:
   2219 			update(s, &seg);
   2220 			break;
   2221 		case Finwait1:
   2222 			update(s, &seg);
   2223 			if(qlen(s->wq)+tcb->flgcnt == 0){
   2224 				tcphalt(tpriv, &tcb->rtt_timer);
   2225 				tcphalt(tpriv, &tcb->acktimer);
   2226 				tcpsetkacounter(tcb);
   2227 				tcb->time = NOW;
   2228 				tcpsetstate(s, Finwait2);
   2229 				tcb->katimer.start = MSL2 * (1000 / MSPTICK);
   2230 				tcpgo(tpriv, &tcb->katimer);
   2231 			}
   2232 			break;
   2233 		case Finwait2:
   2234 			update(s, &seg);
   2235 			break;
   2236 		case Closing:
   2237 			update(s, &seg);
   2238 			if(qlen(s->wq)+tcb->flgcnt == 0) {
   2239 				tcphalt(tpriv, &tcb->rtt_timer);
   2240 				tcphalt(tpriv, &tcb->acktimer);
   2241 				tcphalt(tpriv, &tcb->katimer);
   2242 				tcpsetstate(s, Time_wait);
   2243 				tcb->timer.start = MSL2*(1000 / MSPTICK);
   2244 				tcpgo(tpriv, &tcb->timer);
   2245 			}
   2246 			break;
   2247 		case Last_ack:
   2248 			update(s, &seg);
   2249 			if(qlen(s->wq)+tcb->flgcnt == 0) {
   2250 				localclose(s, nil);
   2251 				goto raise;
   2252 			}
   2253 		case Time_wait:
   2254 			tcb->flags |= FORCE;
   2255 			if(tcb->timer.state != TcptimerON)
   2256 				tcpgo(tpriv, &tcb->timer);
   2257 		}
   2258 
   2259 		if((seg.flags&URG) && seg.urg) {
   2260 			if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
   2261 				tcb->rcv.urg = seg.urg + seg.seq;
   2262 				pullblock(&bp, seg.urg);
   2263 			}
   2264 		}
   2265 		else
   2266 		if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
   2267 			tcb->rcv.urg = tcb->rcv.nxt;
   2268 
   2269 		if(length == 0) {
   2270 			if(bp != nil)
   2271 				freeblist(bp);
   2272 		}
   2273 		else {
   2274 			switch(tcb->state){
   2275 			default:
   2276 				/* Ignore segment text */
   2277 				if(bp != nil)
   2278 					freeblist(bp);
   2279 				break;
   2280 
   2281 			case Syn_received:
   2282 			case Established:
   2283 			case Finwait1:
   2284 				/* If we still have some data place on
   2285 				 * receive queue
   2286 				 */
   2287 				if(bp) {
   2288 					bp = packblock(bp);
   2289 					if(bp == nil)
   2290 						panic("tcp packblock");
   2291 					qpassnolim(s->rq, bp);
   2292 					bp = nil;
   2293 
   2294 					/*
   2295 					 *  Force an ack every 2 data messages.  This is
   2296 					 *  a hack for rob to make his home system run
   2297 					 *  faster.
   2298 					 *
   2299 					 *  this also keeps the standard TCP congestion
   2300 					 *  control working since it needs an ack every
   2301 					 *  2 max segs worth.  This is not quite that,
   2302 					 *  but under a real stream is equivalent since
   2303 					 *  every packet has a max seg in it.
   2304 					 */
   2305 					if(++(tcb->rcv.una) >= 2)
   2306 						tcb->flags |= FORCE;
   2307 				}
   2308 				tcb->rcv.nxt += length;
   2309 
   2310 				/*
   2311 				 *  update our rcv window
   2312 				 */
   2313 				tcprcvwin(s);
   2314 
   2315 				/*
   2316 				 *  turn on the acktimer if there's something
   2317 				 *  to ack
   2318 				 */
   2319 				if(tcb->acktimer.state != TcptimerON)
   2320 					tcpgo(tpriv, &tcb->acktimer);
   2321 
   2322 				break;
   2323 			case Finwait2:
   2324 				/* no process to read the data, send a reset */
   2325 				if(bp != nil)
   2326 					freeblist(bp);
   2327 				sndrst(tcp, source, dest, length, &seg, version,
   2328 					"send to Finwait2");
   2329 				QUNLOCK(s);
   2330 				poperror();
   2331 				return;
   2332 			}
   2333 		}
   2334 
   2335 		if(seg.flags & FIN) {
   2336 			tcb->flags |= FORCE;
   2337 
   2338 			switch(tcb->state) {
   2339 			case Syn_received:
   2340 			case Established:
   2341 				tcb->rcv.nxt++;
   2342 				tcpsetstate(s, Close_wait);
   2343 				break;
   2344 			case Finwait1:
   2345 				tcb->rcv.nxt++;
   2346 				if(qlen(s->wq)+tcb->flgcnt == 0) {
   2347 					tcphalt(tpriv, &tcb->rtt_timer);
   2348 					tcphalt(tpriv, &tcb->acktimer);
   2349 					tcphalt(tpriv, &tcb->katimer);
   2350 					tcpsetstate(s, Time_wait);
   2351 					tcb->timer.start = MSL2*(1000/MSPTICK);
   2352 					tcpgo(tpriv, &tcb->timer);
   2353 				}
   2354 				else
   2355 					tcpsetstate(s, Closing);
   2356 				break;
   2357 			case Finwait2:
   2358 				tcb->rcv.nxt++;
   2359 				tcphalt(tpriv, &tcb->rtt_timer);
   2360 				tcphalt(tpriv, &tcb->acktimer);
   2361 				tcphalt(tpriv, &tcb->katimer);
   2362 				tcpsetstate(s, Time_wait);
   2363 				tcb->timer.start = MSL2 * (1000/MSPTICK);
   2364 				tcpgo(tpriv, &tcb->timer);
   2365 				break;
   2366 			case Close_wait:
   2367 			case Closing:
   2368 			case Last_ack:
   2369 				break;
   2370 			case Time_wait:
   2371 				tcpgo(tpriv, &tcb->timer);
   2372 				break;
   2373 			}
   2374 		}
   2375 
   2376 		/*
   2377 		 *  get next adjacent segment from the resequence queue.
   2378 		 *  dump/trim any overlapping segments
   2379 		 */
   2380 		for(;;) {
   2381 			if(tcb->reseq == nil)
   2382 				goto output;
   2383 
   2384 			if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
   2385 				goto output;
   2386 
   2387 			getreseq(tcb, &seg, &bp, &length);
   2388 
   2389 			if(tcptrim(tcb, &seg, &bp, &length) == 0)
   2390 				break;
   2391 		}
   2392 	}
   2393 output:
   2394 	tcpoutput(s);
   2395 	QUNLOCK(s);
   2396 	poperror();
   2397 	return;
   2398 raise:
   2399 	QUNLOCK(s);
   2400 	poperror();
   2401 	freeblist(bp);
   2402 	tcpkick(s);
   2403 }
   2404 
/*
 *  Transmit whatever the offered window, the congestion window and
 *  the write queue allow, up to 100 segments per call.  A segment
 *  with no payload (a bare ack) is sent only when FORCE is set in
 *  tcb->flags.
 *
 *  always enters and exits with the s locked.  In this function the
 *  lock is released around sched() after some of the segments (see
 *  the end of the loop), so callers cannot assume the connection
 *  state is unchanged when tcpoutput returns.
 */
void
tcpoutput(Conv *s)
{
	Tcp seg;
	int msgs;
	Tcpctl *tcb;
	Block *hbp, *bp;
	int sndcnt, n;
	ulong ssize, dsize, usable, sent;
	Fs *f;
	Tcppriv *tpriv;
	uchar version;

	f = s->p->f;
	tpriv = s->p->priv;
	version = s->ipversion;

	for(msgs = 0; msgs < 100; msgs++) {
		/* re-read each time round: the lock may have been dropped below */
		tcb = (Tcpctl*)s->ptcl;

		/* nothing is transmitted from these states */
		switch(tcb->state) {
		case Listen:
		case Closed:
		case Finwait2:
			return;
		}

		/* force an ack when a window has opened up */
		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
			tcb->rcv.blocked = 0;
			tcb->flags |= FORCE;
		}

		/*
		 *  sndcnt: bytes on the write queue plus flgcnt
		 *  (presumably the sequence space owed to SYN/FIN - confirm).
		 *  sent: distance from snd.una to the send pointer.
		 */
		sndcnt = qlen(s->wq)+tcb->flgcnt;
		sent = tcb->snd.ptr - tcb->snd.una;

		/* Don't send anything else until our SYN has been acked */
		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
			break;

		/* Compute usable segment based on offered window and limit
		 * window probes to one
		 */
		if(tcb->snd.wnd == 0){
			if(sent != 0) {
				if((tcb->flags&FORCE) == 0)
					break;
//				tcb->snd.ptr = tcb->snd.una;
			}
			usable = 1;
		}
		else {
			/* the smaller of the congestion and offered windows ... */
			usable = tcb->cwind;
			if(tcb->snd.wnd < usable)
				usable = tcb->snd.wnd;
			/*
			 * ... less what is already in flight.
			 * NOTE(review): usable and sent are unsigned, so this
			 * wraps if sent exceeds the window - confirm that
			 * cannot happen.
			 */
			usable -= sent;
		}
		/* ssize: sequence space to send now, capped by usable and mss */
		ssize = sndcnt-sent;
		if(ssize && usable < 2)
			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
				tcb->snd.wnd, tcb->cwind);
		if(usable < ssize)
			ssize = usable;
		if(tcb->mss < ssize)
			ssize = tcb->mss;
		/* dsize: the part of ssize that is payload; adjusted below for SYN/FIN */
		dsize = ssize;
		seg.urg = 0;

		/* nothing to send and no ack demanded: done */
		if(ssize == 0)
		if((tcb->flags&FORCE) == 0)
			break;

		tcb->flags &= ~FORCE;
		tcprcvwin(s);

		/* By default we will generate an ack */
		tcphalt(tpriv, &tcb->acktimer);
		/* rcv.una is the counter tcpiput increments per queued data message */
		tcb->rcv.una = 0;
		seg.source = s->lport;
		seg.dest = s->rport;
		seg.flags = ACK;
		seg.mss = 0;
		seg.ws = 0;
		switch(tcb->state){
		case Syn_sent:
			/* no ACK flag while in Syn_sent */
			seg.flags = 0;
			if(tcb->snd.ptr == tcb->iss){
				/* the SYN uses one unit of ssize, leaving one less for data */
				seg.flags |= SYN;
				dsize--;
				seg.mss = tcb->mss;
				seg.ws = tcb->scale;
			}
			break;
		case Syn_received:
			/*
			 *  don't send any data with a SYN/ACK packet
			 *  because Linux rejects the packet in its
			 *  attempt to solve the SYN attack problem
			 */
			if(tcb->snd.ptr == tcb->iss){
				seg.flags |= SYN;
				dsize = 0;
				ssize = 1;
				seg.mss = tcb->mss;
				seg.ws = tcb->scale;
			}
			break;
		}
		seg.seq = tcb->snd.ptr;
		seg.ack = tcb->rcv.nxt;
		seg.wnd = tcb->rcv.wnd;

		/* Pull out data to send */
		bp = nil;
		if(dsize != 0) {
			bp = qcopy(s->wq, dsize, sent);
			/*
			 * fewer bytes than asked for: the code treats the
			 * shortfall as the FIN's unit of sequence space
			 */
			if(BLEN(bp) != dsize) {
				seg.flags |= FIN;
				dsize--;
			}
		}

		/* this segment reaches the end of everything queued: push it */
		if(sent+dsize == sndcnt)
			seg.flags |= PSH;

		/* keep track of balance of resent data */
		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
			n = tcb->snd.nxt - tcb->snd.ptr;
			if(ssize < n)
				n = ssize;
			tcb->resent += n;
			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
			tpriv->stats[RetransSegs]++;
		}

		tcb->snd.ptr += ssize;

		/* Pull up the send pointer so we can accept acks
		 * for this window
		 */
		if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
			tcb->snd.nxt = tcb->snd.ptr;

		/*
		 * Build header, link data and compute cksum.
		 * Note that on a nil return we give up with snd.ptr/snd.nxt
		 * already advanced past the data that was not sent.
		 */
		switch(version){
		case V4:
			tcb->protohdr.tcp4hdr.vihl = IP_VER4;
			hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
			if(hbp == nil) {
				freeblist(bp);
				return;
			}
			break;
		case V6:
			tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
			hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
			if(hbp == nil) {
				freeblist(bp);
				return;
			}
			break;
		default:
			hbp = nil;	/* to suppress a warning */
			panic("tcpoutput: version %d", version);
		}

		/* Start the transmission timers if there is new data and we
		 * expect acknowledges
		 */
		if(ssize != 0){
			if(tcb->timer.state != TcptimerON)
				tcpgo(tpriv, &tcb->timer);

			/*  If round trip timer isn't running, start it.
			 *  measure the longest packet only in case the
			 *  transmission time dominates RTT
			 */
			if(tcb->rtt_timer.state != TcptimerON)
			if(ssize == tcb->mss) {
				tcpgo(tpriv, &tcb->rtt_timer);
				tcb->rttseq = tcb->snd.ptr;
			}
		}

		tpriv->stats[OutSegs]++;

		/* put off the next keep alive */
		tcpgo(tpriv, &tcb->katimer);

		/*
		 * hand the packet to IP.  After localclose the loop carries
		 * on; the state switch at the top decides whether to return.
		 */
		switch(version){
		case V4:
			if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
				/* a negative return means no route */
				localclose(s, "no route");
			}
			break;
		case V6:
			if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
				/* a negative return means no route */
				localclose(s, "no route");
			}
			break;
		default:
			panic("tcpoutput2: version %d", version);
		}
		/*
		 * on iterations 1, 5, 9, ... release s and call sched()
		 * before continuing; s is not locked across the call
		 */
		if((uint)(msgs%4) == 1){
			QUNLOCK(s);
			sched();
			QLOCK(s);
		}
	}
}
   2623 
   2624 /*
   2625  *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
   2626  */
   2627 void
   2628 tcpsendka(Conv *s)
   2629 {
   2630 	Tcp seg;
   2631 	Tcpctl *tcb;
   2632 	Block *hbp,*dbp;
   2633 
   2634 	tcb = (Tcpctl*)s->ptcl;
   2635 
   2636 	dbp = nil;
   2637 	seg.urg = 0;
   2638 	seg.source = s->lport;
   2639 	seg.dest = s->rport;
   2640 	seg.flags = ACK|PSH;
   2641 	seg.mss = 0;
   2642 	seg.ws = 0;
   2643 	if(tcpporthogdefense)
   2644 		seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
   2645 	else
   2646 		seg.seq = tcb->snd.una-1;
   2647 	seg.ack = tcb->rcv.nxt;
   2648 	tcb->rcv.una = 0;
   2649 	seg.wnd = tcb->rcv.wnd;
   2650 	if(tcb->state == Finwait2){
   2651 		seg.flags |= FIN;
   2652 	} else {
   2653 		dbp = allocb(1);
   2654 		dbp->wp++;
   2655 	}
   2656 
   2657 	if(isv4(s->raddr)) {
   2658 		/* Build header, link data and compute cksum */
   2659 		tcb->protohdr.tcp4hdr.vihl = IP_VER4;
   2660 		hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb);
   2661 		if(hbp == nil) {
   2662 			freeblist(dbp);
   2663 			return;
   2664 		}
   2665 		ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
   2666 	}
   2667 	else {
   2668 		/* Build header, link data and compute cksum */
   2669 		tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
   2670 		hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb);
   2671 		if(hbp == nil) {
   2672 			freeblist(dbp);
   2673 			return;
   2674 		}
   2675 		ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
   2676 	}
   2677 }
   2678 
   2679 /*
   2680  *  set connection to time out after 12 minutes
   2681  */
   2682 void
   2683 tcpsetkacounter(Tcpctl *tcb)
   2684 {
   2685 	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
   2686 	if(tcb->kacounter < 3)
   2687 		tcb->kacounter = 3;
   2688 }
   2689 
   2690 /*
   2691  *  if we've timed out, close the connection
   2692  *  otherwise, send a keepalive and restart the timer
   2693  */
   2694 void
   2695 tcpkeepalive(void *v)
   2696 {
   2697 	Tcpctl *tcb;
   2698 	Conv *s;
   2699 
   2700 	s = v;
   2701 	tcb = (Tcpctl*)s->ptcl;
   2702 	if(waserror()){
   2703 		QUNLOCK(s);
   2704 		nexterror();
   2705 	}
   2706 	QLOCK(s);
   2707 	if(tcb->state != Closed){
   2708 		if(--(tcb->kacounter) <= 0) {
   2709 			localclose(s, Etimedout);
   2710 		} else {
   2711 			tcpsendka(s);
   2712 			tcpgo(s->p->priv, &tcb->katimer);
   2713 		}
   2714 	}
   2715 	QUNLOCK(s);
   2716 	poperror();
   2717 }
   2718 
   2719 /*
   2720  *  start keepalive timer
   2721  */
   2722 char*
   2723 tcpstartka(Conv *s, char **f, int n)
   2724 {
   2725 	Tcpctl *tcb;
   2726 	int x;
   2727 
   2728 	tcb = (Tcpctl*)s->ptcl;
   2729 	if(tcb->state != Established)
   2730 		return "connection must be in Establised state";
   2731 	if(n > 1){
   2732 		x = atoi(f[1]);
   2733 		if(x >= MSPTICK)
   2734 			tcb->katimer.start = x/MSPTICK;
   2735 	}
   2736 	tcpsetkacounter(tcb);
   2737 	tcpgo(s->p->priv, &tcb->katimer);
   2738 
   2739 	return nil;
   2740 }
   2741 
   2742 /*
   2743  *  turn checksums on/off
   2744  */
   2745 char*
   2746 tcpsetchecksum(Conv *s, char **f, int _)
   2747 {
   2748 	Tcpctl *tcb;
   2749 
   2750 	tcb = (Tcpctl*)s->ptcl;
   2751 	tcb->nochecksum = !atoi(f[1]);
   2752 
   2753 	return nil;
   2754 }
   2755 
   2756 void
   2757 tcprxmit(Conv *s)
   2758 {
   2759 	Tcpctl *tcb;
   2760 
   2761 	tcb = (Tcpctl*)s->ptcl;
   2762 
   2763 	tcb->flags |= RETRAN|FORCE;
   2764 	tcb->snd.ptr = tcb->snd.una;
   2765 
   2766 	/*
   2767 	 *  We should be halving the slow start threshhold (down to one
   2768 	 *  mss) but leaving it at mss seems to work well enough
   2769 	 */
   2770  	tcb->ssthresh = tcb->mss;
   2771 
   2772 	/*
   2773 	 *  pull window down to a single packet
   2774 	 */
   2775 	tcb->cwind = tcb->mss;
   2776 	tcpoutput(s);
   2777 }
   2778 
   2779 void
   2780 tcptimeout(void *arg)
   2781 {
   2782 	Conv *s;
   2783 	Tcpctl *tcb;
   2784 	int maxback;
   2785 	Tcppriv *tpriv;
   2786 
   2787 	s = (Conv*)arg;
   2788 	tpriv = s->p->priv;
   2789 	tcb = (Tcpctl*)s->ptcl;
   2790 
   2791 	if(waserror()){
   2792 		QUNLOCK(s);
   2793 		nexterror();
   2794 	}
   2795 	QLOCK(s);
   2796 	switch(tcb->state){
   2797 	default:
   2798 		tcb->backoff++;
   2799 		if(tcb->state == Syn_sent)
   2800 			maxback = MAXBACKMS/2;
   2801 		else
   2802 			maxback = MAXBACKMS;
   2803 		tcb->backedoff += tcb->timer.start * MSPTICK;
   2804 		if(tcb->backedoff >= maxback) {
   2805 			localclose(s, Etimedout);
   2806 			break;
   2807 		}
   2808 		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
   2809 		tcpsettimer(tcb);
   2810 		tcprxmit(s);
   2811 		tpriv->stats[RetransTimeouts]++;
   2812 		tcb->snd.dupacks = 0;
   2813 		break;
   2814 	case Time_wait:
   2815 		localclose(s, nil);
   2816 		break;
   2817 	case Closed:
   2818 		break;
   2819 	}
   2820 	QUNLOCK(s);
   2821 	poperror();
   2822 }
   2823 
   2824 int
   2825 inwindow(Tcpctl *tcb, int seq)
   2826 {
   2827 	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
   2828 }
   2829 
   2830 /*
   2831  *  set up state for a received SYN (or SYN ACK) packet
   2832  */
   2833 void
   2834 procsyn(Conv *s, Tcp *seg)
   2835 {
   2836 	Tcpctl *tcb;
   2837 
   2838 	tcb = (Tcpctl*)s->ptcl;
   2839 	tcb->flags |= FORCE;
   2840 
   2841 	tcb->rcv.nxt = seg->seq + 1;
   2842 	tcb->rcv.urg = tcb->rcv.nxt;
   2843 	tcb->irs = seg->seq;
   2844 
   2845 	/* our sending max segment size cannot be bigger than what he asked for */
   2846 	if(seg->mss != 0 && seg->mss < tcb->mss)
   2847 		tcb->mss = seg->mss;
   2848 
   2849 	/* the congestion window always starts out as a single segment */
   2850 	tcb->snd.wnd = seg->wnd;
   2851 	tcb->cwind = tcb->mss;
   2852 }
   2853 
   2854 int
   2855 addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
   2856 {
   2857 	Reseq *rp, *rp1;
   2858 	int i, rqlen, qmax;
   2859 
   2860 	rp = malloc(sizeof(Reseq));
   2861 	if(rp == nil){
   2862 		freeblist(bp);	/* bp always consumed by add_reseq */
   2863 		return 0;
   2864 	}
   2865 
   2866 	rp->seg = *seg;
   2867 	rp->bp = bp;
   2868 	rp->length = length;
   2869 
   2870 	/* Place on reassembly list sorting by starting seq number */
   2871 	rp1 = tcb->reseq;
   2872 	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
   2873 		rp->next = rp1;
   2874 		tcb->reseq = rp;
   2875 		if(rp->next != nil)
   2876 			tpriv->stats[OutOfOrder]++;
   2877 		return 0;
   2878 	}
   2879 
   2880 	rqlen = 0;
   2881 	for(i = 0;; i++) {
   2882 		rqlen += rp1->length;
   2883 		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
   2884 			rp->next = rp1->next;
   2885 			rp1->next = rp;
   2886 			if(rp->next != nil)
   2887 				tpriv->stats[OutOfOrder]++;
   2888 			break;
   2889 		}
   2890 		rp1 = rp1->next;
   2891 	}
   2892 	qmax = QMAX<<tcb->rcv.scale;
   2893 	if(rqlen > qmax){
   2894 		print("resequence queue > window: %d > %d\n", rqlen, qmax);
   2895 		i = 0;
   2896 	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
   2897 	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
   2898 	  			rp1->seg.ack, rp1->seg.flags);
   2899 			if(i++ > 10){
   2900 				print("...\n");
   2901 				break;
   2902 			}
   2903 		}
   2904 
   2905 		/*
   2906 		 * delete entire reassembly queue; wait for retransmit.
   2907 		 * - should we be smarter and only delete the tail?
   2908 		 */
   2909 		for(rp = tcb->reseq; rp != nil; rp = rp1){
   2910 			rp1 = rp->next;
   2911 			freeblist(rp->bp);
   2912 			free(rp);
   2913 		}
   2914 		tcb->reseq = nil;
   2915 
   2916 	  	return -1;
   2917 	}
   2918 	return 0;
   2919 }
   2920 
   2921 void
   2922 getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
   2923 {
   2924 	Reseq *rp;
   2925 
   2926 	rp = tcb->reseq;
   2927 	if(rp == nil)
   2928 		return;
   2929 
   2930 	tcb->reseq = rp->next;
   2931 
   2932 	*seg = rp->seg;
   2933 	*bp = rp->bp;
   2934 	*length = rp->length;
   2935 
   2936 	free(rp);
   2937 }
   2938 
/*
 *  Trim an incoming segment so that it fits the receive window.
 *
 *  On entry *length is the number of data bytes in *bp.  SYN and
 *  FIN each count as one extra unit of sequence space when deciding
 *  whether the segment is acceptable.
 *
 *  Returns 0 if the segment (or part of it) is acceptable.  Data
 *  before rcv.nxt and data beyond rcv.nxt+rcv.wnd have then been
 *  removed and seg->seq, seg->urg, seg->flags, *bp and *length
 *  adjusted to match.
 *  Returns -1 if no part is acceptable; *bp has then been freed
 *  (but is not set to nil).
 */
int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
	ushort len;
	uchar accept;
	int dupcnt, excess;

	accept = 0;
	/* len: sequence space the segment occupies, flags included */
	len = *length;
	if(seg->flags & SYN)
		len++;
	if(seg->flags & FIN)
		len++;

	if(tcb->rcv.wnd == 0) {
		/* closed window: only an empty segment exactly at rcv.nxt is ok */
		if(len == 0 && seg->seq == tcb->rcv.nxt)
			return 0;
	}
	else {
		/* Some part of the segment should be in the window */
		if(inwindow(tcb,seg->seq))
			accept++;
		else
		if(len != 0) {
			/* the end is in the window, or the segment spans rcv.nxt */
			if(inwindow(tcb, seg->seq+len-1) ||
			seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
				accept++;
		}
	}
	if(!accept) {
		freeblist(*bp);
		return -1;
	}
	/* dupcnt: how much of the front of the segment we already have */
	dupcnt = tcb->rcv.nxt - seg->seq;
	if(dupcnt > 0){
		tcb->rerecv += dupcnt;
		/* a duplicate SYN accounts for the first unit of overlap */
		if(seg->flags & SYN){
			seg->flags &= ~SYN;
			seg->seq++;

			if(seg->urg > 1)
				seg->urg--;
			else
				seg->flags &= ~URG;
			dupcnt--;
		}
		/* the rest of the overlap is data: drop it from the front */
		if(dupcnt > 0){
			pullblock(bp, (ushort)dupcnt);
			seg->seq += dupcnt;
			*length -= dupcnt;

			if(seg->urg > dupcnt)
				seg->urg -= dupcnt;
			else {
				seg->flags &= ~URG;
				seg->urg = 0;
			}
		}
	}
	/* excess: how far the segment runs past the end of the window */
	excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);
	if(excess > 0) {
		tcb->rerecv += excess;
		*length -= excess;
		*bp = trimblock(*bp, 0, *length);
		if(*bp == nil)
			panic("presotto is a boofhead");
		/* the tail was cut off, so the FIN is dropped along with it */
		seg->flags &= ~FIN;
	}
	return 0;
}
   3009 
   3010 void
   3011 tcpadvise(Proto *tcp, Block *bp, char *msg)
   3012 {
   3013 	Tcp4hdr *h4;
   3014 	Tcp6hdr *h6;
   3015 	Tcpctl *tcb;
   3016 	uchar source[IPaddrlen];
   3017 	uchar dest[IPaddrlen];
   3018 	ushort psource, pdest;
   3019 	Conv *s, **p;
   3020 
   3021 	h4 = (Tcp4hdr*)(bp->rp);
   3022 	h6 = (Tcp6hdr*)(bp->rp);
   3023 
   3024 	if((h4->vihl&0xF0)==IP_VER4) {
   3025 		v4tov6(dest, h4->tcpdst);
   3026 		v4tov6(source, h4->tcpsrc);
   3027 		psource = nhgets(h4->tcpsport);
   3028 		pdest = nhgets(h4->tcpdport);
   3029 	}
   3030 	else {
   3031 		ipmove(dest, h6->tcpdst);
   3032 		ipmove(source, h6->tcpsrc);
   3033 		psource = nhgets(h6->tcpsport);
   3034 		pdest = nhgets(h6->tcpdport);
   3035 	}
   3036 
   3037 	/* Look for a connection */
   3038 	QLOCK(tcp);
   3039 	for(p = tcp->conv; *p; p++) {
   3040 		s = *p;
   3041 		tcb = (Tcpctl*)s->ptcl;
   3042 		if(s->rport == pdest)
   3043 		if(s->lport == psource)
   3044 		if(tcb->state != Closed)
   3045 		if(ipcmp(s->raddr, dest) == 0)
   3046 		if(ipcmp(s->laddr, source) == 0){
   3047 			QLOCK(s);
   3048 			QUNLOCK(tcp);
   3049 			switch(tcb->state){
   3050 			case Syn_sent:
   3051 				localclose(s, msg);
   3052 				break;
   3053 			}
   3054 			QUNLOCK(s);
   3055 			freeblist(bp);
   3056 			return;
   3057 		}
   3058 	}
   3059 	QUNLOCK(tcp);
   3060 	freeblist(bp);
   3061 }
   3062 
   3063 static char*
   3064 tcpporthogdefensectl(char *val)
   3065 {
   3066 	if(strcmp(val, "on") == 0)
   3067 		tcpporthogdefense = 1;
   3068 	else if(strcmp(val, "off") == 0)
   3069 		tcpporthogdefense = 0;
   3070 	else
   3071 		return "unknown value for tcpporthogdefense";
   3072 	return nil;
   3073 }
   3074 
   3075 /* called with c QLOCKed */
   3076 char*
   3077 tcpctl(Conv* c, char** f, int n)
   3078 {
   3079 	if(n == 1 && strcmp(f[0], "hangup") == 0)
   3080 		return tcphangup(c);
   3081 	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
   3082 		return tcpstartka(c, f, n);
   3083 	if(n >= 1 && strcmp(f[0], "checksum") == 0)
   3084 		return tcpsetchecksum(c, f, n);
   3085 	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0)
   3086 		return tcpporthogdefensectl(f[1]);
   3087 	return "unknown control request";
   3088 }
   3089 
   3090 int
   3091 tcpstats(Proto *tcp, char *buf, int len)
   3092 {
   3093 	Tcppriv *priv;
   3094 	char *p, *e;
   3095 	int i;
   3096 
   3097 	priv = tcp->priv;
   3098 	p = buf;
   3099 	e = p+len;
   3100 	for(i = 0; i < Nstats; i++)
   3101 		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
   3102 	return p - buf;
   3103 }
   3104 
   3105 /*
   3106  *  garbage collect any stale conversations:
   3107  *	- SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
   3108  *	- Finwait2 after 5 minutes
   3109  *
   3110  *  this is called whenever we run out of channels.  Both checks are
   3111  *  of questionable validity so we try to use them only when we're
   3112  *  up against the wall.
   3113  */
   3114 int
   3115 tcpgc(Proto *tcp)
   3116 {
   3117 	Conv *c, **pp, **ep;
   3118 	int n;
   3119 	Tcpctl *tcb;
   3120 
   3121 
   3122 	n = 0;
   3123 	ep = &tcp->conv[tcp->nc];
   3124 	for(pp = tcp->conv; pp < ep; pp++) {
   3125 		c = *pp;
   3126 		if(c == nil)
   3127 			break;
   3128 		if(!CANQLOCK(c))
   3129 			continue;
   3130 		tcb = (Tcpctl*)c->ptcl;
   3131 		switch(tcb->state){
   3132 		case Syn_received:
   3133 			if(NOW - tcb->time > 5000){
   3134 				localclose(c, "timed out");
   3135 				n++;
   3136 			}
   3137 			break;
   3138 		case Finwait2:
   3139 			if(NOW - tcb->time > 5*60*1000){
   3140 				localclose(c, "timed out");
   3141 				n++;
   3142 			}
   3143 			break;
   3144 		}
   3145 		QUNLOCK(c);
   3146 	}
   3147 	return n;
   3148 }
   3149 
   3150 void
   3151 tcpsettimer(Tcpctl *tcb)
   3152 {
   3153 	int x;
   3154 
   3155 	/* round trip dependency */
   3156 	x = backoff(tcb->backoff) *
   3157 		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
   3158 
   3159 	/* bounded twixt 1/2 and 64 seconds */
   3160 	if(x < 500/MSPTICK)
   3161 		x = 500/MSPTICK;
   3162 	else if(x > (64000/MSPTICK))
   3163 		x = 64000/MSPTICK;
   3164 	tcb->timer.start = x;
   3165 }
   3166 
   3167 void
   3168 tcpinit(Fs *fs)
   3169 {
   3170 	Proto *tcp;
   3171 	Tcppriv *tpriv;
   3172 
   3173 	tcp = smalloc(sizeof(Proto));
   3174 	tpriv = tcp->priv = smalloc(sizeof(Tcppriv));
   3175 	tcp->name = "tcp";
   3176 	tcp->connect = tcpconnect;
   3177 	tcp->announce = tcpannounce;
   3178 	tcp->ctl = tcpctl;
   3179 	tcp->state = tcpstate;
   3180 	tcp->create = tcpcreate;
   3181 	tcp->close = tcpclose;
   3182 	tcp->rcv = tcpiput;
   3183 	tcp->advise = tcpadvise;
   3184 	tcp->stats = tcpstats;
   3185 	tcp->inuse = tcpinuse;
   3186 	tcp->gc = tcpgc;
   3187 	tcp->ipproto = IP_TCPPROTO;
   3188 	tcp->nc = scalednconv();
   3189 	tcp->ptclsize = sizeof(Tcpctl);
   3190 	tpriv->stats[MaxConn] = tcp->nc;
   3191 
   3192 	Fsproto(fs, tcp);
   3193 }
   3194 
   3195 void
   3196 tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
   3197 {
   3198 	if(rcvscale){
   3199 		tcb->rcv.scale = rcvscale & 0xff;
   3200 		tcb->snd.scale = sndscale & 0xff;
   3201 		tcb->window = QMAX<<tcb->snd.scale;
   3202 		qsetlimit(s->rq, tcb->window);
   3203 	} else {
   3204 		tcb->rcv.scale = 0;
   3205 		tcb->snd.scale = 0;
   3206 		tcb->window = QMAX;
   3207 		qsetlimit(s->rq, tcb->window);
   3208 	}
   3209 }