(* Aos, Copyright 2001, Pieter Muller, ETH Zurich *) MODULE TCP; (** AUTHOR "pjm, mvt"; PURPOSE "TCP protocol"; *) (* TCP - Transmission Control Protocol. Based on the 4.4BSD-Lite distribution described in Wright and Stevens, "TCP/IP Illustrated, Volume 2: The Implementation", ISBN 0-201-63354-X. See the BSD copyright statement at the end of this module. From that code it inherits some horrible control flow, which was left mostly intact, to make it easier to compare with the book. TCP Header 00 16 source port 02 16 destination port 04 32 sequence number 08 32 acknowledgement number 12 08 header length & reserved 07..04 header length (4-byte units) 03..00 reserved 13 08 flags 07..06 reserved 05..05 URG 04..04 ACK 03..03 PSH 02..02 RST 01..01 SYN 00..00 FIN 14 16 window size 16 16 TCP checksum 18 16 urgent pointer 20 -- options (0-40 bytes) -- -- data TCP Pseudo-header (for checksum calculation) 00 32 source address 04 32 destination address 08 08 zero = 0 09 08 protocol = 17 10 16 TCP length (duplicate) Notes: o Bit numbers above are Intel bit order. o Avoid use of SET because of PPC bit numbering issues. o Always access fields as 8-, 16- or 32-bit values and use DIV, MOD, ASH, ODD for bit access. *) IMPORT SYSTEM, Machine, KernelLog, Clock, Modules, Objects, Kernel, Commands, Network, IP, Streams, ActiveTimers; CONST StrongChecks = FALSE; SystemMove = FALSE; TraceProtocol = FALSE; TraceError = FALSE; TracePacket = FALSE; TraceTimer = FALSE; TraceCongestion = FALSE; Trace = TraceProtocol OR TraceError OR TracePacket OR TraceTimer; HandleCongestion = TRUE; MinEphemeralPort = 1024; MaxEphemeralPort = 50000; HashTableSize = 1024 * 16; (* size of connection lookup hash table *) Acceptable = 500; (* "backlog" on listening connections *) NilPort* = 0; (** Error codes *) Ok* = 0; ConnectionRefused* = 3701; ConnectionReset* = 3702; WrongInterface* = 3703; TimedOut* = 3704; NotConnected* = 3705; NoInterface* = 3706; InterfaceClosed* = 3707; MinError = 3700; MaxError = 3735; NumErrors = MaxError-MinError+1; (** TCP connection states *) NumStates* = 12; Closed* = 0; Listen* = 1; SynSent* = 2; SynReceived* = 3; Established* = 4; CloseWait* = 5; FinWait1* = 6; Closing* = 7; LastAck* = 8; FinWait2* = 9; TimeWait* = 10; Unused* = 11; (* no real state, only used in this implementation *) OpenStates* = {Listen, SynReceived, Established, CloseWait, FinWait1, FinWait2}; ClosedStates* = {Unused, Closed, Closing, LastAck, TimeWait}; HalfClosedStates* = ClosedStates + {FinWait1, FinWait2}; FinStates* = {Unused, Closed, CloseWait, Closing, LastAck, TimeWait}; Fin = 0; Syn = 1; Rst = 2; Psh = 3; Ack = 4; Urg = 5; (* tcp header flags *) DoRFC1323 = TRUE; (* handle time stamp option (processing and generating) *) ProcOptions = TRUE; (* process TCP options *) GenOptions = TRUE; (* generate TCP options *) (* Flags in Connection *) AckNow = 0; (* send Ack immediately *) DelAck = 1; (* send Ack, but try to delay it *) NoDelay = 2; (* don't delay packets tocoalesce (disable Nagle algorithm) *) SentFin = 3; (* have sent Fin *) Force = 4; (* force out a byte (persist/OOB) *) RcvdScale = 5; (* set when other side sends window scale option in Syn *) RcvdTstmp = 6; (* set when other side sends timestamp option in Syn *) ReqScale = 7; (* have/will request window scale option in Syn *) ReqTstmp = 8; (* have/will request timestamp option in Syn *) DoKeepAlive = 9; (* enable keep-alive timer *) AcceptConn = 10; (* listening for incoming connections *) (*Notify = 11;*) (* socket wakeup *) (*Gone = 12;*) (* SS_NOFDREF *) (*NoMore = 13;*) (* SS_CANTRCVMORE *) Timeout = 14; NumTimers = 4; ReXmt = 0; Persist = 1; Keep = 2; MSL2 = 3; FastPeriod = 5; (* number of fast ticks per second *) SlowPeriod = 2; (* number of slow ticks per second *) TimerPeriod = 10; (* timer ticks per second *) MinTime = 1*SlowPeriod; (* minimum allowable time value *) ReXmtMax = 64*SlowPeriod; (* max allowable ReXmt value *) ReXmtThresh = 3; KeepInit = 75*SlowPeriod; (* connection establishment timer value (75s) *) KeepIntvl = 75*SlowPeriod; (* time between probes when no response (75s) *) KeepIdle = 2*60*60*SlowPeriod; (* default time before probing (2h) *) KeepCnt = 8; (* max probes before drop *) MaxIdle = KeepCnt * KeepIntvl; (* max time to send keepalive probes (10min) *) MSL = 30*SlowPeriod; (* max segment lifetime (30s) *) MaxPersistIdle = KeepIdle; (* max time to keep dead/unreachable connections (2h) *) PawsIdle = 24*24*60*60*SlowPeriod; SRTTBase = 0; (* base round trip time *) SRTTDflt = 3*SlowPeriod; (* assumed RTT if no info *) RTTShift = 3; RTTVarShift = 2; PersMin = 5*SlowPeriod; (* retransmit persistance *) PersMax = 60*SlowPeriod; (* maximum persist interval *) MSS = 536-12; (* maximum segment size for outgoing segments, 12 = size of timestamp option *) MaxRxtShift = 12; (* maximum retransmits *) MaxWin = 65535; (* largest value for (unscaled) window *) MaxWinShift = 14; (* maximum window shift *) MaxSendSpace = 80000H; (* 512KB, max. 1023MB *) MaxRecvSpace = 80000H; (* 512KB, max. 1023MB *) SegsPerBuf = 4; (* number of mss segments per send buffer (potential fragmentation waste is 1/SegsPerBuf) *) ISSInc = 128000; (* increment for iss each second *) IPTypeTCP = 6; (* TCP type code for IP packets *) MinTCPHdrLen = 20; MaxTCPHdrLen = 60; MaxPseudoHdrLen = 40; (* IPv4 = 12; IPv6 = 40 *) NewZeros = FALSE; (* NEW initializes allocated object fields to 0 *) BroadcastReceived = 3708; InvalidParameter = 3709; AllPortsInUse = 3710; AddressInUse = 3711; DuplicateSegment = 3712; DuplicatePartialSegment = 3713; DuplicateSegmentPAWS = 3714; DataBeyondWindow1 = 3715; DataBeyondWindow2 = 3716; DataBeyondWindow3 = 3717; BadChecksum = 3718; DuplicateAck = 3719; OutOfRangeAck = 3720; TimeOutKeepAlive = 3721; TimeoutEstablished = 3722; SegmentTooBig = 3723; SegmentTooSmall = 3724; BadHeaderLength = 3725; ConnectionGone = 3726; NIYNewIncarnation = 3727; NIYOutOfBand = 3728; NIYMSS = 3729; ConnectionAborted = 3730; NotInitialized = 3731; DataDuplicatePrevComplete = 3732; DataDuplicatePrevPartial = 3733; DataDuplicateNextComplete = 3734; DataDuplicateNextPartial = 3735; TYPE (* Send buffer types *) SendData = IP.Packet; SendBuffer = POINTER TO RECORD next: SendBuffer; ofs, len: LONGINT; (* data[ofs..ofs+len-1] is valid *) seq: LONGINT; (* sequence number of byte data[ofs] (only valid if len # 0) *) pf: SET; (* flags of segment *) data: SendData (* size should be multiple of maxseg *) END; TYPE ISS = OBJECT VAR iss: LONGINT; (* next iss to use *) PROCEDURE Update(hz: LONGINT); BEGIN {EXCLUSIVE} INC(iss, ISSInc DIV hz) END Update; PROCEDURE Get(): LONGINT; VAR t: LONGINT; BEGIN {EXCLUSIVE} t := iss; INC(iss, ISSInc); RETURN t END Get; PROCEDURE &Init*(iss: LONGINT); BEGIN SELF.iss := iss END Init; END ISS; TYPE Timer = OBJECT (* temporary *) VAR lastFast, lastSlow: LONGINT; (* time of last execution *) (*lastTrace: LONGINT;*) now: LONGINT; (* current tcp "time" - read from other procedures, but only updated inside this object *) timer: ActiveTimers.Timer; PROCEDURE CallDelayedAck(p: Connection); BEGIN p.DelayedAck(); END CallDelayedAck; PROCEDURE CallSlowTimer(p: Connection); BEGIN p.SlowTimer(); END CallSlowTimer; PROCEDURE HandleTimeout; VAR t: LONGINT; BEGIN {EXCLUSIVE} t := Kernel.GetTicks(); IF t - lastFast >= Kernel.second DIV FastPeriod THEN lastFast := t; pool.Enumerate(CallDelayedAck); END; IF t - lastSlow >= Kernel.second DIV SlowPeriod THEN lastSlow := t; pool.Enumerate(CallSlowTimer); issSource.Update(SlowPeriod); INC(now) END; timer.SetTimeout(HandleTimeout, Kernel.second DIV TimerPeriod) END HandleTimeout; (* Finalize timer by cancelling it *) PROCEDURE Finalize; BEGIN {EXCLUSIVE} timer.Finalize END Finalize; PROCEDURE &Init*; BEGIN now := 0; lastSlow := Kernel.GetTicks() - Kernel.second; lastFast := lastSlow; (*lastTrace := lastSlow;*) NEW(timer); timer.SetTimeout(HandleTimeout, Kernel.second DIV TimerPeriod) END Init; END Timer; TYPE (** Connection object. NOTE: Only one process should access a Connection! *) Connection* = OBJECT(Streams.Connection) VAR poolNext, parent, acceptNext: Connection; (* assigned interface *) int-: IP.Interface; (* local protocol address *) lport-: LONGINT; (* foreign protocol address *) fip-: IP.Adr; fport-: LONGINT; state*: SHORTINT; (* TCP state *) timer: ARRAY NumTimers OF LONGINT; rxtshift-: LONGINT; (* log(2) of rexmt exponential backoff *) rxtcur-: LONGINT; (* current retransmission timeout (ticks) *) dupacks-: LONGINT; (* number of consequtive duplicate acks received *) maxseg-: LONGINT; (* maximum segment size to send *) flags: SET; (* various connection and buffer flags *) error: LONGINT; (* error on connection (socket error) *) acceptable: LONGINT; (* number of connections that can be before acceptance *) (* send sequence *) snduna-: LONGINT; (* send unacknowledged *) sndnxt-: LONGINT; (* send next *) sndup: LONGINT; (* send urgent pointer *) sndwl1-: LONGINT; (* window update seg seq number *) sndwl2-: LONGINT; (* window update seg ack number *) iss-: LONGINT; (* initial send sequence number *) sndwnd-: LONGINT; (* send window *) sndmax-: LONGINT; (* highest sequence number sent - used to recognize retransmits *) (* receive sequence *) rcvwnd-: LONGINT; (* receive window *) rcvnxt-: LONGINT; (* receive next *) rcvup: LONGINT; (* receive urgent pointer *) irs-: LONGINT; (* initial receive sequence number *) rcvadv-: LONGINT; (* advertised window by other end *) (* congestion control *) sndcwnd-: LONGINT; (* congestion-controlled window *) sndssthresh-: LONGINT; (* sndcwnd threshold for slow start - exponential to linear switch *) (* transmit timing *) idle-: LONGINT; (* inactivity time *) rtt-: LONGINT; (* round trip time *) rtseq-: LONGINT; (* sequence number being timed *) srtt-: LONGINT; (* smoothed round trip time *) rttvar-: LONGINT; (* variance in round trip time *) rttmin-: LONGINT; (* minimum rtt allowed *) maxsndwnd: LONGINT; (* largest window peer has offered *) (* RFC 1323 *) sndscale: LONGINT; (* scaling for send window (0-14) *) rcvscale: LONGINT; (* scaling for receive window (0-14) *) requestrscale: LONGINT; (* our pending window scale *) requestedsscale: LONGINT; (* peer's pending window scale *) tsrecent: LONGINT; (* timestamp echo data *) tsrecentage: LONGINT; (* when last updated *) lastacksent-: LONGINT; (* sequence number of last ack field *) (* send buffer *) sndcc-: LONGINT; (* number of bytes in send buffer *) sndspace-: LONGINT; (* number of bytes that may still be added before buffer is full *) sndhead, sndtail: SendBuffer; (* queue of segments (contiguous and in order) *) sndcontig: SendData; (* maxseg size buffer to make data contiguous *) (* receive buffer *) rcvspace-: LONGINT; (* number of bytes that may still be received before buffer is considered full *) rcvhiwat-: LONGINT; (* receive high water mark (MaxRecvSpace) *) rcvhead, rcvreasm, rcvtail: Network.Buffer; (* queue of segments - see description at the beginning of this file *) rcvheadFragment: Network.Buffer; (* current fragment of rcvhead *) timeout: ActiveTimers.Timer; traceflow-: LONGINT; (* Initialization for internal use only. *) PROCEDURE &Init*; BEGIN state := Unused; END Init; (** Open a TCP connection (only use once per Connection instance). Use TCP.NilPort for lport to automatically assign an unused local port. *) PROCEDURE Open*(lport: LONGINT; fip: IP.Adr; fport: LONGINT; VAR res: WORD); BEGIN {EXCLUSIVE} ASSERT((state = Unused) & (lport >= 0) & (lport < 10000H) & (fport >= 0) & (fport < 10000H)); IF timeSource # NIL THEN InitConnection(SELF); IF (~IP.IsNilAdr(fip)) & (fport # NilPort) THEN (* active open (connect) *) int := IP.InterfaceByDstIP(fip); IF int # NIL THEN SELF.fip := fip; pool.Add(SELF, lport, fport, res); (* add connection to connection pool *) IF res = Ok THEN (* address assignment ok, now start the connection *) Machine.AtomicInc(NTCPConnectAttempt); state := SynSent; timer[Keep] := KeepInit; iss := issSource.Get(); snduna := iss; sndnxt := iss; sndmax := iss; sndup := iss; Output(SELF) END; ELSE res := NoInterface; END; ELSE (* passive open (listen) *) ASSERT((fport = NilPort) & (IP.IsNilAdr(fip))); SELF.int := NIL; SELF.fip := IP.NilAdr; pool.Add(SELF, lport, NilPort, res); IF res = Ok THEN INCL(flags, AcceptConn); acceptable := Acceptable; state := Listen; END END; IF TraceProtocol THEN TraceTCP("Open", SELF, empty^, empty^, 0, 0, 0) END ELSE res := NotInitialized; END END Open; (** Send data on a TCP connection. *) PROCEDURE Send*(CONST data: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: WORD); VAR buf: SendBuffer; len0: LONGINT; BEGIN {EXCLUSIVE} IF StrongChecks THEN Invariant(SELF) END; ASSERT(ofs+len <= LEN(data)); (* index check *) LOOP IF len <= 0 THEN EXIT END; IF len <= maxseg THEN len0 := len ELSE len0 := maxseg END; IF ~((state IN {Established, CloseWait}) & (sndspace >= len0)) THEN (* can not send immediately *) AWAIT(((state IN {Established, CloseWait}) & (sndspace >= len0)) OR ~(state IN {SynSent..CloseWait})); IF StrongChecks THEN Invariant(SELF) END; IF ~(state IN {SynSent..CloseWait}) THEN (* connection broken *) IF error # Ok THEN res := error ELSE res := NotConnected END; RETURN END END; buf := sndtail; IF LEN(buf.data^) - (buf.ofs+buf.len) >= len0 THEN (* last buffer has space for data *) IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(data[ofs]), ADDRESSOF(buf.data[buf.ofs+buf.len]), len0) ELSE Network.Copy(data, buf.data^, ofs, buf.ofs+buf.len, len0) END; INC(buf.len, len0) ELSE (* last buffer has no space for data *) buf := buf.next; IF buf # sndhead THEN (* is free buffer *) ASSERT((buf.ofs = 0) & (buf.len = 0)); (* buffer must be unused *) ASSERT(LEN(buf.data^) >= len0) (* index check *) ELSE Machine.AtomicInc(NTCPNewBufs); NEW(buf); NEW(buf.data, MSS * SegsPerBuf); IF ~NewZeros THEN buf.ofs := 0; END; buf.next := sndtail.next; sndtail.next := buf; ASSERT(LEN(buf.data^) >= len0) (* index check *) END; IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(data[ofs]), ADDRESSOF(buf.data[0]), len0) ELSE Network.Copy(data, buf.data^, ofs, 0, len0) END; buf.len := len0; sndtail := buf END; INC(sndcc, len0); DEC(sndspace, len0); Output(SELF); INC(ofs, len0); DEC(len, len0) END; IF TraceProtocol THEN TraceTCP("Send", SELF, empty^, data, 0, ofs, len) END; res := Ok END Send; (** Receive data on a TCP connection. The data parameter specifies the buffer. The ofs parameters specify the position in the buffer where data should be received (usually 0), and the size parameters specifies how many bytes of data can be received in the buffer. The min parameter specifies the minimum number of bytes to receive before Receive returns and must by <= size. The len parameter returns the number of bytes received, and the res parameter returns 0 if ok, or a non-zero error code otherwise (e.g. if the connection is closed by the communication partner, or by a call of the Close method). *) PROCEDURE Receive*(VAR data: ARRAY OF CHAR; ofs, size, min: LONGINT; VAR len: LONGINT; VAR res: WORD); VAR buf: Network.Buffer; rlen: LONGINT; BEGIN {EXCLUSIVE} IF StrongChecks THEN Invariant(SELF) END; ASSERT((ofs >= 0) & (ofs+size <= LEN(data)) & (min <= size)); (* parameter consistency check *) len := 0; LOOP WHILE (rcvhead # NIL) & (rcvhead # rcvreasm) & (size > 0) DO IF rcvhead.nextFragment = NIL THEN (* read all available data until user buffer is full *) rlen := MIN(rcvhead.len, size); IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(rcvhead.data[rcvhead.ofs]), ADDRESSOF(data[ofs]), rlen); ELSE Network.Copy(rcvhead.data, data, rcvhead.ofs, ofs, rlen); END; INC(len, rlen); INC(ofs, rlen); DEC(size, rlen); INC(rcvhead.ofs, rlen); DEC(rcvhead.len, rlen); INC(rcvhead.int, rlen); INC(rcvspace, rlen); IF rcvhead.len = 0 THEN (* go to next buffer *) buf := rcvhead; rcvhead := rcvhead.next; IF rcvhead # NIL THEN rcvhead.prev := NIL; END; Network.ReturnBuffer(buf); Output(SELF); (* enable sending window update segment *) END; ELSE (* rcvhead has fragments *) (* read all available data until user buffer is full *) IF rcvheadFragment = NIL THEN rcvheadFragment := rcvhead; END; rlen := MIN(rcvheadFragment.len, size); IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(rcvheadFragment.data[rcvheadFragment.ofs]), ADDRESSOF(data[ofs]), rlen); ELSE Network.Copy(rcvheadFragment.data, data, rcvheadFragment.ofs, ofs, rlen); END; INC(len, rlen); INC(ofs, rlen); DEC(size, rlen); INC(rcvheadFragment.ofs, rlen); DEC(rcvheadFragment.len, rlen); INC(rcvheadFragment.int, rlen); INC(rcvspace, rlen); IF rcvheadFragment.len = 0 THEN IF rcvheadFragment.nextFragment # NIL THEN (* go to next fragment *) rcvheadFragment := rcvheadFragment.nextFragment; ELSE (* go to next buffer *) buf := rcvhead; rcvhead := rcvhead.next; IF rcvhead # NIL THEN rcvhead.prev := NIL; END; Network.ReturnBuffer(buf); Output(SELF); (* enable sending window update segment *) END; END; END; END; IF size = 0 THEN (* user buffer full *) EXIT; END; IF len >= min THEN (* enough was read *) EXIT; ELSE (* await available data or closed connection state *) AWAIT(((rcvhead # NIL) & (rcvhead # rcvreasm)) OR ~(state IN {SynSent, SynReceived, Established, FinWait1, FinWait2})); IF StrongChecks THEN Invariant(SELF) END; IF (rcvhead # NIL) & (rcvhead # rcvreasm) THEN (* new data available, start again with LOOP *) ELSE (* no data available, and no more can arrive, as we've seen the FIN *) IF error # Ok THEN res := error ELSE res := Streams.EOF (* end of file *) END; RETURN; END; END; END; IF StrongChecks THEN Invariant(SELF) END; IF TraceProtocol THEN TraceTCP("Receive", SELF, empty^, data, 0, ofs, len) END; res := Ok END Receive; (** Enable or disable delayed send (Nagle algorithm). If enabled, the sending of a segment is delayed if it is not filled by one call to Send, in order to be able to be filled by further calls to Send. This is the default option. If disabled, a segment is sent immediatly after a call to Send, even if it is not filled. This option is normally chosen by applications like telnet or VNC client, which send verly little data but shall not be delayed. *) PROCEDURE DelaySend*(enable: BOOLEAN); BEGIN {EXCLUSIVE} IF enable THEN EXCL(flags, NoDelay); ELSE INCL(flags, NoDelay); END; END DelaySend; (** Enable or disable keep-alive. (default: disabled) *) PROCEDURE KeepAlive*(enable: BOOLEAN); BEGIN {EXCLUSIVE} IF enable THEN INCL(flags, DoKeepAlive); ELSE EXCL(flags, DoKeepAlive); END; END KeepAlive; (** Return number of bytes that may be read without blocking. *) PROCEDURE Available*(): LONGINT; VAR len: LONGINT; item: Network.Buffer; fragmentBuffer: Network.Buffer; reassembledLength: LONGINT; BEGIN {EXCLUSIVE} len := 0; item := rcvhead; WHILE(item # NIL) & (item # rcvreasm) DO IF item.nextFragment # NIL THEN INC(len, item.len); ELSE (* fragmented packet *) fragmentBuffer := item; reassembledLength := 0; WHILE fragmentBuffer # NIL DO INC(len, fragmentBuffer.len); fragmentBuffer := fragmentBuffer.nextFragment; END; END; item := item.next; END; RETURN len; END Available; (** Return connection state. *) PROCEDURE State*(): LONGINT; BEGIN {EXCLUSIVE} IF (state IN FinStates) & (rcvhead # NIL) & (rcvhead.len # 0) THEN (* workaround for client errors *) IF state = CloseWait THEN (* act as if we haven't seen a FIN yet *) RETURN Established ELSE RETURN FinWait1 END ELSE RETURN state END END State; (** Wait until the connection state is either in the good or bad set, up to "ms" milliseconds. *) PROCEDURE AwaitState*(good, bad: SET; ms: LONGINT; VAR res: WORD); BEGIN {EXCLUSIVE} IF ~(state IN (good+bad)) THEN IF ms # -1 THEN IF timeout = NIL THEN NEW(timeout) END; timeout.SetTimeout(SELF.HandleTimeout, ms); END; EXCL(flags, Timeout); AWAIT((state IN (good+bad)) OR (Timeout IN flags)); IF ms # -1 THEN timeout.CancelTimeout(); END END; IF state IN good THEN res := Ok ELSIF state IN bad THEN res := NotConnected ELSE res := TimedOut END END AwaitState; PROCEDURE HandleTimeout; BEGIN {EXCLUSIVE} INCL(flags, Timeout) END HandleTimeout; (** Close a TCP connection (half-close). *) PROCEDURE Close*; BEGIN {EXCLUSIVE} IF state < Established THEN CloseConnection(SELF) ELSIF FALSE (* linger *) THEN Drop(SELF, 0) ELSE UsrClosed(SELF); IF state # Closed THEN Output(SELF) END END; IF TraceProtocol THEN TraceTCP("Close", SELF, empty^, empty^, 0, 0, 0) END END Close; (** Discard a TCP connection (shutdown). *) PROCEDURE Discard*; BEGIN {EXCLUSIVE} IF state < Established THEN CloseConnection(SELF) ELSE Drop(SELF, ConnectionReset) (* ??? *) END; IF TraceProtocol THEN TraceTCP("Discard", SELF, empty^, empty^, 0, 0, 0) END END Discard; (** Accept a client waiting on a listening connection. Blocks until a client is available or the connection is closed. *) PROCEDURE Accept*(VAR client: Connection; VAR res: WORD); BEGIN {EXCLUSIVE} AWAIT((state # Listen) OR (acceptNext # NIL)); IF acceptNext # NIL THEN client := acceptNext; acceptNext := acceptNext.acceptNext; INC(acceptable); res := Ok ELSE client := NIL; res := ConnectionRefused END END Accept; (** Return TRUE iff a listening connection has clients waiting to be accepted. *) PROCEDURE Requested*(): BOOLEAN; BEGIN {EXCLUSIVE} RETURN (state = Listen) & (acceptNext # NIL) END Requested; (* Process a received segment for the current Connection. *) PROCEDURE Input(int: IP.Interface; fip: IP.Adr; hdrLen: LONGINT; buffer: Network.Buffer); VAR bufferQueued: BOOLEAN; (* was buffer queued by ProcessInput() ? *) p: Connection; BEGIN {EXCLUSIVE} (* to do: move header prediction code here *) IF StrongChecks THEN Invariant(SELF) END; bufferQueued := FALSE; IF AcceptConn IN flags THEN IF acceptable > 0 THEN NEW(p); InitConnection(p); (* fig. 28.7 *) p.int := int; p.fip := fip; p.state := Listen; p.parent := SELF; ProcessInput(p, hdrLen, buffer, TRUE, bufferQueued); IF p.state = SynReceived THEN (* packet was accepted *) DEC(acceptable) (* limit number of "temporary" Connections *) END ELSE Machine.AtomicInc(NTCPUnacceptable) END ELSE IF SELF = nilpcb THEN (* set info for "Respond" *) SELF.int := int; SELF.fport := Network.GetNet2(buffer.data, buffer.ofs); SELF.lport := Network.GetNet2(buffer.data, buffer.ofs+2); END; IF SELF.int # int THEN (* packet must be received by interface attached to this connection *) Error(WrongInterface, 0, SELF); ELSE SELF.fip := fip; ProcessInput(SELF, hdrLen, buffer, FALSE, bufferQueued); END; END; IF StrongChecks THEN Invariant(SELF) END; IF ~bufferQueued THEN Network.ReturnBuffer(buffer); END; END Input; (* Schedule a delayed ack. *) PROCEDURE DelayedAck; BEGIN {EXCLUSIVE} IF StrongChecks THEN Invariant(SELF) END; IF DelAck IN flags THEN flags := (flags - {DelAck}) + {AckNow}; Machine.AtomicInc(NTCPDelAck); Output(SELF) END END DelayedAck; (* Schedule a slow timer event (fig. 25.8). *) PROCEDURE SlowTimer; VAR dropit: BOOLEAN; oldie: LONGINT; BEGIN {EXCLUSIVE} oldie := sndnxt; IF StrongChecks THEN Invariant(SELF) END; IF Expired(timer[ReXmt]) THEN (* fig. 25.26 *) INC(rxtshift); IF rxtshift > MaxRxtShift THEN rxtshift := MaxRxtShift; Error(TimedOut, 0, SELF); Drop(SELF, TimedOut) ELSE Machine.AtomicInc(NTCPReXmtTimer); RangeSet(rxtcur, (ASH(srtt, -RTTShift) + rttvar) * backoff[rxtshift], rttmin, ReXmtMax); timer[ReXmt] := rxtcur; IF rxtshift > MaxRxtShift DIV 4 THEN (* to do: drop current route *) INC(rttvar, ASH(srtt, -RTTShift)); srtt := 0 END; sndnxt := snduna; rtt := 0; sndcwnd := maxseg; dupacks := 0; sndssthresh := MAX(MIN(sndwnd, sndcwnd) DIV 2 DIV maxseg, 2) * maxseg; IF TraceCongestion THEN KernelLog.String("ST sndssthresh := "); KernelLog.Int(sndssthresh, 1); KernelLog.Ln END; Output(SELF) END END; IF Expired(timer[Persist]) THEN (* fig. 25.13 *) Machine.AtomicInc(NTCPPersistTimer); IF (rxtshift = MaxRxtShift) & ((idle >= MaxPersistIdle) OR (idle >= (ASH(srtt, -RTTShift) + rttvar) * totbackoff)) THEN Machine.AtomicInc(NTCPPersistDrop); (* vol. 3 fig. 14.17 *) Drop(SELF, TimedOut) ELSE SetPersist(SELF); INCL(flags, Force); Output(SELF); EXCL(flags, Force) END END; traceflow := 0; IF Expired(timer[Keep]) THEN (* fig. 25.16 *) Machine.AtomicInc(NTCPKeepTimer); dropit := FALSE; IF state >= Established THEN IF (DoKeepAlive IN flags) & (state <= CloseWait) THEN IF idle < KeepIdle + MaxIdle THEN traceflow := 1; Machine.AtomicInc(NTCPKeepProbe); Respond(SELF, rcvnxt, snduna-1, {}); timer[Keep] := KeepIntvl ELSE traceflow := 2; dropit := TRUE; Error(TimeOutKeepAlive, 0, SELF) END ELSE traceflow := 3; timer[Keep] := KeepIdle END ELSE traceflow := 4; dropit := TRUE; Error(TimeoutEstablished, 0, SELF) END; IF dropit THEN Drop(SELF, TimedOut) END END; IF Expired(timer[MSL2]) THEN (* fig. 25.10 *) IF (state # TimeWait) & (idle <= MaxIdle) THEN timer[MSL2] := KeepIntvl ELSE IF state = FinWait2 THEN Machine.AtomicInc(NTCPFinWait2Timer) ELSE Machine.AtomicInc(NTCPTimeWaitTimer) END; CloseConnection(SELF) END END; INC(idle); IF rtt # 0 THEN INC(rtt) END; (* check if interface wasn't closed/removed in meantime *) IF (int # NIL) & (int.closed) THEN Drop(SELF, InterfaceClosed); END; END SlowTimer; (* Finalize the Connection object *) PROCEDURE Finalize; BEGIN IF timeout # NIL THEN timeout.Finalize; END; Discard(); END Finalize; END Connection; TYPE ConnectionHandler* = PROCEDURE {DELEGATE} (p: Connection); ConnectionPool* = OBJECT VAR eport: LONGINT; table: ARRAY HashTableSize OF Connection; (* Initialization for internal use only. *) PROCEDURE &Init*; VAR i: LONGINT; BEGIN FOR i:= 0 TO HashTableSize-1 DO table[i] := NIL; END; eport := MinEphemeralPort; END Init; (* Finalize all connections in this pool *) PROCEDURE Finalize; VAR i: LONGINT; BEGIN FOR i:= 0 TO HashTableSize-1 DO WHILE table[i] # NIL DO table[i].Finalize(); END; END; END Finalize; (* Look for the specified Connection. *) PROCEDURE Lookup(lport, fport: LONGINT; fip: IP.Adr): Connection; VAR item: Connection; BEGIN item := table[HashPool(lport, fport, fip)]; WHILE (item # NIL) & ((~IP.AdrsEqual(item.fip, fip)) OR (item.fport # fport) OR (item.lport # lport)) DO item := item.poolNext; END; IF item = NIL THEN RETURN nilpcb; ELSE RETURN item; END; END Lookup; (** Enumerate all Connections. Only for tracing, due to concurrent updates data may be stale. *) PROCEDURE Enumerate*(handle: ConnectionHandler); VAR i: LONGINT; item: Connection; BEGIN FOR i:= 0 TO HashTableSize-1 DO item := table[i]; WHILE item # NIL DO handle(item); item := item.poolNext; END; END; END Enumerate; (* Add the connection p to the pool (attach). Assumes the caller has exclusive access to p. IF (fport = NilPort) & (fip = IP.NilAdr), a listening connection is assumed. ELSE an active open is assumed. IF (lport = NilPort), an ephemeral port is assigned. *) PROCEDURE Add(p: Connection; lport, fport: LONGINT; VAR res: WORD); VAR i, sport: LONGINT; BEGIN {EXCLUSIVE} IF ((fport # NilPort) & (IP.IsNilAdr(p.fip))) OR (* workaround for XOR *) ((fport = NilPort) & (~IP.IsNilAdr(p.fip))) THEN (* both must be "nil" or both must not be "nil" *) res := InvalidParameter; Error(res, 0, p); RETURN; END; IF lport = NilPort THEN (* find an ephemeral port *) sport := eport; LOOP lport := eport; INC(eport); IF eport > MaxEphemeralPort THEN eport := MinEphemeralPort; END; IF Lookup(lport, fport, p.fip) = nilpcb THEN (* found port that is not in use *) EXIT; END; IF eport = sport THEN res := AllPortsInUse; Error(res, 0, p); RETURN; END; END; ELSE IF Lookup(lport, fport, p.fip) # nilpcb THEN res := AddressInUse; Error(res, 0, p); RETURN; END; END; p.lport := lport; p.fport := fport; (* add to pool *) i := HashPool(lport, fport, p.fip); p.poolNext := table[i]; table[i] := p; res := Ok; END Add; (* Remove a Connection from the queue (detach), making its address re-usable. *) PROCEDURE Remove(p: Connection); VAR i: LONGINT; item: Connection; BEGIN {EXCLUSIVE} i := HashPool(p.lport, p.fport, p.fip); IF table[i] # NIL THEN IF table[i] = p THEN (* remove first item *) table[i] := table[i].poolNext; RETURN; ELSE (* search list for connection *) item := table[i]; WHILE (item.poolNext # NIL) & (item.poolNext # p) DO item := item.poolNext; END; IF item.poolNext # NIL THEN (* found - remove *) item.poolNext := item.poolNext.poolNext; RETURN; END; END; END; Error(ConnectionGone, 0, p); (* pcb gone, e.g. Rst received *) END Remove; END ConnectionPool; TYPE (** Install a procedure to be called when no matching port was found for an incoming connection request. The buffer MUST NOT be returned by the listener, it is returned automatically by the caller afterwards. *) PacketDumpListener* = PROCEDURE (fip: IP.Adr; buffer: Network.Buffer); VAR pool*: ConnectionPool; (* pool of all Connections *) timeSource: Timer; (* global timer *) issSource: ISS; (* source for ISS numbers *) lastpcb: Connection; (* cache last used pcb (never NIL). Note that it is possible that a deleted pcb can be ressurrected by a packet arriving for it. As soon as a packet for another connection arrives, the deleted pcb will vanish. *) nilpcb: Connection; (* never NIL *) empty: SendData; (* never NIL *) backoff: ARRAY MaxRxtShift+1 OF LONGINT; (* exponential backoff multipliers *) totbackoff: LONGINT; outflags: ARRAY NumStates OF SET; (* output header flags *) (* TCP counters *) NTCPError-: ARRAY NumErrors OF LONGINT; NTCPConnectAttempt-, NTCPPersistTimer-, NTCPFinWait2Timer-, NTCPSendProbe-, NTCPReXmtPack-, NTCPReXmtByte-, NTCPSendPack-, NTCPSendByte-, NTCPAcks-, NTCPSendCtrl-, NTCPSendUrg-, NTCPSendWinUp-, NTCPSegsTimed-, NTCPSendTotal-, NTCPKeepTimer-, NTCPKeepProbe-, NTCPReXmtTimer-, NTCPRcvTotal-, NTCPRcvOptions-, NTCPCacheMiss-, NTCPPredAck-, NTCPAckPack-, NTCPAckByte-, NTCPPredData-, NTCPRcvPackFast-, NTCPRcvByteFast-, NTCPConnects-, NTCPRcvWinProbe-, NTCPDrops-, NTCPRcvWinUpd-, NTCPRTTUpdated-, NTCPDelAck-, NTCPConnDrops-, NTCPClosed-, NTCPSplitBuffer-, NTCPRcvPackSlow-, NTCPRcvByteSlow-, NTCPNewBufs-, NTCPTimeWaitTimer-, NTCPUnacceptable-, NTCPAccepts-, NTCPPersistDrop-: LONGINT; trace: BOOLEAN; packetDumpListener : PacketDumpListener; (* Trace installer *) (** Install a procedure to be called when no matching port was found for an incoming connection request. The buffer doesn't have to be returned by the listener, it is returned automatically by the caller afterwards. *) PROCEDURE SetDefaultListener*(pdl : PacketDumpListener); BEGIN packetDumpListener := pdl END SetDefaultListener; (* --- Utility procedures. *) PROCEDURE Invariant(p: Connection); VAR rcvbuf: Network.Buffer; sndbuf: SendBuffer; found: BOOLEAN; BEGIN IF StrongChecks & (p # nilpcb) THEN (* receive buffers *) rcvbuf := p.rcvhead; IF rcvbuf # NIL THEN (* not empty *) ASSERT((rcvbuf.len > 0) OR (Fin IN rcvbuf.set)); ASSERT(rcvbuf.prev = NIL); IF rcvbuf.next = NIL THEN (* the only buffer *) ASSERT(p.rcvtail = rcvbuf); ELSE rcvbuf := rcvbuf.next; IF rcvbuf.next = NIL THEN (* 2 buffers *) ASSERT(rcvbuf.prev.next = rcvbuf); ASSERT(p.rcvtail = rcvbuf); ELSE (* check chain (more than 2 buffers *) REPEAT ASSERT(rcvbuf.next.prev = rcvbuf); ASSERT(rcvbuf.prev.next = rcvbuf); rcvbuf := rcvbuf.next; UNTIL rcvbuf.next = NIL; (* last buffer *) ASSERT(p.rcvtail = rcvbuf); END; END; END; (* send buffers *) sndbuf := p.sndhead; found := FALSE; LOOP found := found OR (sndbuf = p.sndtail); sndbuf := sndbuf.next; ASSERT(sndbuf # NIL); IF sndbuf = p.sndhead THEN EXIT END; ASSERT(~found OR ((sndbuf.ofs = 0) & (sndbuf.len = 0))) END; ASSERT(found); END END Invariant; (* Hash function for ConnectionPool *) PROCEDURE HashPool(lport, fport: LONGINT; fip:IP.Adr): LONGINT; VAR i: LONGINT; hash: LONGINT; BEGIN (* hash := (lport + fport + fip) MOD HashTableSize; *) hash := lport + fport; CASE fip.usedProtocol OF IP.IPv4: INC(hash, fip.ipv4Adr); |IP.IPv6: FOR i := 0 TO 15 DO INC(hash, ORD(fip.ipv6Adr[i])); END; ELSE END; RETURN hash MOD HashTableSize; END HashPool; (* (* Hash function for ConnectionPool. *) PROCEDURE -HashPool(lport, fport: LONGINT; fip: IP.Adr): LONGINT; CODE {SYSTEM.i386} (* hash := (lport + fport + fip) MOD HashTableSize; *) POP ECX POP EBX POP EAX ; Convert IP to host byte order XCHG CL, CH ROL ECX, 16 XCHG CL, CH ; Calculate sum ADD EAX, EBX ADD EAX, ECX ; MOD operation MOV EBX, HashTableSize XOR EDX, EDX DIV EBX MOV EAX, EDX END HashPool; *) (* Set x to val, but keep it between min and max. *) PROCEDURE RangeSet(VAR x: LONGINT; val, min, max: LONGINT); BEGIN IF val < min THEN x := min ELSIF val > max THEN x := max ELSE x := val END END RangeSet; (* PROCEDURE -Min(a, b: LONGINT): LONGINT; CODE {SYSTEM.i386} POP EBX POP EAX CMP EAX, EBX JLE end MOV EAX, EBX end: END Min; PROCEDURE -Max(a, b: LONGINT): LONGINT; CODE {SYSTEM.i386} POP EBX POP EAX CMP EAX, EBX JGE end MOV EAX, EBX end: END Max; *) PROCEDURE WriteTime(t: LONGINT); VAR s: ARRAY 8 OF CHAR; BEGIN KernelLog.Int(t DIV Kernel.second, 1); s[0] := "."; t := (t MOD Kernel.second)*1000 DIV Kernel.second; s[1] := CHR(48+t DIV 100 MOD 10); s[2] := CHR(48+t DIV 10 MOD 10); s[3] := CHR(48+t MOD 10); s[4] := 0X; KernelLog.String(s) END WriteTime; (* TCP error function. *) PROCEDURE Error(err: WORD; n: LONGINT; p: Connection); BEGIN IF trace THEN KernelLog.Enter; KernelLog.String("TCP: "); WriteTime(Kernel.GetTicks()); KernelLog.String(" result "); KernelLog.Int(err, 1); KernelLog.Char(" "); KernelLog.Int(n, 1); KernelLog.Exit END; IF TraceError & (p # NIL) THEN TraceTCP("", p, empty^, empty^, 0, 0, 0) END; Machine.AtomicInc(NTCPError[0]); Machine.AtomicInc(NTCPError[err-MinError]) END Error; (* Calculate and store new value for persist timer. *) PROCEDURE SetPersist(p: Connection); BEGIN ASSERT(p.timer[ReXmt] = 0); RangeSet(p.timer[Persist], (p.srtt DIV 4 + p.rttvar) DIV 2 * backoff[p.rxtshift], PersMin, PersMax); IF p.rxtshift < MaxRxtShift THEN INC(p.rxtshift) END END SetPersist; (* Trace Connection fields. *) PROCEDURE TraceConnection(p: Connection); VAR i: LONGINT; BEGIN IF Trace THEN KernelLog.String(" state="); CASE p.state OF Closed: KernelLog.String("Closed") |Listen: KernelLog.String("Listen") |SynSent: KernelLog.String("SynSent") |SynReceived: KernelLog.String("SynReceived") |Established: KernelLog.String("Established") |CloseWait: KernelLog.String("CloseWait") |FinWait1: KernelLog.String("FinWait1") |Closing: KernelLog.String("Closing") |LastAck: KernelLog.String("LastAck") |FinWait2: KernelLog.String("FinWait2") |TimeWait: KernelLog.String("TimeWait") END; KernelLog.String(" maxseg="); KernelLog.Int(p.maxseg, 1); KernelLog.String(" flags={"); IF AckNow IN p.flags THEN KernelLog.String(" AckNow") END; IF DelAck IN p.flags THEN KernelLog.String(" DelAck") END; IF NoDelay IN p.flags THEN KernelLog.String(" NoDelay") END; IF SentFin IN p.flags THEN KernelLog.String(" SentFin") END; IF Force IN p.flags THEN KernelLog.String(" Force") END; IF RcvdScale IN p.flags THEN KernelLog.String(" RcvdScale") END; IF RcvdTstmp IN p.flags THEN KernelLog.String(" RcvdTstmp") END; IF ReqScale IN p.flags THEN KernelLog.String(" ReqScale") END; IF ReqTstmp IN p.flags THEN KernelLog.String(" ReqTstmp") END; IF DoKeepAlive IN p.flags THEN KernelLog.String(" DoKeepAlive") END; IF AcceptConn IN p.flags THEN KernelLog.String(" AcceptConn") END; FOR i := 11 TO 31 DO IF i IN p.flags THEN KernelLog.Char(" "); KernelLog.Int(i, 1) END END; KernelLog.String(" } error="); KernelLog.Int(p.error, 1); KernelLog.Ln; KernelLog.String(" iss="); KernelLog.Int(p.iss, 1); KernelLog.String(" snduna="); KernelLog.Int(p.snduna-p.iss, 1); KernelLog.String(" sndnxt="); KernelLog.Int(p.sndnxt-p.iss, 1); KernelLog.String(" sndmax="); KernelLog.Int(p.sndmax-p.iss, 1); KernelLog.String(" sndup="); KernelLog.Int(p.sndup-p.iss, 1); KernelLog.String(" sndwl2="); KernelLog.Int(p.sndwl2-p.iss, 1); KernelLog.String(" rtseq="); KernelLog.Int(p.rtseq-p.iss, 1); KernelLog.Ln; KernelLog.String(" sndwnd="); KernelLog.Int(p.sndwnd, 1); KernelLog.String(" sndcwnd="); KernelLog.Int(p.sndcwnd, 1); KernelLog.String(" sndcc="); KernelLog.Int(p.sndcc, 1); KernelLog.String(" sndspace="); KernelLog.Int(p.sndspace, 1); KernelLog.String(" sndssthresh="); KernelLog.Int(p.sndssthresh, 1); KernelLog.Ln; KernelLog.String(" irs="); KernelLog.Int(p.irs, 1); KernelLog.String(" rcvnxt="); KernelLog.Int(p.rcvnxt-p.irs, 1); KernelLog.String(" rcvup="); KernelLog.Int(p.rcvup-p.irs, 1); KernelLog.String(" sndwl1="); KernelLog.Int(p.sndwl1-p.irs, 1); KernelLog.String(" rcvadv="); KernelLog.Int(p.rcvadv-p.irs, 1); KernelLog.String(" lastacksent="); KernelLog.Int(p.lastacksent-p.irs, 1); KernelLog.Ln; KernelLog.String(" rcvwnd="); KernelLog.Int(p.rcvwnd, 1); KernelLog.String(" rcvspace="); KernelLog.Int(p.rcvspace, 1); KernelLog.String(" rcvhiwat="); KernelLog.Int(p.rcvhiwat, 1); KernelLog.String(" idle="); KernelLog.Int(p.idle, 1); KernelLog.String(" rtt="); KernelLog.Int(p.rtt, 1); KernelLog.String(" srtt="); KernelLog.Int(p.srtt, 1); KernelLog.Ln; KernelLog.String(" rttvar="); KernelLog.Int(p.rttvar, 1); KernelLog.String(" rttmin="); KernelLog.Int(p.rttmin, 1); KernelLog.String(" maxsndwnd="); KernelLog.Int(p.maxsndwnd, 1); KernelLog.String(" sndscale="); KernelLog.Int(p.sndscale, 1); KernelLog.String(" rcvscale="); KernelLog.Int(p.rcvscale, 1); KernelLog.String(" requestrscale="); KernelLog.Int(p.requestrscale, 1); KernelLog.Ln; KernelLog.String(" requestedsscale="); KernelLog.Int(p.requestedsscale, 1); KernelLog.String(" tsrecent="); KernelLog.Int(p.tsrecent, 1); KernelLog.String(" tsrecentage="); KernelLog.Int(p.tsrecentage, 1); KernelLog.String(" rxtshift="); KernelLog.Int(p.rxtshift, 1); KernelLog.String(" rxtcur="); KernelLog.Int(p.rxtcur, 1); KernelLog.String(" dupacks="); KernelLog.Int(p.dupacks, 1); KernelLog.Ln; KernelLog.Ln END END TraceConnection; (* Trace the protocol. *) PROCEDURE TraceTCP(msg: ARRAY OF CHAR; p: Connection; CONST hdr, data: ARRAY OF CHAR; hlen, ofs, len: LONGINT); BEGIN IF Trace THEN WriteTime(Kernel.GetTicks()); KernelLog.Char(" "); KernelLog.String(msg); TraceConnection(p); IF TracePacket THEN KernelLog.Memory(ADDRESSOF(hdr[0]), hlen); KernelLog.Memory(ADDRESSOF(data[ofs]), len) END END END TraceTCP; (* Output a TCP segment. *) PROCEDURE Output(p: Connection); VAR idle, sendalot: BOOLEAN; optLen, off, sum, win, len, adv, x, startseq, left: LONGINT; pf: SET; buf: SendBuffer; data: SendData; pseudoHdr: ARRAY MaxPseudoHdrLen OF CHAR; hdr: ARRAY MaxTCPHdrLen OF CHAR; pseudoHdrLen: LONGINT; BEGIN idle := (p.sndmax = p.snduna); IF idle & (p.idle >= p.rxtcur) THEN p.sndcwnd := p.maxseg END; REPEAT sendalot := FALSE; LOOP (* look for reason to send a segment (not a real loop, just a goto-substitute) *) off := p.sndnxt - p.snduna; (* the first off bytes from the buffer have been sent and are waiting for Ack *) win := p.sndwnd; (* minimum of window advertised by receiver and the congestion window *) IF HandleCongestion & (p.sndcwnd < win) THEN win := p.sndcwnd END; pf := outflags[p.state]; IF Force IN p.flags THEN IF win = 0 THEN IF off < p.sndcc THEN EXCL(pf, Fin) END; win := 1 ELSE p.timer[Persist] := 0; p.rxtshift := 0 END END; len := p.sndcc; (* minimum of bytes in send buffer and win *) IF win < len THEN len := win END; DEC(len, off); (* off bytes have already been sent and are waiting for Ack *) IF len < 0 THEN len := 0; IF win = 0 THEN p.timer[ReXmt] := 0; p.sndnxt := p.snduna END; END; IF len > p.maxseg THEN len := p.maxseg; sendalot := TRUE END; IF (p.sndnxt + len) - (p.snduna + p.sndcc) < 0 THEN EXCL(pf, Fin) END; (* not emptying send buffer *) win := p.rcvspace; (* now win is receive window advertised *) IF len # 0 THEN IF len = p.maxseg THEN EXIT END; IF (idle OR (NoDelay IN p.flags)) & (len + off >= p.sndcc) THEN EXIT END; IF Force IN p.flags THEN EXIT END; IF len >= p.maxsndwnd DIV 2 THEN EXIT END; IF p.sndnxt - p.sndmax < 0 THEN EXIT END END; IF win > 0 THEN adv := ASH(MaxWin, p.rcvscale); IF win < adv THEN adv := win END; DEC(adv, p.rcvadv - p.rcvnxt); IF adv >= 2*p.maxseg THEN EXIT END; IF 2*adv >= p.rcvhiwat THEN EXIT END END; IF AckNow IN p.flags THEN EXIT END; IF pf * {Syn,Rst} # {} THEN EXIT END; IF p.sndup - p.snduna > 0 THEN EXIT END; IF (Fin IN pf) & (~(SentFin IN p.flags) OR (p.sndnxt = p.snduna)) THEN EXIT END; IF (p.sndcc # 0) & (p.timer[ReXmt] = 0) & (p.timer[Persist] = 0) THEN p.rxtshift := 0; SetPersist(p) END; RETURN (* 0 *) (* no reason to send a segment *) END; (* LOOP *) (* form output segment *) optLen := 0; IF Syn IN pf THEN p.sndnxt := p.iss; IF GenOptions THEN (* generate MSS option *) hdr[MinTCPHdrLen+optLen] := 2X; (* MSS option *) hdr[MinTCPHdrLen+optLen+1] := 4X; (* option length *) Network.PutNet2(hdr, MinTCPHdrLen+optLen+2, p.int.dev.mtu-120); (* MSS = dev.mtu-120 *) INC(optLen, 4); (* generate window scale option *) IF ((ReqScale IN p.flags) & (~(Ack IN pf) OR (RcvdScale IN p.flags))) THEN hdr[MinTCPHdrLen+optLen] := 1X; (* NOP *) hdr[MinTCPHdrLen+optLen+1] := 3X; (* window scale option *) hdr[MinTCPHdrLen+optLen+2] := 3X; (* option length *) hdr[MinTCPHdrLen+optLen+3] := CHR(p.requestrscale); (* window scale *) INC(optLen, 4); END; END; END; (* generate timestamp option *) IF GenOptions & DoRFC1323 & (ReqTstmp IN p.flags) & ~(Rst IN pf) & ((pf * {Syn, Ack} = {Syn}) OR (RcvdTstmp IN p.flags)) THEN hdr[MinTCPHdrLen+optLen] := 1X; (* NOP *) hdr[MinTCPHdrLen+optLen+1] := 1X; (* NOP *) hdr[MinTCPHdrLen+optLen+2] := 8X; (* timestamp option *) hdr[MinTCPHdrLen+optLen+3] := 0AX; (* option length *) Network.PutNet4(hdr, MinTCPHdrLen+optLen+4, timeSource.now); Network.PutNet4(hdr, MinTCPHdrLen+optLen+8, p.tsrecent); INC(optLen, 12); END; (* This doesn't work if Fin was set before !! (bug in TCP/IP Illustrated Vol. 2, p. 873, fig. 26.24) Solved by setting MSS = 536 - 12 (size of timestamp option) and commenting this out. (mvt, 28.02.2004) IF len > p.maxseg - optLen THEN len := p.maxseg - optLen; sendalot := TRUE; END; *) IF len # 0 THEN IF (Force IN p.flags) & (len = 1) THEN Machine.AtomicInc(NTCPSendProbe) ELSIF p.sndnxt - p.sndmax < 0 THEN Machine.AtomicInc(NTCPReXmtPack); Machine.AtomicAdd(NTCPReXmtByte, len) ELSE Machine.AtomicInc(NTCPSendPack); Machine.AtomicAdd(NTCPSendByte, len) END; IF off + len = p.sndcc THEN INCL(pf, Psh) END; (* data to send is in buffer[off..off+len-1] *) buf := p.sndhead; WHILE off >= buf.len DO DEC(off, buf.len); buf := buf.next END; IF off+len <= buf.len THEN (* all data is this buffer *) data := buf.data; INC(off, buf.ofs) ELSE (* data is spread over more buffers *) Machine.AtomicInc(NTCPSplitBuffer); data := p.sndcontig; ASSERT(len <= LEN(data^)); ASSERT(buf.len-off <= len); (* index check *) IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(buf.data[off+buf.ofs]), ADDRESSOF(data[0]), buf.len-off) ELSE Network.Copy(buf.data^, data^, off+buf.ofs, 0, buf.len-off) END; off := buf.len-off; left := len-off; WHILE left # 0 DO buf := buf.next; IF left <= buf.len THEN x := left ELSE x := buf.len END; ASSERT(off+x <= len); (* index check *) IF SystemMove THEN SYSTEM.MOVE(ADDRESSOF(buf.data[buf.ofs]), ADDRESSOF(data[off]), x) ELSE Network.Copy(buf.data^, data^, buf.ofs, off, x) END; INC(off, x); DEC(left, x) END; off := 0 END ELSE IF AckNow IN p.flags THEN Machine.AtomicInc(NTCPAcks) ELSIF pf * {Syn,Fin,Rst} # {} THEN Machine.AtomicInc(NTCPSendCtrl) ELSIF p.sndup - p.snduna > 0 THEN Machine.AtomicInc(NTCPSendUrg) ELSE Machine.AtomicInc(NTCPSendWinUp) END; data := empty; off := 0 END; IF (Fin IN pf) & (SentFin IN p.flags) & (p.sndnxt = p.sndmax) THEN DEC(p.sndnxt) END; IF (len # 0) OR (pf * {Syn,Fin} # {}) OR (p.timer[Persist] # 0) THEN Network.PutNet4(hdr, 4, p.sndnxt) (* sequence number *) ELSE Network.PutNet4(hdr, 4, p.sndmax) END; Network.PutNet4(hdr, 8, p.rcvnxt); (* acknowledgement number *) IF (win < p.rcvhiwat DIV 4) & (win < p.maxseg) THEN win := 0 END; IF win > ASH(MaxWin, p.rcvscale) THEN win := ASH(MaxWin, p.rcvscale) END; IF win < p.rcvadv - p.rcvnxt THEN win := p.rcvadv - p.rcvnxt END; Network.PutNet2(hdr, 14, ASH(win, -p.rcvscale)); IF p.sndup - p.sndnxt > 0 THEN x := p.sndup - p.sndnxt; IF x > 65535 THEN x := 65535 END; Network.PutNet2(hdr, 18, x); INCL(pf, Urg) ELSE p.sndup := p.snduna END; hdr[13] := CHR(SHORT(SHORT(SYSTEM.VAL(LONGINT, pf)))); (* set rest of TCP header *) Network.PutNet2(hdr, 0, p.lport); Network.PutNet2(hdr, 2, p.fport); hdr[12] := CHR((MinTCPHdrLen+optLen) DIV 4*10H); Network.Put2(hdr, 16, 0); (* checksum := 0; *) IF ~(Network.ChecksumTCP IN p.int.dev.calcChecksum) THEN (* set pseudo header *) pseudoHdrLen := p.int.WritePseudoHeader(pseudoHdr, p.int.localAdr, p.fip, IPTypeTCP, MinTCPHdrLen+optLen+len); sum := IP.Checksum1(pseudoHdr, 0, pseudoHdrLen, 0); sum := IP.Checksum1(hdr, 0, MinTCPHdrLen+optLen, sum); sum := IP.Checksum2(data^, off, len, sum); Network.Put2(hdr, 16, sum); (* checksum := sum *) END; IF ~(Force IN p.flags) OR (p.timer[Persist] = 0) THEN startseq := p.sndnxt; IF pf * {Syn,Fin} # {} THEN IF Syn IN pf THEN INC(p.sndnxt) END; IF Fin IN pf THEN INC(p.sndnxt); INCL(p.flags, SentFin) END END; INC(p.sndnxt, len); IF p.sndnxt - p.sndmax > 0 THEN p.sndmax := p.sndnxt; IF p.rtt = 0 THEN p.rtt := 1; p.rtseq := startseq; Machine.AtomicInc(NTCPSegsTimed) END END; IF (p.timer[ReXmt] = 0) & (p.sndnxt # p.snduna) THEN p.timer[ReXmt] := p.rxtcur; IF p.timer[Persist] # 0 THEN p.timer[Persist] := 0; p.rxtshift := 0 END END ELSIF (p.sndnxt + len) - p.sndmax > 0 THEN p.sndmax := p.sndnxt + len ELSE (* skip *) END; IF TraceProtocol THEN TraceTCP("Output", p, hdr, data^, MinTCPHdrLen+optLen, off, len) END; (* Send packet *) p.int.Send(IPTypeTCP, p.fip, hdr, data^, MinTCPHdrLen+optLen, off, len, IP.MaxTTL); (* old code: IP.IPOutput(IP.default, p.hdr, data^, IP.MinIPHdrLen+MinTCPHdrLen, off, len); IF FALSE THEN (* error in IPOutput *) IF FALSE THEN (* out of buffers *) p.sndcwnd := p.maxseg; (* close congestion window *) RETURN (* 0 *) END; IF FALSE & (p.state >= SynReceived) THEN (* host unreachable or network down *) (*p.softerror := error;*) RETURN (* 0 *) END; RETURN (* error *) END; *) Machine.AtomicInc(NTCPSendTotal); IF (win > 0) & ((p.rcvnxt + win) - p.rcvadv > 0) THEN p.rcvadv := p.rcvnxt + win END; p.lastacksent := p.rcvnxt; p.flags := p.flags - {AckNow,DelAck} UNTIL ~sendalot; (* RETURN 0 *) END Output; (* Special output function for Rst and KeepAlive packets (fig. 26.34). *) PROCEDURE Respond(p: Connection; ack, seq: LONGINT; rf: SET); VAR win, sum: LONGINT; pseudoHdr: ARRAY MaxPseudoHdrLen OF CHAR; hdr: ARRAY MaxTCPHdrLen OF CHAR; pseudoHdrLen: LONGINT; BEGIN win := ASH(p.rcvspace, -p.rcvscale); (* zero in nilpcb *) IF rf = {} THEN (* keepalive probe *) INCL(rf, Ack) ELSE (* Rst segment *) END; (* set TCP header *) Network.PutNet2(hdr, 0, p.lport); Network.PutNet2(hdr, 2, p.fport); Network.PutNet4(hdr, 4, seq); Network.PutNet4(hdr, 8, ack); hdr[12] := CHR(MinTCPHdrLen DIV 4*10H); hdr[13] := CHR(SHORT(SHORT(SYSTEM.VAL(LONGINT, rf)))); Network.PutNet2(hdr, 14, win); Network.Put2(hdr, 16, 0); (* checksum := 0 *) Network.Put2(hdr, 18, 0); (* urgent pointer := 0 *) IF ~(Network.ChecksumTCP IN p.int.dev.calcChecksum) THEN (* set pseudo header *) pseudoHdrLen := p.int.WritePseudoHeader(pseudoHdr, p.int.localAdr, p.fip, IPTypeTCP, MinTCPHdrLen); sum := IP.Checksum1(pseudoHdr, 0, pseudoHdrLen, 0); sum := IP.Checksum2(hdr, 0, MinTCPHdrLen, sum); Network.Put2(hdr, 16, sum); (* checksum := sum; *) END; p.int.Send(IPTypeTCP, p.fip, hdr, hdr, MinTCPHdrLen, 0, 0, IP.MaxTTL); END Respond; (* Cancel all timers. *) PROCEDURE CancelTimers(p: Connection); VAR i: LONGINT; BEGIN FOR i := 0 TO NumTimers-1 DO p.timer[i] := 0 END END CancelTimers; (* Apply new RTT measurement to smoothed estimators. *) (* PROCEDURE XmitTimer(p: Connection; rtt: LONGINT); VAR delta: LONGINT; BEGIN Machine.AtomicInc(NTCPRTTUpdated); IF p.srtt # 0 THEN delta := rtt - 1 - ASH(p.srtt, -RTTShift); INC(p.srtt, delta); IF p.srtt <= 0 THEN p.srtt := 1 END; IF delta < 0 THEN delta := -delta END; DEC(delta, ASH(p.rttvar, -RTTVarShift)); INC(p.rttvar, delta); IF p.rttvar <= 0 THEN p.rttvar := 1 END ELSE p.srtt := ASH(rtt, RTTShift); p.rttvar := ASH(rtt, RTTVarShift-1) END; p.rtt := 0; p.rxtshift := 0; RangeSet(p.rxtcur, ASH(p.srtt, -RTTShift) + p.rttvar, p.rttmin, ReXmtMax); (*p.softerror := 0*) END XmitTimer; *) PROCEDURE XmitTimer(p: Connection; rtt: LONGINT); (* m -> rtt sa -> p.srtt sd -> p.rttvar *) VAR delta: LONGINT; BEGIN Machine.AtomicInc(NTCPRTTUpdated); IF p.srtt # 0 THEN (* delta := (rtt*4) - ASH(p.srtt, -RTTShift);*) delta := rtt - 1 - ASH(p.srtt, -RTTShift); INC(p.srtt, delta); IF p.srtt <= 0 THEN p.srtt := 1 END; IF delta < 0 THEN delta := -delta END; DEC(delta, ASH(p.rttvar, -RTTVarShift)); INC(p.rttvar, delta); IF p.rttvar <= 0 THEN p.rttvar := 1 END ELSE p.srtt := ASH(rtt, RTTShift); p.rttvar := ASH(rtt, RTTVarShift-1) END; p.rtt := 0; p.rxtshift := 0; (* p.rxtcur:=((p.srtt DIV 8)+ p.rttvar) DIV 4; IF p.rxtcur < (rtt+2) THEN p.rxtcur:=rtt; END;*) RangeSet(p.rxtcur, ASH(p.srtt, -RTTShift) + p.rttvar, MinTime, ReXmtMax); (*p.softerror := 0*) END XmitTimer; (* Drop len bytes from the front of the send buffer. *) PROCEDURE SbDrop(p: Connection; len: LONGINT); VAR buf: SendBuffer; BEGIN DEC(p.sndcc, len); INC(p.sndspace, len); buf := p.sndhead; LOOP IF buf.len > len THEN (* part of buffer will remain *) INC(buf.ofs, len); DEC(buf.len, len); (* ignore buf.seq (later: why?) *) EXIT END; DEC(len, buf.len); buf.ofs := 0; buf.len := 0; (* make buffer ready for re-use *) IF buf # p.sndtail THEN buf := buf.next END; IF len = 0 THEN EXIT END END; p.sndhead := buf END SbDrop; (* Close a connection (fig. 27.4-6). *) PROCEDURE CloseConnection(p: Connection); VAR buf: Network.Buffer; BEGIN IF FALSE (* we sent enough data to update rtt in route *) THEN END; pool.Remove(p); (* can not remove from parent's accept list, because of possible deadlock *) IF p = lastpcb THEN lastpcb := nilpcb END; (* only for GC -- race does not matter *) (* can not clear any pcb pointer fields, because Lookup is non-exclusive *) (*SoIsDisconnected(p);*) p.state := Closed; (* return the buffers still in receive queue *) p.rcvreasm := NIL; p.rcvtail := NIL; WHILE p.rcvhead # NIL DO buf := p.rcvhead; p.rcvhead := p.rcvhead.next; Network.ReturnBuffer(buf); END; Machine.AtomicInc(NTCPClosed); END CloseConnection; (* Drop a connection (fig. 27.2). *) PROCEDURE Drop(p: Connection; err: LONGINT); BEGIN IF p.state >= SynReceived THEN p.state := Closed; Output(p); Machine.AtomicInc(NTCPDrops) ELSE Machine.AtomicInc(NTCPConnDrops) END; (* IF (err = TimedOut) & (p.softerror # Ok) THEN err := p.softerror END; *) p.error := err; CloseConnection(p) END Drop; PROCEDURE SetMSS(p: Connection; mss: LONGINT); (* Error(NIYMSS, 0, p); *) (* Processing of received MSS option not necessary as long as sending is always done with MSS=536. *) END SetMSS; (* Process a received TCP segment. *) PROCEDURE Input(int: IP.Interface; type: LONGINT; fip, lip: IP.Adr; buffer: Network.Buffer); VAR lport, fport, hdrLen: LONGINT; p: Connection; BEGIN ASSERT(type = IPTypeTCP); Machine.AtomicInc(NTCPRcvTotal); IF IP.AdrsEqual(int.localAdr, lip) THEN IF buffer.len >= MinTCPHdrLen THEN hdrLen := LONG(ORD(buffer.data[buffer.ofs+12])) DIV 10H * 4; IF (hdrLen < MinTCPHdrLen) OR (hdrLen > buffer.len) THEN (* bad header length *) Error(BadHeaderLength, hdrLen, NIL); (*GotoDrop;*) ELSE (* findpcb *) p := lastpcb; fport := Network.GetNet2(buffer.data, buffer.ofs); lport := Network.GetNet2(buffer.data, buffer.ofs+2); IF (p = nilpcb) OR (~IP.AdrsEqual(p.fip, fip)) OR (p.lport # lport) OR (p.fport # fport) THEN p := pool.Lookup(lport, fport, fip); IF p = nilpcb THEN (* look for listening connection *) p := pool.Lookup(lport, NilPort, IP.NilAdr); END; lastpcb := p; Machine.AtomicInc(NTCPCacheMiss); END; p.Input(int, fip, hdrLen, buffer); RETURN; (* w/o returning buffer *) END; ELSE Error(SegmentTooSmall, buffer.len, NIL); END; ELSE Error(BroadcastReceived, buffer.len, NIL); END; Network.ReturnBuffer(buffer); END Input; (* Process a received TCP segment for the specified Connection. *) PROCEDURE ProcessInput(p: Connection; hdrLen: LONGINT; buffer: Network.Buffer; drop: BOOLEAN; VAR bufferQueued: BOOLEAN); CONST Options = 0; TSPresent = 1; NeedOutput = 2; VAR win, sum, urp, seq, ack, tsval, tsecr, acked, discard, tlen, optLen: LONGINT; pseudoHdr: ARRAY MaxPseudoHdrLen OF CHAR; pseudoHdrLen: LONGINT; pf, lf: SET; reassembledLength: LONGINT; fragmentBuffer: Network.Buffer; PROCEDURE GotoDrop; (* fig. 29.27 *) BEGIN IF TraceProtocol THEN TraceTCP("Drop", p, empty^, empty^, 0, 0, 0) END; IF drop THEN Drop(p, ConnectionAborted) END END GotoDrop; PROCEDURE GotoDropReset; (* fig. 29.27 - may be called with p = nilpcb (at start) *) BEGIN IF (Rst IN pf) THEN GotoDrop ELSE IF Ack IN pf THEN Respond(p, 0, ack, {Rst}) ELSE IF Syn IN pf THEN INC(tlen) END; Respond(p, seq + tlen, 0, {Rst,Ack}) END; IF drop THEN Drop(p, ConnectionAborted) END END END GotoDropReset; PROCEDURE GotoDropAfterAck; (* fig. 29.26 *) BEGIN IF Rst IN pf THEN GotoDrop ELSE INCL(p.flags, AckNow); Output(p) END END GotoDropAfterAck; PROCEDURE ProcessOptions; (* fig. 28.9 & 28.10 *) VAR opt: CHAR; i, m, optlen: LONGINT; BEGIN i := buffer.ofs+MinTCPHdrLen; m := buffer.ofs+hdrLen; LOOP IF i >= m THEN EXIT END; opt := buffer.data[i]; IF opt = 0X THEN EXIT; (* EOL *) ELSIF opt = 1X THEN optlen := 1; (* NOP *) ELSE optlen := ORD(buffer.data[i+1]); IF optlen = 0 THEN EXIT END; END; CASE opt OF 2X: (* MaxSeg *) IF (optlen = 4) & (Syn IN pf) THEN SetMSS(p, Network.GetNet2(buffer.data, i+2)); END; |3X: (* Window Scaling *) IF (optlen = 3) & (Syn IN pf) THEN INCL(p.flags, RcvdScale); p.requestedsscale := MIN(LONG(ORD(buffer.data[i+2])), MaxWinShift); END; |8X: (* Timestamp *) IF DoRFC1323 & (optlen = 10) THEN INCL(lf, TSPresent); tsval := Network.GetNet4(buffer.data, i+2); tsecr := Network.GetNet4(buffer.data, i+6); IF Syn IN pf THEN INCL(p.flags, RcvdTstmp); p.tsrecent := tsval; p.tsrecentage := timeSource.now; END; END; ELSE (* skip *) END; INC(i, optlen); END END ProcessOptions; PROCEDURE ProcessListen(): BOOLEAN; (* fig. 28.15-17 *) VAR res: WORD; q: Connection; BEGIN IF Rst IN pf THEN GotoDrop; RETURN FALSE END; IF Ack IN pf THEN GotoDropReset; RETURN FALSE END; IF ~(Syn IN pf) THEN GotoDrop; RETURN FALSE END; pool.Add(p, Network.GetNet2(buffer.data, buffer.ofs+2), Network.GetNet2(buffer.data, buffer.ofs), res); IF res # Ok THEN GotoDrop; RETURN FALSE; END; IF ProcOptions & (Options IN lf) THEN ProcessOptions END; p.iss := issSource.Get(); p.snduna := p.iss; p.sndnxt := p.iss; p.sndmax := p.iss; p.sndup := p.iss; p.irs := seq; p.rcvnxt := seq+1; p.rcvadv := p.rcvnxt; INCL(p.flags, AckNow); p.state := SynReceived; p.timer[Keep] := KeepInit; drop := FALSE; (* commit *) (* put on accept queue *) ASSERT(Objects.LockedByCurrent(p.parent)); (* came here via Connection.Input of parent *) q := p.parent.acceptNext; p.acceptNext := NIL; IF q = NIL THEN p.parent.acceptNext := p ELSE WHILE q.acceptNext # NIL DO q := q.acceptNext END; (* find last entry in queue *) q.acceptNext := p END; Machine.AtomicInc(NTCPAccepts); RETURN TRUE END ProcessListen; PROCEDURE ProcessSynSent(): BOOLEAN; BEGIN IF (Ack IN pf) & ((ack - p.iss <= 0) OR (ack - p.sndmax > 0)) THEN GotoDropReset; RETURN FALSE END; IF Rst IN pf THEN IF Ack IN pf THEN Error(ConnectionRefused, 0, p); Drop(p, ConnectionRefused) END; GotoDrop; RETURN FALSE END; IF ~(Syn IN pf) THEN GotoDrop; RETURN FALSE END; IF Ack IN pf THEN p.snduna := ack; IF p.sndnxt - p.snduna < 0 THEN p.sndnxt := p.snduna END; END; p.timer[ReXmt] := 0; p.irs := seq; p.rcvnxt := seq+1; p.rcvadv := p.rcvnxt; INCL(p.flags, AckNow); IF (Ack IN pf) & (p.snduna - p.iss > 0) THEN Machine.AtomicInc(NTCPConnects); (*SoIsConnected(p);*) p.state := Established; IF p.flags * {RcvdScale,ReqScale} = {RcvdScale,ReqScale} THEN p.sndscale := p.requestedsscale; p.rcvscale := p.requestrscale END; (*GotoPresent;*) (* not necessary, processed later *) IF p.rtt # 0 THEN XmitTimer(p, p.rtt) END ELSE p.state := SynReceived END; RETURN TRUE END ProcessSynSent; PROCEDURE Trim1; (* fig. 28.21 *) BEGIN INC(seq); IF tlen > p.rcvwnd THEN Error(DataBeyondWindow1, tlen - p.rcvwnd, p); (* data received beyond window (with Syn) *) tlen := p.rcvwnd; EXCL(pf, Fin) END; p.sndwl1 := seq-1; p.rcvup := seq END Trim1; PROCEDURE Paws(): BOOLEAN; (* fig. 28.22 *) BEGIN IF (TSPresent IN lf) & ~(Rst IN pf) & (p.tsrecent # 0) & (tsval - p.tsrecent < 0) THEN IF (timeSource.now - p.tsrecentage) > PawsIdle THEN p.tsrecent := 0 ELSE Error(DuplicateSegmentPAWS, tlen, p); (* duplicate segment (PAWS) *) GotoDropAfterAck; RETURN FALSE END END; RETURN TRUE END Paws; PROCEDURE Trim2(todrop: LONGINT): BOOLEAN; (* fig. 28.24-25, corrected fig. 28.30*) BEGIN IF Syn IN pf THEN EXCL(pf, Syn); INC(seq); IF urp > 1 THEN DEC(urp) ELSE EXCL(pf, Urg) END; DEC(todrop) END; IF (todrop > tlen) OR ((todrop = tlen) & ~(Fin IN pf)) THEN EXCL(pf, Fin); INCL(p.flags, AckNow); todrop := tlen; Error(DuplicateSegment, todrop, p) (* duplicate segment *) ELSE Error(DuplicatePartialSegment, todrop, p) (* partially duplicate segment *) END; INC(discard, todrop); INC(seq, todrop); DEC(tlen, todrop); IF urp > todrop THEN DEC(urp, todrop) ELSE EXCL(pf, Urg); urp := 0 END; RETURN TRUE END Trim2; PROCEDURE Trim3(todrop: LONGINT): BOOLEAN; (* fig. 28.29 *) BEGIN IF todrop >= tlen THEN IF (Syn IN pf) & (p.state = TimeWait) & (seq - p.rcvnxt > 0) THEN (*iss := p.rcvnxt + ISSInc; CloseConnection(p); goto findpcb*) Error(NIYNewIncarnation, 0, p); (* new incarnation NIY - also read p. 945-946 *) GotoDropAfterAck; RETURN FALSE END; IF (p.rcvwnd = 0) & (seq = p.rcvnxt) THEN INCL(p.flags, AckNow); Machine.AtomicInc(NTCPRcvWinProbe) ELSE Error(DataBeyondWindow2, tlen, p); (* data received beyond window (complete) *) GotoDropAfterAck; RETURN FALSE END ELSE Error(DataBeyondWindow3, todrop, p) (* data received beyond window (partial) *) END; DEC(tlen, todrop); pf := pf - {Psh,Fin}; RETURN TRUE END Trim3; PROCEDURE RecordTS; (* fig. 28.35 *) VAR x: LONGINT; BEGIN IF DoRFC1323 THEN IF pf * {Syn,Fin} # {} THEN x := 1 ELSE x := 0 END; IF p.lastacksent - (seq + tlen + x) < 0 THEN p.tsrecentage := timeSource.now; p.tsrecent := tsval; END END END RecordTS; PROCEDURE ProcessRst(): BOOLEAN; (* fig. 28.36 *) BEGIN CASE p.state OF SynReceived, Established, FinWait1, FinWait2, CloseWait: IF p.state = SynReceived THEN p.error := ConnectionRefused ELSE p.error := ConnectionReset END; p.state := Closed; Error(p.error, 0, p); (* connection reset *) CloseConnection(p); GotoDrop; RETURN FALSE |Closing, LastAck: CloseConnection(p); GotoDrop; RETURN FALSE ELSE (* skip *) END; RETURN TRUE END ProcessRst; PROCEDURE ProcessAck(): BOOLEAN; (* fig. 29.2-29.14 *) VAR onxt, cw, incr: LONGINT; finacked: BOOLEAN; BEGIN IF p.state IN {SynReceived..TimeWait} THEN IF p.state = SynReceived THEN IF (p.snduna - ack > 0) OR (ack - p.sndmax > 0) THEN GotoDropReset; RETURN FALSE END; Machine.AtomicInc(NTCPConnects); (*SoIsConnected(p);*) p.state := Established; IF p.flags * {RcvdScale,ReqScale} = {RcvdScale,ReqScale} THEN p.sndscale := p.requestedsscale; p.rcvscale := p.requestrscale END; (* IF tlen > 0 THEN (* would inc of twice by the header size *) Reasm; END; *) p.sndwl1 := seq-1; END; IF ack - p.snduna <= 0 THEN IF (tlen = 0) & (win = p.sndwnd) THEN Error(DuplicateAck, 0, p); (* duplicate ack *) IF (p.timer[ReXmt] = 0) OR (ack # p.snduna) THEN p.dupacks := 0 ELSE INC(p.dupacks); IF p.dupacks = ReXmtThresh THEN onxt := p.sndnxt; p.sndssthresh := MAX(MIN(p.sndwnd, p.sndcwnd) DIV 2 DIV p.maxseg, 2) * p.maxseg; IF TraceCongestion THEN KernelLog.String("DA sndssthresh := "); KernelLog.Int(p.sndssthresh, 1); KernelLog.Ln END; p.timer[ReXmt] := 0; p.rtt := 0; p.sndnxt := ack; p.sndcwnd := p.maxseg; Output(p); p.sndcwnd := p.sndssthresh + p.maxseg * p.dupacks; IF onxt - p.sndnxt > 0 THEN p.sndnxt := onxt END; GotoDrop; RETURN FALSE ELSIF p.dupacks > ReXmtThresh THEN INC(p.sndcwnd, p.maxseg); Output(p); GotoDrop; RETURN FALSE ELSE (* skip *) END END ELSE p.dupacks := 0 END; RETURN TRUE (* skip rest of Ack processing - goto step 6 *) END; IF (p.dupacks > ReXmtThresh) & (p.sndcwnd > p.sndssthresh) THEN p.sndcwnd := p.sndssthresh END; p.dupacks := 0; IF ack - p.sndmax > 0 THEN Error(OutOfRangeAck, ack - p.sndmax, p); GotoDropAfterAck; RETURN FALSE; END; acked := ack - p.snduna; Machine.AtomicInc(NTCPAckPack); Machine.AtomicAdd(NTCPAckByte, acked); IF TSPresent IN lf THEN XmitTimer(p, timeSource.now - tsecr + 1) ELSIF (p.rtt # 0) & (ack - p.rtseq > 0) THEN XmitTimer(p, p.rtt) ELSE (* skip *) END; IF ack = p.sndmax THEN p.timer[ReXmt] := 0; INCL(lf, NeedOutput) ELSIF p.timer[Persist] = 0 THEN p.timer[ReXmt] := p.rxtcur; ELSE (* skip *) END; cw := p.sndcwnd; incr := p.maxseg; IF cw > p.sndssthresh THEN incr := incr * incr DIV cw END; p.sndcwnd := MIN(cw + incr, ASH(MaxWin, p.sndscale)); IF acked > p.sndcc THEN DEC(p.sndwnd, p.sndcc); SbDrop(p, p.sndcc); finacked := TRUE ELSE SbDrop(p, acked); DEC(p.sndwnd, acked); finacked := FALSE END; (*IF Notify IN p.flags THEN SoWakeup(p) END;*) p.snduna := ack; IF p.sndnxt - p.snduna < 0 THEN p.sndnxt := p.snduna END; CASE p.state OF FinWait1: IF finacked THEN (*IF NoMore IN p.flags THEN SoIsDisconnected(p); p.timer[MSL2] := MaxIdle END;*) p.timer[MSL2] := MaxIdle; (* otherwise we hang in FinWait2 *) p.state := FinWait2 END |Closing: IF finacked THEN p.state := TimeWait; CancelTimers(p); p.timer[MSL2] := 2 * MSL; (*SoIsDisconnected(p)*) END |LastAck: IF finacked THEN CloseConnection(p); GotoDrop; RETURN FALSE END |TimeWait: p.timer[MSL2] := 2 * MSL; GotoDropAfterAck; RETURN FALSE ELSE (* skip *) END (* CASE *) END; RETURN TRUE END ProcessAck; PROCEDURE GotoPresent; VAR buf: Network.Buffer; BEGIN buf := p.rcvreasm; (* first buffer on reassembly list *) IF (p.state >= Established) & (buf # NIL) & (buf.int = p.rcvnxt) THEN REPEAT DEC(p.rcvspace, buf.len); INC(p.rcvnxt, buf.len); pf := buf.set * {Fin}; buf := buf.next; UNTIL (buf = NIL) OR (buf.int # p.rcvnxt); p.rcvreasm := buf; ELSE EXCL(pf, Fin); END; END GotoPresent; PROCEDURE Reasm; VAR pos, last: Network.Buffer; lap: LONGINT; BEGIN buffer.set := pf; buffer.int := seq; INC(buffer.ofs, hdrLen + discard); buffer.len := tlen; IF p.rcvhead = NIL THEN (* insert into empty queue *) buffer.next := NIL; buffer.prev := NIL; p.rcvhead := buffer; p.rcvreasm := buffer; p.rcvtail := buffer; bufferQueued := TRUE; ELSE (* go to insert position, insert in front of pos and after last. *) pos := p.rcvreasm; IF pos = NIL THEN (* no reasm part of queue *) last := p.rcvtail; ELSE last := pos.prev; WHILE (pos # NIL) & (pos.int < seq) DO last := pos; pos := pos.next; END; END; IF last # NIL THEN (* check for overlap with previous buffer *) lap := (last.int + last.len) - seq; IF lap > 0 THEN (* some overlap - drop new data *) IF lap >= tlen THEN (* complete duplicate *) Error(DataDuplicatePrevComplete, tlen, p); RETURN; ELSE (* partial duplicate *) Error(DataDuplicatePrevPartial, lap, p); INC(buffer.ofs, lap); DEC(buffer.len, lap); DEC(tlen, lap); INC(seq, lap); buffer.int := seq; END; END; END; IF pos # NIL THEN (* check for overlap with next buffer *) lap := (seq + tlen) - pos.int; IF lap > 0 THEN (* some overlap - drop new data *) IF lap >= tlen THEN (* complete duplicate *) Error(DataDuplicateNextComplete, lap, p); RETURN; ELSE (* partial duplicate *) Error(DataDuplicateNextPartial, lap, p); DEC(tlen, lap); DEC(buffer.len, lap); END; END; END; Machine.AtomicInc(NTCPRcvPackSlow); Machine.AtomicAdd(NTCPRcvByteSlow, tlen); (* insert buffer into correct position in queue *) IF pos = NIL THEN (* insert at the end of the queue *) ASSERT(last = p.rcvtail); buffer.next := NIL; buffer.prev := last; buffer.prev.next := buffer; p.rcvtail := buffer; IF p.rcvreasm = NIL THEN p.rcvreasm := buffer; END; ELSIF last = NIL THEN (* insert at the beginning of the queue *) ASSERT((pos = p.rcvhead) & (pos = p.rcvreasm)); buffer.prev := NIL; buffer.next := pos; buffer.next.prev := buffer; p.rcvhead := buffer; p.rcvreasm := buffer; ELSE (* insert somewhere in the middle *) ASSERT((last.next = pos) & (pos.prev = last)); last.next := buffer; buffer.prev := last; pos.prev := buffer; buffer.next := pos; IF buffer.next = p.rcvreasm THEN p.rcvreasm := buffer; END; END; bufferQueued := TRUE; END; GotoPresent; END Reasm; PROCEDURE DoData; BEGIN IF ((tlen # 0) OR (Fin IN pf)) & (p.state < TimeWait) THEN IF (seq = p.rcvnxt) & (p.rcvreasm = NIL) & (p.state = Established) THEN INCL(p.flags, DelAck) ELSE INCL(p.flags, AckNow) (* cf. fig. 27.15 *) END; Reasm(); ELSE EXCL(pf, Fin) END; IF Fin IN pf THEN IF p.state < TimeWait THEN (*SoCantRcvMore(p);*) INCL(p.flags, AckNow); INC(p.rcvnxt) END; CASE p.state OF SynReceived, Established: p.state := CloseWait |FinWait1: p.state := Closing |FinWait2: p.state := TimeWait; CancelTimers(p); p.timer[MSL2] := 2 * MSL; (*SoIsDisconnected(p)*) |TimeWait: p.timer[MSL2] := 2 * MSL ELSE (* skip *) END (* CASE *) END; IF TraceProtocol THEN TraceTCP("Input", p, buffer.data, empty^, buffer.ofs+hdrLen, 0, 0) END; IF (NeedOutput IN lf) OR (AckNow IN p.flags) THEN Output(p) END END DoData; PROCEDURE Step6(): BOOLEAN; BEGIN IF (Ack IN pf) & ((p.sndwl1 - seq < 0) OR ((p.sndwl1 = seq) & ((p.sndwl2 - ack < 0) OR ((p.sndwl2 = ack) & (win > p.sndwnd))))) THEN IF (tlen = 0) & (p.sndwl2 = ack) & (win > p.sndwnd) THEN Machine.AtomicInc(NTCPRcvWinUpd) END; p.sndwnd := win; p.sndwl1 := seq; p.sndwl2 := ack; IF p.sndwnd > p.maxsndwnd THEN p.maxsndwnd := p.sndwnd END; INCL(lf, NeedOutput) END; IF (Urg IN pf) & (urp # 0) & (p.state < TimeWait) THEN Error(NIYOutOfBand, 0, p); (* out-of-band data NIY *) (*IF urp + p.rcvcc > sbmax THEN*) urp := 0; EXCL(pf, Urg); DoData; RETURN FALSE; (*END;*) ELSE IF p.rcvnxt - p.rcvup > 0 THEN p.rcvup := p.rcvnxt END END; RETURN TRUE END Step6; BEGIN lf := {}; discard := 0; (* data from 0 to discard has to be thrown away. *) tlen := buffer.len-hdrLen; (* length of user data. *) optLen := hdrLen - MinTCPHdrLen; (* length of options *) pf := SYSTEM.VAL(SET, LONG(ORD(buffer.data[buffer.ofs+13]))); IF optLen > 0 THEN (* TCP options present *) Machine.AtomicInc(NTCPRcvOptions); IF ProcOptions THEN IF DoRFC1323 THEN (* quick processing of timestamp option, fig. 28.4 *) IF ((optLen = 12) OR ((optLen > 12) & (buffer.data[buffer.ofs+MinTCPHdrLen+12] = 0X))) & ~(Syn IN pf) & (buffer.data[buffer.ofs+MinTCPHdrLen] = 1X) & (buffer.data[buffer.ofs+MinTCPHdrLen+1] = 1X) & (buffer.data[buffer.ofs+MinTCPHdrLen+2] = 8X) & (buffer.data[buffer.ofs+MinTCPHdrLen+3] = 0AX) THEN INCL(lf, TSPresent); tsval := Network.GetNet4(buffer.data, buffer.ofs+MinTCPHdrLen+4); tsecr := Network.GetNet4(buffer.data, buffer.ofs+MinTCPHdrLen+8); ELSE INCL(lf, Options); END; ELSE INCL(lf, Options); END END END; (* initialize variables needed for GotoDropReset *) seq := Network.GetNet4(buffer.data, buffer.ofs+4); ack := Network.GetNet4(buffer.data, buffer.ofs+8); (* pf := SYSTEM.VAL(SET, LONG(ORD(buffer.data[buffer.ofs+13])));*) IF p = nilpcb THEN IF packetDumpListener # NIL THEN packetDumpListener(p.fip, buffer) END; GotoDropReset; RETURN END; IF p.state <= Closed THEN GotoDrop; RETURN END; IF ~(Network.ChecksumTCP IN buffer.calcChecksum) THEN (* calculate checksum *) (* set pseudo header *) reassembledLength := 0; fragmentBuffer := buffer; WHILE fragmentBuffer # NIL DO INC(reassembledLength, fragmentBuffer.len); fragmentBuffer := fragmentBuffer.nextFragment; END; pseudoHdrLen := p.int.WritePseudoHeader(pseudoHdr, p.fip, p.int.localAdr, IPTypeTCP, reassembledLength); sum := IP.Checksum1(pseudoHdr, 0, pseudoHdrLen, 0); IF buffer.nextFragment # NIL THEN (* fragmented packets *) fragmentBuffer := buffer; WHILE fragmentBuffer.nextFragment # NIL DO sum := IP.Checksum1(fragmentBuffer.data, fragmentBuffer.ofs, fragmentBuffer.len, sum); fragmentBuffer := fragmentBuffer.nextFragment; END; sum := IP.Checksum2(fragmentBuffer.data, fragmentBuffer.ofs, fragmentBuffer.len, sum); ELSE sum := IP.Checksum2(buffer.data, buffer.ofs, buffer.len, sum); END; IF sum # 0 THEN Error(BadChecksum, 0, p); GotoDrop; RETURN; END; END; win := Network.GetNet2(buffer.data, buffer.ofs+14); urp := Network.GetNet2(buffer.data, buffer.ofs+18); IF ~(Syn IN pf) THEN win := ASH(win, p.sndscale) END; p.idle := 0; p.timer[Keep] := KeepIdle; IF ProcOptions & (Options IN lf) & (p.state # Listen) THEN ProcessOptions END; (* header prediction (fig. 28.11-13) *) IF (p.state = Established) & (pf * {Syn,Fin,Rst,Urg,Ack} = {Ack}) & (~DoRFC1323 OR ~(TSPresent IN lf) OR (tsval - p.tsrecent >= 0)) & (seq = p.rcvnxt) & (win # 0) & (win = p.sndwnd) & (p.sndnxt = p.sndmax) THEN IF DoRFC1323 & (TSPresent IN lf) & (seq - p.lastacksent <= 0) THEN p.tsrecentage := timeSource.now; p.tsrecent := tsval; (* see p. 937 & fig. 26.20 *) END; IF tlen = 0 THEN IF (ack - p.snduna > 0) & (ack - p.sndmax <= 0) & (p.sndcwnd >= p.sndwnd) & (p.dupacks < ReXmtThresh) THEN (* p.dupacks < ReXmtThres fix from "Performance Problems in 4.4BSD TCP" *) Machine.AtomicInc(NTCPPredAck); IF DoRFC1323 & (TSPresent IN lf) THEN XmitTimer(p, timeSource.now - tsecr + 1) ELSIF (p.rtt # 0) & (ack - p.rtseq > 0) THEN XmitTimer(p, p.rtt) ELSE (* skip *) END; acked := ack - p.snduna; Machine.AtomicInc(NTCPAckPack); Machine.AtomicAdd(NTCPAckByte, acked); SbDrop(p, acked); p.snduna := ack; IF ack = p.sndmax THEN p.timer[ReXmt] := 0 ELSIF p.timer[Persist] = 0 THEN p.timer[ReXmt] := p.rxtcur; ELSE (* skip *) END; (*IF Notify IN p.flags THEN SoWakeup(p) END;*) IF p.sndcc # 0 THEN Output(p) END; RETURN END ELSIF (ack = p.snduna) & (p.rcvreasm = NIL) & (tlen <= p.rcvspace) THEN Machine.AtomicInc(NTCPPredData); Machine.AtomicInc(NTCPRcvPackFast); Machine.AtomicAdd(NTCPRcvByteFast, tlen); (* can assume no overlap with last or next buffers *) Reasm; (* queue the buffer directly *) (*SoWakeup(p);*) INCL(p.flags, DelAck); RETURN ELSE (* skip - continue to slow path *) END END; (* slow path *) p.rcvwnd := MAX(MAX(p.rcvspace, 0), p.rcvadv - p.rcvnxt); CASE p.state OF Listen: IF ProcessListen() THEN Trim1 ELSE RETURN END |SynSent: IF ProcessSynSent() THEN Trim1 ELSE RETURN END ELSE IF DoRFC1323 & ~Paws() THEN RETURN END; IF (p.rcvnxt - seq > 0) & ~Trim2(p.rcvnxt - seq) THEN RETURN END; (*IF (Gone IN p.flags) & (p.state > CloseWait) & (tlen # 0) THEN CloseConnection(p); Machine.AtomicInc(NTCPRcvAfterClose); GotoDropReset; RETURN END;*) IF ((seq + tlen) - (p.rcvnxt + p.rcvwnd) > 0) & ~Trim3((seq + tlen) - (p.rcvnxt + p.rcvwnd)) THEN RETURN END; IF DoRFC1323 & (TSPresent IN lf) & (seq - p.lastacksent <= 0) THEN RecordTS END; IF (Rst IN pf) & ~ProcessRst() THEN RETURN END; IF Syn IN pf THEN Drop(p, ConnectionReset); GotoDropReset; RETURN END; IF ~(Ack IN pf) THEN GotoDrop; RETURN END; IF ~ProcessAck() THEN RETURN END; END; (* CASE *) IF ~Step6() THEN RETURN END; DoData; END ProcessInput; (* Initialize a new Connection. *) PROCEDURE InitConnection(p: Connection); VAR buf: SendBuffer; BEGIN IF ~NewZeros THEN (* clear all fields *) CancelTimers(p); p.fip := IP.NilAdr; p.fport := NilPort; p.rxtshift := 0; p.dupacks := 0; p.snduna := 0; p.sndnxt := 0; p.sndup := 0; p.sndwl1 := 0; p.sndwl2 := 0; p.iss := 0; p.sndwnd := 0; p.rcvwnd := 0; p.rcvnxt := 0; p.rcvup := 0; p.irs := 0; p.rcvadv := 0; p.sndmax := 0; p.idle := 0; p.rtt := 0; p.rtseq := 0; p.maxsndwnd := 0; p.sndscale := 0; p.rcvscale := 0; p.requestrscale := 0; p.requestedsscale := 0; p.tsrecent := 0; p.tsrecentage := 0; p.lastacksent := 0; p.error := 0; p.acceptable := 0; p.sndcc := 0; p.poolNext := NIL; p.parent := NIL; p.acceptNext := NIL END; (* initialize fields *) p.maxseg := MSS; p.state := Closed; IF DoRFC1323 THEN p.flags := {ReqScale,ReqTstmp} ELSE p.flags := {} END; p.srtt := SRTTBase; p.rttvar := SRTTDflt * 4; p.rttmin := MinTime; RangeSet(p.rxtcur, (SRTTBase DIV 4 + SRTTDflt * 4) DIV 2, MinTime, ReXmtMax); p.sndcwnd := ASH(MaxWin, MaxWinShift); p.sndssthresh := ASH(MaxWin, MaxWinShift); p.sndspace := MaxSendSpace; p.rcvspace := MaxRecvSpace; p.rcvhiwat := MaxRecvSpace; WHILE (p.requestrscale < MaxWinShift) & (ASH(MaxWin, p.requestrscale) < p.rcvhiwat) DO INC(p.requestrscale) END; Machine.AtomicInc(NTCPNewBufs); (* allocate send buffer *) NEW(buf); NEW(buf.data, MSS * SegsPerBuf); IF ~NewZeros THEN buf.ofs := 0; buf.len := 0; END; buf.next := buf; p.sndhead := buf; p.sndtail := buf; NEW(p.sndcontig, MSS); (* init receive buffer *) p.rcvhead := NIL; END InitConnection; (* Move connection to next state in close process. *) PROCEDURE UsrClosed(p: Connection); BEGIN CASE p.state OF Closed, Listen, SynSent: p.state := Closed; CloseConnection(p) |SynReceived, Established: p.state := FinWait1 |CloseWait: p.state := LastAck ELSE (* skip *) END; (*IF p.state >= FinWait2 THEN SoIsDisconnected(p) END*) END UsrClosed; (* If t is not zero, decrement it. Return true if it has reached 0, false otherwise. *) PROCEDURE Expired(VAR t: LONGINT): BOOLEAN; BEGIN IF t # 0 THEN DEC(t); RETURN t = 0 ELSE RETURN FALSE END END Expired; PROCEDURE GetID(): LONGINT; VAR id, time, date: LONGINT; BEGIN Clock.Get(time, date); id := SYSTEM.VAL(LONGINT, SYSTEM.VAL(SET, time) / SYSTEM.VAL(SET, date)); RETURN id; END GetID; (** Aos command - display all errors *) PROCEDURE DisplayErrors*(context : Commands.Context); VAR i: LONGINT; BEGIN FOR i := 1 TO NumErrors-1 DO IF NTCPError[i] # 0 THEN context.out.String("TCP: Error "); context.out.Int(i+MinError, 1); context.out.String(" "); context.out.Int(NTCPError[i], 1); context.out.Ln; END; END; END DisplayErrors; (** Aos command - discard and finalize all connections *) PROCEDURE DiscardAll*; BEGIN pool.Finalize(); END DiscardAll; (** Temporary trace procedure. *) PROCEDURE ToggleTrace*; BEGIN trace := ~trace; KernelLog.Enter; KernelLog.String("TCP trace "); IF trace THEN KernelLog.String("on") ELSE KernelLog.String("off") END; KernelLog.Exit END ToggleTrace; PROCEDURE Init; VAR i: LONGINT; BEGIN FOR i := 0 TO MaxRxtShift DO IF i < 6 THEN backoff[i] := ASH(1, i) ELSE backoff[i] := 64 END; INC(totbackoff, backoff[i]) END; (* maxdiff := 0;*) (* backoff[0]:=1; backoff[1]:=1; backoff[2]:=2; backoff[3]:=3; backoff[4]:=5; backoff[5]:=10; backoff[6]:=18; backoff[7]:=30; backoff[8]:=60; backoff[9]:=120; backoff[10]:=240; backoff[11]:=240; backoff[12]:=240;*) ASSERT(MaxRxtShift=12); totbackoff := 0; FOR i:=0 TO MaxRxtShift DO INC(totbackoff, backoff[i]); END; (* Flags used when sending segments in tcp_output. Basic flags {Rst, Ack, Syn, Fin} are totally determined by state, with the proviso that Fin is sent only if all data queued for output is included in the segment. The {Psh,Urg} flags are set as necessary. *) outflags[Closed] := {Rst,Ack}; outflags[Listen] := {}; outflags[SynSent] := {Syn}; outflags[SynReceived] := {Syn,Ack}; outflags[Established] := {Ack}; outflags[CloseWait] := {Ack}; outflags[FinWait1] := {Fin,Ack}; outflags[Closing] := {Fin,Ack}; outflags[LastAck] := {Fin,Ack}; outflags[FinWait2] := {Ack}; outflags[TimeWait] := {Ack}; (* other globals *) NEW(nilpcb); nilpcb.lport := -1; (* can never match *) nilpcb.int := NIL; IF ~NewZeros THEN nilpcb.rcvspace := 0; nilpcb.rcvscale := 0 END; (* for Respond *) lastpcb := nilpcb; NEW(empty, 1); NEW(pool); NEW(issSource, GetID()); NEW(timeSource) END Init; PROCEDURE Cleanup; BEGIN IP.RemoveReceiver(IPTypeTCP); pool.Finalize(); timeSource.Finalize(); END Cleanup; BEGIN ASSERT(~DoRFC1323 OR (ProcOptions & GenOptions)); (* constants should make sense *) ASSERT(SYSTEM.VAL(LONGINT, {Fin}) = 1); (* bit order for flags cast in Input, Output and Respond *) ASSERT((TimerPeriod MOD FastPeriod = 0) & (TimerPeriod MOD SlowPeriod = 0)); trace := FALSE; Init(); IP.InstallReceiver(IPTypeTCP, Input); Modules.InstallTermHandler(Cleanup); END TCP. (* History: 08.11.2003 mvt Changed for new interface of IP and Network. 08.11.2003 mvt Fixed array position error in ProcessOptions(). 09.11.2003 mvt Min()/Max() functions now in inline assembler. 10.11.2003 mvt Added InterfaceClosed detection in Connection.SlowTimer(). 10.11.2003 mvt Added correct finalization of Connection, ConnectionPool, Timer and the module itself. 11.11.2003 mvt Completely changed receive buffer queueing, integrated Network buffers directly. 12.11.2003 mvt Completely changed ConnectionPool, now working with a hash table. 12.12.2003 mvt Bugfixed entire module by comparing it with the book. 12.12.2003 mvt Completed header prediction code according to the book. 12.12.2003 mvt Added support for RFC1323 (timestamp, PAWS). 12.12.2003 mvt Added support for window scaling (for windows >64KB). 14.02.2004 mvt Fixed reassembly bug in Reasm(). 14.02.2004 mvt Fixed MSS option sending bug in Output(). 28.02.2004 mvt Fixed early Fin sending bug in Output(). 04.03.2004 rp Fixed & (p.dupacks < ReXmtThresh) in Step6 according to ftp://ftp.cs.arizona.edu/xkernel/papers/tcp_problems.ps 04.03.2004 rp Fixed XmitTimer according to ftp://ftp.cs.arizona.edu/xkernel/papers/tcp_problems.ps 02.05.2005 eb Supports IPv6 (WritePseudoHdr) and fragmented IP packets. *) (* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. *) (* pjm: Gigabit: Alteon AceNIC / 3Com 3C985 / NetGear GA620 Gigabit Ethernet Adapter http://sanjose.alteon.com/open.shtml Linux Driver http://home.cern.ch/~jes/gige/acenic.html Packet Engines Hamachi Driver http://www.nscl.msu.edu/~kasten/perf/hamachi/ A Connection has to react to external and internal events 1. User calls (external) Connection.Open Connection.Send Connection.Receive Connection.Available Connection.Close 2. Timer.HandleTimeout (internal) Connection.DelayedAck Connection.SlowTimer 3. Packet arrival (external) Connection.Input (from Input) The Timer reacts to external and internal events 1. Timer.HandleTimeout (external) 2. Timer.GetISS (internal - only called from Connection.Open, which is only called externally) The ConnectionPool reacts to external and internal events 1. Lookup (internal) 2. Enumerate (external) 3. Add (internal) 4. Remove (internal) *)