/*
* Copyright (c) 1997 The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Network Research
* Group at Lawrence Berkeley National Laboratory.
* 4. Neither the name of the University nor of the Laboratory may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This code below was motivated in part by code contributed by
* Kathie Nichols (nichols@com21.com). The code below is based primarily
* on the 4.4BSD TCP implementation. -KF [kfall@ee.lbl.gov]
*
* Major revisions, 8/97, kmn (vj)
*
* Some Warnings:
* this version of TCP will not work correctly if the sequence number
* goes above 2147483648 due to sequence number wrap
*
* this version of TCP currently sends data on the 3rd segment of
* the initial 3-way handshake. So, the typical sequence of events is
* A ------> SYN ------> B
* A <----- SYN+ACK ---- B
* A ------> ACK+data -> B
* whereas many "real-world" TCPs don't send data until a 4th segment
*
* there is no dynamic receiver's advertised window. The advertised
* window is simulated by simply telling the sender a bound on the window
* size (wnd_).
*
* in real TCP, a user process performing a read (via PRU_RCVD)
* calls tcp_output each time to (possibly) send a window
* update. Here we don't have a user process, so we simulate
* a user process always ready to consume all the receive buffer
*
* Notes:
* wnd_, wnd_init_, cwnd_, ssthresh_ are in segment units
* sequence and ack numbers are in byte units
*
* Futures:
* existing TCPs differ in how ACKs are handled on connection
* startup. Some delay the ACK for the first segment, which can
* cause connections to take longer to start up than if we make
* sure to ACK it quickly.
*/
/* RCS revision identification string; left out when `lint` is defined. */
#ifndef lint
static const char rcsid[] =
"@(#) $Header: /nfs/jade/vint/CVSROOT/ns-2/baytcp/tcp-full-bay.cc,v 1.4 2001/07/19 17:57:02 haldar Exp $ (LBL)";
#endif
#include "tclcl.h"
#include "ip.h"
#include "tcp-full-bay.h"
#include "flags.h"
#include "random.h"
#include "template.h"
/* Integer truth values; used in this file for rtt_active_ and recv()'s dupseg flag. */
#define TRUE 1
#define FALSE 0
/*
 * Tcl linkage for "Agent/TCP/BayFullTcp": create() hands back a
 * freshly allocated BayFullTcpAgent; its arguments are ignored.
 */
static class BayFullTcpClass : public TclClass {
public:
	BayFullTcpClass() : TclClass("Agent/TCP/BayFullTcp") {}
	TclObject* create(int /* argc */, const char*const* /* argv */) {
		BayFullTcpAgent* agent = new BayFullTcpAgent();
		return agent;
	}
} class_bayfull;
/*
 * Tcl linkage for "Agent/TCP/BayFullTcp/Tahoe".  This flavor is not
 * implemented: create() prints a diagnostic to stderr and terminates
 * the process with exit status 1 instead of building an agent.
 * (Original note: the tcl lib code sets reno_fastrecov_ to false.)
 */
static class TahoeBayFullTcpClass : public TclClass {
public:
	TahoeBayFullTcpClass() : TclClass("Agent/TCP/BayFullTcp/Tahoe") {}
	TclObject* create(int /* argc */, const char*const* /* argv */) {
		static const char unsupported[] =
			"Tahoe, NewReno or Sack flavors are NOT available for BayTCP!! Use BayFullTcp only, which actually implements Reno.\n";
		fprintf(stderr, "%s", unsupported);
		exit(1);
	}
} class_tahoe_bayfull;
/*
 * Tcl linkage for "Agent/TCP/BayFullTcp/Newreno".  This flavor is not
 * implemented: create() prints a diagnostic to stderr and terminates
 * the process with exit status 1 instead of building an agent.
 * (Original note: the tcl lib code sets deflate_on_pack_ to false.)
 */
static class NewRenoBayFullTcpClass : public TclClass {
public:
	NewRenoBayFullTcpClass() : TclClass("Agent/TCP/BayFullTcp/Newreno") {}
	TclObject* create(int /* argc */, const char*const* /* argv */) {
		static const char unsupported[] =
			"Tahoe, NewReno or Sack flavors are NOT available for BayFullTCP!! Use BayFullTcp only, which actually implements Reno.\n";
		fprintf(stderr, "%s", unsupported);
		exit(1);
	}
} class_newreno_bayfull;
/*
 * Tcl linkage for "Agent/TCP/BayFullTcp/Sack".  This flavor is not
 * implemented: create() prints a diagnostic to stderr and terminates
 * the process with exit status 1 instead of building an agent.
 */
static class SackBayFullTcpClass : public TclClass {
public:
	SackBayFullTcpClass() : TclClass("Agent/TCP/BayFullTcp/Sack") {}
	TclObject* create(int /* argc */, const char*const* /* argv */) {
		static const char unsupported[] =
			"Tahoe, NewReno or Sack flavors are NOT available for BayFullTCP!! Use BayFullTcp only, which actually implements Reno.\n";
		fprintf(stderr, "%s", unsupported);
		exit(1);
	}
} class_sack_bayfull;
/*
 * Construct an agent in the CLOSED state with no flags set, no
 * attached application, and a reassembly queue that is handed a
 * reference to rcv_nxt_.
 *
 * Tcl bound variables:
 *	segsperack_	for delayed ACKs, how many to wait before ACKing
 *	segsize_	segment size to use when sending
 *	tcprexmtthresh_	dup-ack count compared against in recv()
 *	iss_		initial send sequence number
 *	nodelay_, data_on_syn_, dupseg_fix_, dupack_reset_ (booleans)
 *	interval_	delayed-ACK timer interval
 */
BayFullTcpAgent::BayFullTcpAgent()
	: flags_(0),
	  state_(TCPS_CLOSED),
	  rq_(rcv_nxt_),
	  last_ack_sent_(0),
	  app_(0),
	  delack_timer_(this)
{
	// integer-valued knobs
	bind("segsperack_", &segs_per_ack_);
	bind("segsize_", &maxseg_);
	bind("tcprexmtthresh_", &tcprexmtthresh_);
	bind("iss_", &iss_);

	// boolean switches
	bind_bool("nodelay_", &nodelay_);
	bind_bool("data_on_syn_",&data_on_syn_);
	bind_bool("dupseg_fix_", &dupseg_fix_);
	bind_bool("dupack_reset_", &dupack_reset_);

	// delayed-ACK timer period
	bind("interval_", &delack_interval_);
}
/*
 * Run the base-class delayed-bind initialisation, then reset() this
 * agent.
 */
void
BayFullTcpAgent::delay_bind_init_all()
{
	TcpAgent::delay_bind_init_all();

	reset();
}
/*
 * No delayed-bound variables of our own: forward the request to
 * TcpAgent unchanged and return whatever it returns.
 */
int
BayFullTcpAgent::delay_bind_dispatch(const char *varName, const char *localName, TclObject *tracer)
{
	const int handled = TcpAgent::delay_bind_dispatch(varName, localName, tracer);
	return handled;
}
/*
 * Return to the starting point.  state_ is deliberately left alone:
 * a passive opener starts in LISTEN rather than CLOSED.
 */
void
BayFullTcpAgent::reset()
{
	TcpAgent::reset();

	/* sequence-space bookkeeping */
	highest_ack_ = 0;
	last_ack_sent_ = 0;
	rcv_nxt_ = 0;			// kmn
	flags_ = 0;
	t_seqno_ = iss_;

	/* application / ACK-policy bookkeeping */
	close_on_empty_ = 0;		// added 7/30/97 by kmn
	switch_spa_thresh_ = 0;
	first_data_ = 0;		// don't open cwnd too early
}
/*
 * Prepare the agent for a new connection: cancel any pending
 * retransmit timer, re-initialise the RTT state, restore the window
 * and sequence variables to their initial values and empty the
 * reassembly queue.  state_ and close_on_empty_ are not touched.
 */
void
BayFullTcpAgent::reinit()
{
	cancel_rtx_timeout();
	rtt_init();

	/* congestion / window state */
	cwnd_ = wnd_init_;
	last_ack_ = highest_ack_ = 0;
	ssthresh_ = int(wnd_);
	awnd_ = wnd_init_ / 2.0;
	recover_ = 0;
	recover_cause_ = 0;

	/* sequence-space state */
	last_ack_sent_ = 0;
	rcv_nxt_ = 0;			// kmn
	flags_ = 0;
	t_seqno_ = maxseq_ = iss_;
	switch_spa_thresh_ = 0;

	/* (the old per-index cancel(i) loop over NTIMER timers is gone) */
	rq_.clear();
	first_data_ = 0;		// don't open cwnd too early
}
/*
 * headersize:
 *	size in bytes of an IP+TCP header.  For now this is just the
 *	basic size (TCPIP_BASE_PKTSIZE); it may change in the future
 *	with options (fix for sack).
 */
int
BayFullTcpAgent::headersize()
{
	return TCPIP_BASE_PKTSIZE;
}
/*
 * Destructor: release whatever is still sitting in the reassembly
 * queue.  Explicit cancellation of pending timers (the old loop over
 * pending_[0..NTIMER)) is no longer required.
 */
BayFullTcpAgent::~BayFullTcpAgent()
{
	rq_.clear();
}
/*
 * The 'advance' interface of the regular tcp is in packet units;
 * here it is scaled to bytes for full tcp.
 *
 * 'advance' is normally called by an "application" (i.e. data source)
 * to signal that there is something to send.  'curseq_' is the last
 * byte number provided by the application.
 *
 * State-specific behaviour:
 *	above ESTABLISHED	closing; complain on stderr and do nothing more
 *	CLOSED			do an active open/connect
 *	ESTABLISHED		just try to send more
 *	anything else		(establishing) nothing further here
 * Note that curseq_ is advanced before the state is examined.
 */
void
BayFullTcpAgent::advance(int np)
{
	// XXX hack: np is in packets, and a data source may pass a
	// *huge* number as a way of saying "go forever".  Look for the
	// huge number and pre-divide it before scaling to bytes.
	if (np >= 0x10000000)
		np /= maxseg_;
	curseq_ += (np * maxseg_);

	if (state_ > TCPS_ESTABLISHED) {
		fprintf(stderr,
		    "%f: BayFullTcpAgent::advance(%s): cannot advance while in state %d\n",
		    now(), name(), state_);
		return;
	}

	if (state_ == TCPS_CLOSED)
		connect();			// initiate new connection
	else if (state_ == TCPS_ESTABLISHED)
		send_much(0, REASON_NORMAL, 0);
}
/*
 * Byte-oriented advance, added 7/30/97 by kmn: the caller passes a
 * byte count 'n' and a value for close_on_empty_.
 *
 * close_on_empty_ is stored unconditionally.  Then:
 *	above ESTABLISHED	closing; return 0 ("try again later")
 *	CLOSED			curseq_ = iss_ + n, reinit(), connect(); return 1
 *	ESTABLISHED		curseq_ += n; return 1 (nothing is sent here)
 *	anything else		(establishing) return 0
 */
int
BayFullTcpAgent::advance(int n, int close_flag)
{
	close_on_empty_ = close_flag;

	if (state_ > TCPS_ESTABLISHED)
		return 0;			// try again later, please

	if (state_ == TCPS_CLOSED) {
		curseq_ = iss_ + n;
		reinit();
		connect();			// initiate new connection
		return 1;
	}

	if (state_ == TCPS_ESTABLISHED) {
		curseq_ += n;
		return 1;
	}

	return 0;
}
/*
 * Header flags that depend only on the tcp state
 * (in real TCP, see tcp_fsm.h, the "tcp_outflags" array):
 *	TH_ACK	in every state except LISTEN and SYN_SENT
 *	TH_SYN	in SYN_SENT and SYN_RECEIVED
 *	TH_FIN	in FIN_WAIT_1 and LAST_ACK
 */
int BayFullTcpAgent::outflags()
{
	const bool want_ack =
		(state_ != TCPS_LISTEN) && (state_ != TCPS_SYN_SENT);
	const bool want_syn =
		(state_ == TCPS_SYN_SENT) || (state_ == TCPS_SYN_RECEIVED);
	const bool want_fin =
		(state_ == TCPS_FIN_WAIT_1) || (state_ == TCPS_LAST_ACK);

	int result = 0;
	if (want_ack)
		result |= TH_ACK;
	if (want_syn)
		result |= TH_SYN;
	if (want_fin)
		result |= TH_FIN;
	return result;
}
void BayFullTcpAgent::sendpacket(int seqno, int ackno, int pflags, int datalen,
int reason)
{
Packet* p = allocpkt();
hdr_tcp *tcph = hdr_tcp::access(p);
hdr_cmn *th = hdr_cmn::access(p);
tcph->seqno() = seqno;
tcph->ackno() = ackno;
tcph->flags() = pflags;
tcph->hlen() = headersize();
tcph->ts() = now();
/* Open issue: should tcph->reason map to pkt->flags_ as in ns-1?? */
tcph->reason() |= reason;
th->size() = datalen + headersize();
if (datalen <= 0)
++nackpack_;
else {
++ndatapack_;
ndatabytes_ += datalen;
}
if (reason == REASON_TIMEOUT || reason == REASON_DUPACK) {
++nrexmitpack_;
nrexmitbytes_ += datalen;
}
send(p, 0);
}
/*
* see if we should send a segment, and if so, send it
* (may be ACK or data)
* 'maxseq_' is called 'snd_max' in "real" TCP
* and is the largest seq number we've sent
*
* maxseg_, largest seq# we've sent (snd_max)
* flags_, flags regarding our internal state (t_state)
* pflags, a local used to build up the tcp header flags (flags)
* curseq_, is the highest sequence number given to us by "application"
* highest_ack_, the highest ACK we've seen for our data (snd_una)
* seqno, the next seq# we're going to send (snd_nxt), this will
* update t_seqno_ (the last thing we sent)
*/
void BayFullTcpAgent::output(int seqno, int reason)
{
int is_retransmit = (seqno < maxseq_);
int idle = (highest_ack_ == maxseq_);
//kmn - changing all this for clarity 8/7/97
int buffered_bytes = (curseq_ + iss_) - seqno;
int datalen = min(buffered_bytes, (highest_ack_ + (window() * maxseg_)) - seqno);
int pflags = outflags();
int emptying_buffer = 0;
if((pflags & TH_SYN) || datalen <= 0)
datalen = 0;
else if(datalen > maxseg_) {
datalen = maxseg_;
} else if(datalen == buffered_bytes) {
emptying_buffer = 1;
pflags |= TH_PUSH;
//usrclosed() causes nested calls to output()
if(close_on_empty_) {
pflags |= TH_FIN;
state_ = TCPS_FIN_WAIT_1;
}
}
//end of kmn changes
/* turn off FIN if there's really more to send */
if (datalen > 0 && !emptying_buffer)
pflags &= ~TH_FIN;
/* sender SWS avoidance (Nagle) */
if (datalen > 0) {
// if full-sized segment, ok
if (datalen == maxseg_)
goto send;
// if Nagle disabled and buffer clearing, ok
if ((idle || nodelay_) && emptying_buffer)
goto send;
// if a retransmission
if (is_retransmit)
goto send;
// if big "enough", ok...
// (this is not a likely case, and would
// only happen for tiny windows)
if (datalen >= ((wnd_ * maxseg_) / 2.0))
goto send;
}
if (need_send())
goto send;
/*
* send now if a SYN or special flag "TF_ACKNOW" is set.
* TF_ACKNOW can be set during connection establishment and
* to generate acks for out-of-order data
* kmn 8/28 need to send if there's a push
*/
if ((flags_ & TF_ACKNOW) || (pflags & (TH_SYN|TH_FIN|TH_PUSH)))
goto send;
return; // no reason to send now
send:
//these changed by vj and kmn
if (pflags & TH_FIN) {
if (flags_ & TF_SENTFIN) {
// don't allow seqno to advance past fin
// (the ack generated by a discarded duplicate
// may attempt to do this)
if (seqno >= maxseq_)
--seqno;
} else {
flags_ |= TF_SENTFIN;
++t_seqno_;
}
}
if((pflags & TH_SYN)) {
if ((flags_ & TF_SENTSYN) == 0) {
flags_ |= TF_SENTSYN;
++t_seqno_;
}
}
/*
* fill in packet fields. Agent::allocpkt()
* has already filled most of the network layer
* fields for us. So fill in tcp hdr and adjust
* the packet size.
*/
sendpacket(seqno, rcv_nxt_, pflags, datalen, reason);
last_ack_sent_ = rcv_nxt_;
flags_ &= ~(TF_ACKNOW|TF_DELACK);
t_seqno_ += datalen; // update snd_nxt (t_seqno_)
if (t_seqno_ > maxseq_) {
maxseq_ = t_seqno_; // largest seq# we've sent
/*
* Time this transmission if not a retransmission and
* not currently timing anything.
*/
if (rtt_active_ == FALSE) {
rtt_active_ = TRUE; // set timer
rtt_seq_ = seqno; // timed seq #
}
}
/*
* Set retransmit timer if not currently set,
* and not doing an ack or a keep-alive probe.
* Initial value for retransmit timer is smoothed
* round-trip time + 2 * round-trip time variance.
* Future values are rtt + 4 * rttvar.
*/
if (!(rtx_timer_.status() == TIMER_PENDING) && (t_seqno_ > highest_ack_)) {
set_rtx_timer(); // no timer pending, schedule one
}
}
/*
 * Try to send as much data as the window will allow.  The link layer
 * will do the buffering; we ask the application layer for the size of
 * the packets.
 *
 *	force		when non-zero, call output() at least once even if
 *			nothing is in-window, and ignore a pending
 *			delayed-send timer on entry
 *	reason		passed through to output()
 *	maxburst	when non-zero, stop after this many output() calls
 */
void BayFullTcpAgent::send_much(int force, int reason, int maxburst)
{
	/*
	 * highest_ack_ is essentially "snd_una" in real TCP.
	 *
	 * Upper bound for t_seqno_: the smaller of what the application
	 * has supplied (curseq_ + iss_) and the right edge of the window
	 * (highest_ack_ + win).  It is computed once, before the loop.
	 */
	int win = window() * maxseg_;	// window() in pkts
	int sent = 0;
	int topwin = curseq_ + iss_;
	if (topwin > highest_ack_ + win)
		topwin = highest_ack_ + win;

	if (!force && (delsnd_timer_.status() == TIMER_PENDING))
		return;

	while (force || (t_seqno_ < topwin)) {
		// random send overhead configured and no delayed-send
		// timer running yet: schedule one and stop for now
		if (overhead_ != 0 && !(delsnd_timer_.status() == TIMER_PENDING)) {
			delsnd_timer_.resched(Random::uniform(overhead_));
			return;
		}

		output(t_seqno_, reason);	// updates seqno for us
		force = 0;

		if (outflags() & TH_SYN)
			break;
		if (maxburst && ++sent >= maxburst)
			break;
	}
}
/*
 * Cancel the retransmit timer, but only if it is currently pending.
 */
void BayFullTcpAgent::cancel_rtx_timeout()
{
	if (rtx_timer_.status() != TIMER_PENDING)
		return;

	rtx_timer_.cancel();
}
/*
 * Process an ACK.
 * This version of the routine doesn't necessarily require the ack to
 * be one which advances the ack number.
 *
 *	- if this ACK covers the segment being timed (rtt_seq_),
 *	  indicate we are no longer timing and reset the exponential
 *	  timer backoff
 *	- update the rtt estimate from the packet's timestamp
 *	- cancel the retransmit timer if everything sent is ACK'd,
 *	  else restart it when the ACK advances highest_ack_
 *	- advance highest_ack_ if appropriate
 *	- pull t_seqno_ (next thing to send) up to highest_ack_
 *
 *	pkt	the received packet; only its TCP header is read
 */
void BayFullTcpAgent::newack(Packet* pkt)
{
	hdr_tcp *tcph = hdr_tcp::access(pkt);
	/* plain local: the `register` storage class no longer exists in C++17 */
	int ackno = tcph->ackno();

	// we were timing the segment and we
	// got an ACK for it
	if (rtt_active_ && ackno >= rtt_seq_) {
		/* got a rtt sample */
		rtt_active_ = FALSE;	// no longer timing
		t_backoff_ = 1;		// stop exp backoff
	}

	/* always with timestamp option */
	double tao = now() - tcph->ts();
	rtt_update(tao);

	if (ackno >= maxseq_) {
		// everything we have sent is acknowledged
		cancel_rtx_timeout();
	} else if (ackno > highest_ack_) {
		// progress, but data is still outstanding
		set_rtx_timer();
	}

	// advance the ack number if this is for new data
	if (ackno > highest_ack_)
		highest_ack_ = ackno;

	// set up the next packet to send
	if (t_seqno_ < highest_ack_)
		t_seqno_ = highest_ack_;	// thing to send next
}
/*
 * Header prediction was nuked, but the method was left in - kmn.
 * The packet argument is ignored and the answer is always 0.
 */
int BayFullTcpAgent::predict_ok(Packet* /* pkt */)
{
	const int never = 0;
	return never;
}
/*
 * fast_retransmit using the given seqno:
 *	back off the rtt timer, record the start of a dup-ack recovery
 *	period (recover_ / recover_cause_), send one segment starting at
 *	'seq', then put t_seqno_ (snd_nxt) back where it was so normal
 *	sending continues from there.
 */
void BayFullTcpAgent::fast_retransmit(int seq)
{
	rtt_backoff();		// bug fix by van to avoid spurious rtx

	const int saved_nxt = t_seqno_;	// output() changes t_seqno_

	recover_ = maxseq_;		// keep a copy of highest sent
	recover_cause_ = REASON_DUPACK;	// why we started this recovery period

	output(seq, REASON_DUPACK);	// send one pkt

	t_seqno_ = saved_nxt;
}
/*
 * Real tcp determines if the remote side should receive a window
 * update/ACK from us, and often results in sending an update every 2
 * segments, thereby giving the familiar 2-packets-per-ack behavior of
 * TCP.  Here, we don't advertise any windows, so we just look at how
 * much has been received but not yet acked.
 *
 * kmn - added code to switch from one seg per ack to the set value:
 *	TF_ACKNOW set				-> 1
 *	rcv_nxt_ below switch_spa_thresh_	-> 1 if anything is unacked
 *	otherwise				-> 1 if at least
 *						   segs_per_ack_ * maxseg_
 *						   bytes are unacked
 */
int BayFullTcpAgent::need_send()
{
	// first cut, send if anything to ack. Might need maxseg_
	if (flags_ & TF_ACKNOW)
		return 1;

	if (rcv_nxt_ < switch_spa_thresh_)
		return ((rcv_nxt_ - last_ack_sent_) >= 1);

	return ((rcv_nxt_ - last_ack_sent_) >= (segs_per_ack_ * maxseg_));
}
/*
 * Deal with timers going off.  Three types are handled:
 *	retransmission timer			(TCP_TIMER_RTX)
 *	delayed send (randomization) timer	(TCP_TIMER_DELSND)
 *	delayed ACK timer			(TCP_TIMER_DELACK)
 * Any other value is reported on stderr.  Nothing at all is done
 * while the connection is CLOSED or in LISTEN.
 *
 * real TCP initializes the RTO as 6 sec
 *	( ^ 3sec, kmn )
 *	(A + 2D, where A=0, D=3), [Stevens p. 305]
 * and thereafter uses
 *	(A + 4D, where A and D are dynamic estimates)
 *
 * note that in the simulator t_srtt_, t_rttvar_ and t_rtt_
 * are all measured in 'tcp_tick_'-second units
 */
void BayFullTcpAgent::timeout(int tno)
{
	if (state_ == TCPS_CLOSED || state_ == TCPS_LISTEN)
		return;

	/* retransmit timer */
	if (tno == TCP_TIMER_RTX) {
		++nrexmit_;
		recover_ = maxseq_;
		recover_cause_ = REASON_TIMEOUT;
		slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_RESTART);
		// changed 6/10/00 to look at rtx problem -kmn
		// (previously reset_rtx_timer(0,0) when highest_ack_ ==
		// maxseq_, reset_rtx_timer(0,1) otherwise)
		reset_rtx_timer(1);
		t_seqno_ = highest_ack_;	// go back to first unacked byte
		dupacks_ = 0;
		send_much(1, REASON_TIMEOUT);
		return;
	}

	/*
	 * delayed-send timer, with random overhead
	 * to avoid phase effects
	 */
	if (tno == TCP_TIMER_DELSND) {
		send_much(1, PF_TIMEOUT);
		return;
	}

	/*
	 * delayed-ACK timer: turn a pending delayed ACK into an
	 * immediate one, then always re-arm the timer
	 */
	if (tno == TCP_TIMER_DELACK) {
		if (flags_ & TF_DELACK) {
			flags_ &= ~TF_DELACK;
			flags_ |= TF_ACKNOW;
			send_much(1, REASON_NORMAL, 0);
		}
		delack_timer_.resched(delack_interval_);
		return;
	}

	fprintf(stderr, "%f: (%s) UNKNOWN TIMEOUT %d\n",
	    now(), name(), tno);
}
/*
 * Delayed-ACK timer expiry (introduced by kedar): tell the owning
 * agent that TCP_TIMER_DELACK went off.  The event is not used.
 */
void BayDelAckTimer::expire(Event * /* e */)
{
	a_->timeout(TCP_TIMER_DELACK);
}
/*
 * main reception path -
 * called from the agent that handles the data path below in its muxing mode
 * advance() is called when connection is established with size sent from
 * user/application agent
 *
 *	pkt	the arriving packet.  It is released with Packet::free()
 *		on every path out of this function.
 *	(the Handler argument is not used)
 *
 * Outline:
 *	1. make sure a delayed-ACK timer is running
 *	2. connection establishment (LISTEN, SYN_SENT)
 *	3. trim data we already have from the front of the segment
 *	4. ACK processing, including fast retransmit / fast recovery
 *	5. ("step6") data and FIN processing, hand-off to the application
 *	6. send whatever is now due; notify or recycle on CLOSED
 *
 * When duplicate bytes are trimmed in step 3 the packet's size field
 * is reduced together with its sequence number: the reassembly queue
 * recomputes the segment end from that size field, and would otherwise
 * record the segment as todrop bytes longer than it is.
 */
void BayFullTcpAgent::recv(Packet *pkt, Handler*)
{
	hdr_tcp *tcph = hdr_tcp::access(pkt);
	hdr_cmn *th = hdr_cmn::access(pkt);
	hdr_ip *iph = hdr_ip::access(pkt);
	int needoutput = 0;
	int ourfinisacked = 0;
	int todrop = 0;
	int dupseg = FALSE;

#ifdef notdef
	if (trace_)
		plot();
#endif

	//
	// if no delayed-ACK timer is set, set one
	// they are set to fire every 'interval_' secs, starting
	// at time t0 = (0.0 + k * interval_) for some k such
	// that t0 > now
	//
	if (!(delack_timer_.status() == TIMER_PENDING)) {
		double now = Scheduler::instance().clock();
		int last = int(now / delack_interval_);
		delack_timer_.resched(delack_interval_ * (last + 1.0) - now);
	}

	int datalen = th->size() - tcph->hlen();
	int ackno = tcph->ackno();		// ack # from packet
	// nuked header prediction code that was here - kmn 8/5/97
	int tiflags = tcph->flags();		// tcp flags from packet

	switch (state_) {
	case TCPS_LISTEN:	/* awaiting peer's SYN */
		if (tiflags & TH_ACK) {
			if (tiflags & TH_FIN) {
				// answer a FIN+ACK directly, bypassing output()
				sendpacket(tcph->ackno(), tcph->seqno()+1,
				    TH_ACK, 0, REASON_NORMAL);
				goto drop;
			}
			// ACK shouldn't be on here
			// kmn - this can be from previous connection if reusing
			// (the "got ACK(%d) while in LISTEN" message is disabled)
			goto drop;
		}
		if ((tiflags & TH_SYN) == 0) {
			// we're looking for a SYN in return
			fprintf(stderr,
			    "%f: BayFullTcpAgent::recv(%s): got a non-SYN while in LISTEN\n",
			    now(), name());
			goto drop;
		}
		flags_ |= TF_ACKNOW;
		state_ = TCPS_SYN_RECEIVED;
		rcv_nxt_ = tcph->seqno() + 1;	// kmn
		t_seqno_ = iss_;
		// kmn - switch from one to set segs per ack
		switch_spa_thresh_ = rcv_nxt_ + (16 * 1024);
		goto step6;

	case TCPS_SYN_SENT:	/* we sent SYN, expecting SYN+ACK */
		if ((tiflags & TH_ACK) && (ackno > maxseq_)) {
			// not an ACK for our SYN, discard
			// (the "bad ACK (%d) for our SYN(%d)" message is disabled)
			goto drop;
		}
		if ((tiflags & TH_SYN) == 0) {
			// we're looking for a SYN in return
			fprintf(stderr,
			    "%f: BayFullTcpAgent::recv(%s): no SYN for our SYN(%d)\n",
			    now(), name(), int(maxseq_));
			goto drop;
		}
		rcv_nxt_ = tcph->seqno()+1;	// initial expected seq#
		// kmn - switch from one to set segs per ack
		switch_spa_thresh_ = rcv_nxt_ + (16 * 1024);
		cancel_rtx_timeout();	// cancel timer on our 1st SYN
		flags_ |= TF_ACKNOW;	// ACK peer's SYN
		if (tiflags & TH_ACK) {
			// got SYN+ACK (what we're expecting)
			// set up to ACK peer's SYN+ACK
			newack(pkt);
			state_ = TCPS_ESTABLISHED;
		} else {
			// simultaneous active opens
			state_ = TCPS_SYN_RECEIVED;
		}
		goto step6;
	}

	// check for redundant data at head/tail of segment
	// note that the 4.4bsd [Net/3] code has
	// a bug here which can cause us to ignore the
	// perfectly good ACKs on duplicate segments. The
	// fix is described in (Stevens, Vol2, p. 959-960).
	// This code is based on that correction.
	//
	// In addition, it has a modification so that duplicate segments
	// with dup acks don't trigger a fast retransmit when dupseg_fix_
	// is enabled.
	//
	todrop = rcv_nxt_ - tcph->seqno();	// how much overlap?
	if (todrop > 0) {
		// segment is something we've seen (perhaps partially)
		if (tiflags & TH_SYN) {
			t_seqno_ = highest_ack_;
			if ((tiflags & TH_ACK) == 0)
				goto dropafterack;
			tiflags &= ~TH_SYN;
		}
		if (todrop > datalen ||
		    (todrop == datalen && ((tiflags & TH_FIN) == 0))) {
			/*
			 * Any valid FIN must be to the left of the window.
			 * At this point the FIN must be a duplicate or out
			 * of sequence; drop it.
			 */
			tiflags &= ~TH_FIN;
			/*
			 * Send an ACK to resynchronize and drop any data.
			 * But keep on processing for RST or ACK.
			 */
			flags_ |= TF_ACKNOW;
			todrop = datalen;
			dupseg = TRUE;
		}
		/*
		 * Trim the duplicate bytes from the front.  Sequence
		 * number, packet size and datalen must all move together,
		 * because later consumers (the reassembly queue) derive
		 * the segment's end from seqno and size.
		 */
		tcph->seqno() += todrop;
		th->size() -= todrop;
		datalen -= todrop;
	}

	if (tiflags & TH_SYN) {
		fprintf(stderr,
		    "%f: %d.%d>%d.%d BayFullTcpAgent::recv(%s) received unexpected SYN (state:%d)\n",
		    now(),
		    iph->saddr(), iph->sport(),
		    iph->daddr(), iph->dport(),
		    name(), state_);
		goto drop;
	}

	if ((tiflags & (TH_SYN|TH_ACK)) == 0) {
		fprintf(stderr, "%f: %d.%d>%d.%d BayFullTcpAgent::recv(%s) got packet lacking ACK (seq %d)\n",
		    now(),
		    iph->saddr(), iph->sport(),
		    iph->daddr(), iph->dport(),
		    name(), tcph->seqno());
		goto drop;
	}

	/*
	 * ACK processing
	 */
	switch (state_) {
	case TCPS_SYN_RECEIVED:	/* got ACK for our SYN+ACK */
		if (ackno < highest_ack_ || ackno > maxseq_) {
			// not in useful range
			goto drop;
		}
		state_ = TCPS_ESTABLISHED;
		/* fall into ... */

	/*
	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
	 * ACKs. If the ack is in the range
	 *	tp->snd_una < ti->ti_ack <= tp->snd_max
	 * then advance tp->snd_una to ti->ti_ack and drop
	 * data from the retransmission queue.
	 *
	 * note that states CLOSE_WAIT and TIME_WAIT aren't used
	 * in the simulator
	 */
	case TCPS_ESTABLISHED:
	case TCPS_FIN_WAIT_1:
	case TCPS_FIN_WAIT_2:
	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
		// look for dup ACKs (dup ack numbers, no data)
		//
		// do fast retransmit/recovery if at/past thresh
		if (ackno <= highest_ack_) {
			// an ACK which doesn't advance highest_ack_
			if (datalen == 0 && (!dupseg_fix_ || !dupseg)) {
				/*
				 * If we have outstanding data
				 * this is a completely
				 * duplicate ack,
				 * the ack is the biggest we've
				 * seen and we've seen exactly our rexmt
				 * threshhold of them, assume a packet
				 * has been dropped and retransmit it.
				 *
				 * We know we're losing at the current
				 * window size so do congestion avoidance.
				 *
				 * Dup acks mean that packets have left the
				 * network (they're now cached at the receiver)
				 * so bump cwnd by the amount in the receiver
				 * to keep a constant cwnd packets in the
				 * network.
				 */
				if (!(rtx_timer_.status() == TIMER_PENDING) ||
				    ackno != highest_ack_) {
					// not timed, or re-ordered ACK
					dupacks_ = 0;
				} else if (bug_fix_ &&
				    highest_ack_ == recover_ &&
				    recover_cause_ == REASON_TIMEOUT) {
					// doing timeout recovery not fastrxmit
					dupacks_ = 0;
				} else if (++dupacks_ == tcprexmtthresh_) {
					slowdown(CLOSE_SSTHRESH_HALF|CLOSE_CWND_HALF);
					cancel_rtx_timeout();
					rtt_active_ = FALSE;
					fast_retransmit(ackno);
					// we measure cwnd in packets,
					// so don't scale by maxseg_
					// as real TCP does
					cwnd_ = ssthresh_ + dupacks_;
					goto drop;
				} else if (dupacks_ > tcprexmtthresh_) {
					// we just measure cwnd in packets,
					// so don't scale by maxseg_ as real
					// tcp does
					cwnd_++;	// fast recovery
					send_much(0, REASON_NORMAL, 0);
					goto drop;
				}
			} else {
				// non-zero length segment
				// (or window changed in real TCP).
				if (dupack_reset_)
					dupacks_ = 0;
			}
			break;	/* take us to "step6" */
		}

		/*
		 * we've finished the fast retransmit/recovery period
		 * (i.e. received an ACK which advances highest_ack_)
		 */

		/*
		 * If the congestion window was inflated to account
		 * for the other side's cached packets, retract it.
		 */
		if (dupacks_ >= tcprexmtthresh_ && cwnd_ > ssthresh_) {
			/*
			 * make sure we send at most 2 packets due to this ack
			 */
			cwnd_ = (maxseq_ - ackno + maxseg_ - 1)
			    / maxseg_ + 2;
		}
		dupacks_ = 0;

		if (ackno > maxseq_) {
			// ack more than we sent(!?)
			fprintf(stderr,
			    "%f: BayFullTcpAgent::recv(%s) too-big ACK (ack: %d, maxseq:%d)\n",
			    now(), name(), int(ackno), int(maxseq_));
			goto dropafterack;
		}

		/*
		 * If we have a timestamp reply, update smoothed
		 * round trip time. If no timestamp is present but
		 * transmit timer is running and timed sequence
		 * number was acked, update smoothed round trip time.
		 * Since we now have an rtt measurement, cancel the
		 * timer backoff (cf., Phil Karn's retransmit alg.).
		 * Recompute the initial retransmit timer.
		 *
		 * If all outstanding data is acked, stop retransmit
		 * If there is more data to be acked, restart retransmit
		 * timer, using current (possibly backed-off) value.
		 */
		newack(pkt);
		if (state_ == TCPS_ESTABLISHED && ackno < maxseq_)
			needoutput = 1;

		/* kmn - 8/12/97: don't want to do this on first
		 * data send, especially to compare IWs
		 * So added test.
		 */
		if (first_data_)
			opencwnd();

		// kmn - 8/15 added second test that is acking fin
		if ((state_ == TCPS_FIN_WAIT_1 || state_ == TCPS_FIN_WAIT_2
		    || state_ == TCPS_LAST_ACK || state_ == TCPS_CLOSING)
		    && ackno >= (curseq_ + iss_))	// && ackno == maxseq_)
			ourfinisacked = 1;
		else
			ourfinisacked = 0;

		// additional processing when we're in special states
		switch (state_) {
		/*
		 * In FIN_WAIT_1 STATE in addition to the processing
		 * for the ESTABLISHED state if our FIN is now acknowledged
		 * then enter FIN_WAIT_2.
		 */
		case TCPS_FIN_WAIT_1:	/* doing active close */
			if (ourfinisacked)
				state_ = TCPS_FIN_WAIT_2;
			break;

		/*
		 * In CLOSING STATE in addition to the processing for
		 * the ESTABLISHED state if the ACK acknowledges our FIN
		 * then enter the TIME-WAIT state, otherwise ignore
		 * the segment.
		 * (in the simulator, go straight to CLOSED)
		 */
		case TCPS_CLOSING:	/* simultaneous active close */
			if (ourfinisacked)
				state_ = TCPS_CLOSED;
			break;

		/*
		 * In LAST_ACK, we may still be waiting for data to drain
		 * and/or to be acked, as well as for the ack of our FIN.
		 * If our FIN is now acknowledged,
		 * enter the closed state and return.
		 */
		case TCPS_LAST_ACK:	/* passive close */
			if (ourfinisacked) {
				state_ = TCPS_CLOSED;	// kmn added 2 lines
				// (the old per-index timer cancel loop
				// is no longer needed)
				goto drop;
			} else {	// should be a FIN we've seen
				fprintf(stderr,
				    "%f: %d.%d>%d.%d BayFullTcpAgent::recv(%s) received non-ACK (state:%d)\n",
				    now(),
				    iph->saddr(), iph->sport(),
				    iph->daddr(), iph->dport(),
				    name(), state_);
			}
		/* no case for TIME_WAIT in simulator */
		}	// inner switch
	}	// outer switch

step6:
	/* real TCP handles window updates and URG data here */
	/* dodata: this label is in the "real" code.. here only for reference */

	/*
	 * DATA processing
	 * kmn - several changes here to talk to application agent
	 */
	if (datalen > 0 || (tiflags & TH_FIN)) {
		first_data_ = 1;	// now seen first data
		// see the "TCP_REASS" macro for this code
		if (tcph->seqno() == rcv_nxt_ && rq_.empty()) {
			// got the in-order packet we were looking
			// for, nobody is in the reassembly queue,
			// so this is the common case...
			// note: in "real" TCP we must also be in
			// ESTABLISHED state to come here, because
			// data arriving before ESTABLISHED is
			// queued in the reassembly queue. Since we
			// don't really have a process anyhow, just
			// accept the data here as-is (i.e. don't
			// require being in ESTABLISHED state)
			tiflags &= TH_FIN;
			if (tiflags) {
				// the FIN takes one unit of sequence space
				++rcv_nxt_;
			}
			flags_ |= TF_DELACK;
			rcv_nxt_ += datalen;
			// give to "application" here
			// added 7/30/97 by kmn to call application with
			// number of bytes since last push (if any)
			// the server is going to call advance before this
			// completes, so changed advance to not call
			// send_much if ESTABLISHED. curseq gets
			// checked below.
			//
			if (datalen && app_ && (tcph->flags() & TH_PUSH)) {
				app_->recv(pkt,this,DATA_PUSH);
			}
			needoutput = need_send();
		} else {
			// not the one we want next (or it
			// is but there's stuff on the reass queue);
			// do whatever we need to do for out-of-order
			// segments or hole-fills. Also,
			// send an ACK to the other side right now.
			tiflags = rq_.add(pkt);
			if (tiflags & TH_PUSH) {
				if (app_ != NULL )
					app_->recv(pkt,this,DATA_PUSH);
				needoutput = need_send();
			} else
				flags_ |= TF_ACKNOW;
			// reset for losses
			switch_spa_thresh_ = rcv_nxt_ + (16 * 1024);
		}
	}

	/*
	 * if FIN is received, ACK the FIN
	 * (let user know if we could do so)
	 */
	if (tiflags & TH_FIN) {
		flags_ |= TF_ACKNOW;
		rq_.clear();	// other side shutting down
		switch (state_) {
		/*
		 * In SYN_RECEIVED and ESTABLISHED STATES
		 * enter the CLOSE_WAIT state.
		 * (in the simulator, go to LAST_ACK)
		 * (passive close)
		 */
		case TCPS_SYN_RECEIVED:
		case TCPS_ESTABLISHED:
			state_ = TCPS_LAST_ACK;
			break;

		/*
		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
		 * enter the CLOSING state.
		 * (simultaneous close)
		 */
		case TCPS_FIN_WAIT_1:
			state_ = TCPS_CLOSING;
			break;

		/*
		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
		 * starting the time-wait timer, turning off the other
		 * standard timers.
		 * (in the simulator, just go to CLOSED)
		 * (active close)
		 */
		case TCPS_FIN_WAIT_2:
			state_ = TCPS_CLOSED;
			cancel_rtx_timeout();
			break;
		}
	}

	if (needoutput || (flags_ & TF_ACKNOW))
		send_much(1, REASON_NORMAL, 0);
	else if ((curseq_ + iss_) > highest_ack_)
		send_much(0, REASON_NORMAL, 0);

	/* kmn - ugh, egregious hack. Can tell it's a server
	 * so it goes to listen state. Do something
	 * else if this becomes more stable
	 */
	if (state_ == TCPS_CLOSED) {
		if (close_on_empty_) {
			reinit();
			curseq_ = iss_;
			state_ = TCPS_LISTEN;
		} else {	/* "something else" - kmn 6/00 */
			if (app_ != NULL )
				app_->recv(pkt,this,CONNECTION_END);
		}
	}
	Packet::free(pkt);
	return;

dropafterack:
	flags_ |= TF_ACKNOW;
	send_much(1, REASON_NORMAL, 0);
drop:
	Packet::free(pkt);
	return;
}
/*
 * Replace the retransmit timer with a backed-off one and stop timing
 * the current rtt sample.  The integer argument is ignored.
 */
void BayFullTcpAgent::reset_rtx_timer(int /* unused */)
{
	rtt_backoff();		// double current timeout
	set_rtx_timer();	// set new timer

	rtt_active_ = FALSE;
}
/*
 * do an active open
 * (in real TCP, see tcp_usrreq, case PRU_CONNECT)
 *
 * Moves to SYN_SENT and calls output() starting at iss_.  Unless
 * data_on_syn_ is set, curseq_ is temporarily pulled back to iss_
 * around that call so the segment is forced to carry no data;
 * afterwards curseq_ becomes its previous value plus one.
 */
void BayFullTcpAgent::connect()
{
	state_ = TCPS_SYN_SENT;		// sending a SYN now

	if (data_on_syn_) {
		output(iss_, REASON_NORMAL);
		return;
	}

	// force no data in this segment
	int saved_curseq = curseq_;
	curseq_ = iss_;
	output(iss_, REASON_NORMAL);
	curseq_ = saved_curseq + 1;	// think I have to add in the syn here
}
/*
 * be a passive opener
 * (in real TCP, see tcp_usrreq, case PRU_LISTEN)
 *
 * Only a state change plus the packet type: this peer's packets are
 * typed PT_TCP (changed from PT_ACK by kmn 8/6/97).
 */
void BayFullTcpAgent::listen()
{
	type_ = PT_TCP;
	state_ = TCPS_LISTEN;
}
/*
 * called when user/application performs 'close'
 *
 *	CLOSED, LISTEN, SYN_SENT	-> CLOSED
 *	SYN_RECEIVED, ESTABLISHED	-> FIN_WAIT_1, then a forced
 *					   send_much()
 *	any other state			-> no change
 */
void BayFullTcpAgent::usrclosed()
{
	if (state_ == TCPS_CLOSED ||
	    state_ == TCPS_LISTEN ||
	    state_ == TCPS_SYN_SENT) {
		state_ = TCPS_CLOSED;
	} else if (state_ == TCPS_SYN_RECEIVED ||
	    state_ == TCPS_ESTABLISHED) {
		state_ = TCPS_FIN_WAIT_1;
		send_much(1, REASON_NORMAL, 0);
	}
}
/*
 * Tcl command dispatch.  Handled here:
 *	listen				passive open (state change only)
 *	close				usrclosed()
 *	advance <n>			advance(atoi(n))
 *	attach-application <obj>	look up <obj> and store it in app_;
 *					TCL_ERROR if the lookup fails
 *	initial-window <n>		set wnd_init_, cwnd_ and awnd_
 * Everything else goes to TcpAgent::command().
 *
 * There is no "connect" primitive here: we get called before the
 * simulation is running, and sending a SYN would fail because no
 * routing exists yet.  See advance() instead.  'listen' can happen
 * any time because it just changes state_; 'close' is designed to
 * happen at some point after the simulation is running (using an ns
 * 'at' command).
 */
int BayFullTcpAgent::command(int argc, const char*const* argv)
{
	Tcl& tcl = Tcl::instance();

	switch (argc) {
	case 2:
		if (strcmp(argv[1], "listen") == 0) {
			// just a state transition
			listen();
			return (TCL_OK);
		}
		if (strcmp(argv[1], "close") == 0) {
			usrclosed();
			return (TCL_OK);
		}
		break;

	case 3:
		if (strcmp(argv[1], "advance") == 0) {
			advance(atoi(argv[2]));
			return (TCL_OK);
		}
		// added 7/31/97 by kmn to work with apps, specifically www
		// probably should use a special type of agent...
		if (strcmp(argv[1], "attach-application") == 0) {
			app_ = (BayTcpAppAgent *)TclObject::lookup(argv[2]);
			if (app_ == 0) {
				tcl.resultf("no such agent %s", argv[2]);
				return(TCL_ERROR);
			}
			return(TCL_OK);
		}
		// added by kmn 8/12/97
		if (strcmp(argv[1], "initial-window") == 0) {
			wnd_init_ = atoi(argv[2]);
			cwnd_ = wnd_init_;
			awnd_ = wnd_init_ /2.0;
			return(TCL_OK);
		}
		break;

	default:
		break;
	}

	return (TcpAgent::command(argc, argv));
}
/*
* clear out reassembly queue
*/
void BayReassemblyQueue::clear()
{
seginfo* p;
seginfo* n;
for (p = head_; p != NULL; p = n) {
n = p->next_;
delete p;
}
head_ = tail_ = NULL;
return;
}
/*
 * add a packet to the reassembly queue..
 * will update BayFullTcpAgent::rcv_nxt_ by way of the
 * BayReassemblyQueue::rcv_nxt_ integer reference (an alias)
 *
 *	pkt	packet to record; only its sequence number, size,
 *		header length and flags are read.  The packet itself is
 *		neither stored nor freed here.
 *
 * Returns 0 while the head of the queue still starts beyond rcv_nxt_
 * (a hole remains).  Otherwise every entry that starts at or before
 * rcv_nxt_ is removed from the front of the queue, rcv_nxt_ is moved
 * up to the highest end among them, and the OR of their flags is
 * returned.
 *
 * rcv_nxt_ is never moved backwards: entries may overlap, so an entry
 * drained later can end before one drained earlier.  For the same
 * reason draining continues as long as the next entry starts at or
 * before rcv_nxt_, not merely at or before the previous entry's end.
 */
int BayReassemblyQueue::add(Packet* pkt)
{
	hdr_tcp *tcph = hdr_tcp::access(pkt);
	hdr_cmn *th = hdr_cmn::access(pkt);
	int start = tcph->seqno();
	int end = start + th->size() - tcph->hlen();
	int tiflags = tcph->flags();

	/* build the new entry */
	seginfo *n = new seginfo;
	n->startseq_ = start;
	n->endseq_ = end;
	n->flags_ = tiflags;
	n->prev_ = NULL;
	n->next_ = NULL;

	if (head_ == NULL) {
		// nobody there, just insert
		head_ = tail_ = n;
	} else {
		seginfo *p;	// entry the new one goes after (NULL: at head)

		if (tail_->endseq_ <= start) {
			// common case of end of reass queue
			p = tail_;
		} else {
			// look for the segment after this one ...
			seginfo *q = head_;
			while (q != NULL && (end > q->startseq_))
				q = q->next_;
			// ... and take the segment before it
			p = (q == NULL) ? tail_ : q->prev_;
		}

		if (p == NULL) {
			// insert at head
			n->next_ = head_;
			head_->prev_ = n;
			head_ = n;
		} else {
			// insert in the middle or at the end
			n->next_ = p->next_;
			n->prev_ = p;
			if (p->next_)
				p->next_->prev_ = n;
			p->next_ = n;
			if (p == tail_)
				tail_ = n;
		}
	}

	//
	// look for a sequence of in-order segments and
	// set rcv_nxt if we can
	//
	if (head_->startseq_ > rcv_nxt_)
		return 0;	// still awaiting a hole-fill

	tiflags = 0;
	while (head_ != NULL && head_->startseq_ <= rcv_nxt_) {
		seginfo *q = head_;

		// update rcv_nxt_ to highest in-seq thing (forward only)
		if (q->endseq_ > rcv_nxt_)
			rcv_nxt_ = q->endseq_;
		tiflags |= q->flags_;

		// and delete the entry from the reass queue
		head_ = q->next_;
		if (head_ != NULL)
			head_->prev_ = NULL;
		else
			tail_ = NULL;
		delete q;
	}
	return (tiflags);
}
// syntax highlighted by Code2HTML, v. 0.9.1