[TCP]: Appropriate Byte Count support
Stephen Hemminger [Fri, 11 Nov 2005 01:09:53 +0000 (17:09 -0800)]
This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.

The orignal ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

Documentation/networking/ip-sysctl.txt
include/linux/sysctl.h
include/linux/tcp.h
include/net/tcp.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_input.c
net/ipv4/tcp_minisocks.c

index 65895bb..ebc09a1 100644 (file)
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
 
 TCP variables: 
 
+tcp_abc - INTEGER
+       Controls Appropriate Byte Count defined in RFC3465. If set to
+       0 then does congestion avoid once per ack. 1 is conservative
+       value, and 2 is more agressive.
+
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
        will be retransmitted. Should not be higher than 255. Default value
index 22cf5e1..ab2791b 100644 (file)
@@ -390,6 +390,7 @@ enum
        NET_TCP_BIC_BETA=108,
        NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
        NET_TCP_CONG_CONTROL=110,
+       NET_TCP_ABC=111,
 };
 
 enum {
index ac4ca44..737b32e 100644 (file)
@@ -326,6 +326,7 @@ struct tcp_sock {
        __u32   snd_up;         /* Urgent pointer               */
 
        __u32   total_retrans;  /* Total retransmits for entire connection */
+       __u32   bytes_acked;    /* Appropriate Byte Counting - RFC3465 */
 
        unsigned int            keepalive_time;   /* time before keep alive takes place */
        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
index 54c3998..44ba4a2 100644 (file)
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
+extern int sysctl_tcp_abc;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
  */
 static inline void tcp_slow_start(struct tcp_sock *tp)
 {
+       if (sysctl_tcp_abc) {
+               /* RFC3465: Slow Start
+                * TCP sender SHOULD increase cwnd by the number of
+                * previously unacknowledged bytes ACKed by each incoming
+                * acknowledgment, provided the increase is not more than L
+                */
+               if (tp->bytes_acked < tp->mss_cache)
+                       return;
+
+               /* We MAY increase by 2 if discovered delayed ack */
+               if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+               }
+       }
+       tp->bytes_acked = 0;
+
        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                tp->snd_cwnd++;
 }
@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
 
        tp->prior_ssthresh = 0;
+       tp->bytes_acked = 0;
        if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
                __tcp_enter_cwr(sk);
                tcp_set_ca_state(sk, TCP_CA_CWR);
index 6526856..01444a0 100644 (file)
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
                .proc_handler   = &proc_tcp_congestion_control,
                .strategy       = &sysctl_tcp_congestion_control,
        },
+       {
+               .ctl_name       = NET_TCP_ABC,
+               .procname       = "tcp_abc",
+               .data           = &sysctl_tcp_abc,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 
        { .ctl_name = 0 }
 };
index 72b7c22..cfaf761 100644 (file)
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        tp->packets_out = 0;
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_cnt = 0;
+       tp->bytes_acked = 0;
        tcp_set_ca_state(sk, TCP_CA_Open);
        tcp_clear_retrans(tp);
        inet_csk_delack_init(sk);
index 6d3e883..c7cc62c 100644 (file)
@@ -192,17 +192,26 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
        /* In "safe" area, increase. */
         if (tp->snd_cwnd <= tp->snd_ssthresh)
                tcp_slow_start(tp);
-       else {
-               /* In dangerous area, increase slowly.
-                * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-                */
-               if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-                               tp->snd_cwnd++;
-                       tp->snd_cwnd_cnt = 0;
-               } else
-                       tp->snd_cwnd_cnt++;
-       }
+
+       /* In dangerous area, increase slowly. */
+       else if (sysctl_tcp_abc) {
+               /* RFC3465: Apppriate Byte Count
+                * increase once for each full cwnd acked
+                */
+               if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+                       tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+               }
+       } else {
+               /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+               if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+                       if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+                               tp->snd_cwnd++;
+                       tp->snd_cwnd_cnt = 0;
+               } else
+                       tp->snd_cwnd_cnt++;
+       }
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
index e430656..4cb5e6f 100644 (file)
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA              0x01 /* Incoming frame contained data.          */
 #define FLAG_WIN_UPDATE                0x02 /* Incoming ACK was a window update.       */
@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
        tp->snd_cwnd_cnt   = 0;
        tp->snd_cwnd_stamp = tcp_time_stamp;
 
+       tp->bytes_acked = 0;
        tcp_clear_retrans(tp);
 
        /* Push undo marker, if it was plain RTO and nothing
@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                        TCP_ECN_queue_cwr(tp);
                }
 
+               tp->bytes_acked = 0;
                tp->snd_cwnd_cnt = 0;
                tcp_set_ca_state(sk, TCP_CA_Recovery);
        }
@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
        if (before(ack, prior_snd_una))
                goto old_ack;
 
+       if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+               tp->bytes_acked += ack - prior_snd_una;
+
        if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
                /* Window is constant, pure forward advance.
                 * No more checks are required.
@@ -4370,6 +4376,7 @@ discard:
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
index b1a63b2..9203a21 100644 (file)
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
                 */
                newtp->snd_cwnd = 2;
                newtp->snd_cwnd_cnt = 0;
+               newtp->bytes_acked = 0;
 
                newtp->frto_counter = 0;
                newtp->frto_highmark = 0;