tcp: track the packet timings in RACK

author Yuchung Cheng <ycheng@google.com>

Sat, 17 Oct 2015 04:57:46 +0000 (21:57 -0700)

committer David S. Miller <davem@davemloft.net>

Wed, 21 Oct 2015 14:00:48 +0000 (07:00 -0700)
author Yuchung Cheng <ycheng@google.com>
Sat, 17 Oct 2015 04:57:46 +0000 (21:57 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 21 Oct 2015 14:00:48 +0000 (07:00 -0700)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h

index 8c54863dfc38c9006efd74a78029549fc2a0c9a7..5dce9705fe8415e31561ee2c1d52ef17aaeb7495 100644 (file)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -194,6 +194,12 @@ struct tcp_sock {
         u32     window_clamp;   /* Maximal window to advertise          */
         u32     rcv_ssthresh;   /* Current window clamp                 */
  
+       /* RACK: information about the most recently (s)acked skb */
+       struct tcp_rack {
+               struct skb_mstamp mstamp; /* (Re)sent time of the skb */
+               u8 advanced; /* mstamp advanced since last lost marking */
+               u8 reord;    /* reordering detected */
+       } rack;
         u16     advmss;         /* Advertised MSS                       */
         u8      unused;
         u8      nonagle     : 4,/* Disable Nagle algorithm?             */
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 4a43152229eadc8162805b35b0186b1054504d5c..3c3a9fe057d38be63d69bac740ce569e290cf005 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1750,6 +1750,11 @@ int tcpv4_offload_init(void);
  void tcp_v4_init(void);
  void tcp_init(void);
  
+/* tcp_recovery.c */
+
+extern void tcp_rack_advance(struct tcp_sock *tp,
+                            const struct skb_mstamp *xmit_time, u8 sacked);
+
  /*
   * Save and compile IPv4 options, return a pointer to it
   */
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile

index 89aacb630a53362bb9de51c7381ae9b6a799bdc3..c29809f765dc5d4d95edd5d6ac3cc321fcb97c88 100644 (file)
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,6 +8,7 @@ obj-y     := route.o inetpeer.o protocol.o \
              inet_timewait_sock.o inet_connection_sock.o \
              tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
              tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
+            tcp_recovery.o \
              tcp_offload.o datagram.o raw.o udp.o udplite.o \
              udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
              fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 1e97e73e5ecf1a3068355403a3c1a5ea52de98e0..ce8370525832f51021d16c1799cb7919c495d624 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1173,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
                 return sacked;
  
         if (!(sacked & TCPCB_SACKED_ACKED)) {
+               tcp_rack_advance(tp, xmit_time, sacked);
+
                 if (sacked & TCPCB_SACKED_RETRANS) {
                         /* If the segment is not tagged as lost,
                          * we do not clear RETRANS, believing
@@ -2256,6 +2258,16 @@ static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
                before(tp->rx_opt.rcv_tsecr, when);
  }
  
+/* A retransmitted skb (TCPCB_RETRANS set) is deemed spurious if the
+ * returned timestamp echo reply is prior to the skb transmission time.
+ */
+static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
+                                    const struct sk_buff *skb)
+{
+       return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
+              tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
+}
+
  /* Nothing was retransmitted or returned timestamp is less
   * than timestamp of the first retransmission.
   */
@@ -3135,6 +3147,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                 if (sacked & TCPCB_SACKED_ACKED)
                         tp->sacked_out -= acked_pcount;
+               else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
+                       tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
                 if (sacked & TCPCB_LOST)
                         tp->lost_out -= acked_pcount;
  
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index b875c288daaa1b91c208b510ba0098946f83b89b..1fd5d413a6642b526c98edc0144b3ceed503bb9d 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                 tcp_ecn_openreq_child(newtp, req);
                 newtp->fastopen_rsk = NULL;
                 newtp->syn_data_acked = 0;
+               newtp->rack.mstamp.v64 = 0;
+               newtp->rack.advanced = 0;
  
                 newtp->saved_syn = req->saved_syn;
                 req->saved_syn = NULL;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c

new file mode 100644 (file)

index 0000000..8f66a65
--- /dev/null
+++ b/net/ipv4/tcp_recovery.c
@@ -0,0 +1,32 @@
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+/* Advance tp->rack.mstamp to the latest (re)sent time among (s)acked skbs */
+void tcp_rack_advance(struct tcp_sock *tp,
+                     const struct skb_mstamp *xmit_time, u8 sacked)
+{
+       if (tp->rack.mstamp.v64 &&
+           !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
+               return;
+
+       if (sacked & TCPCB_RETRANS) {
+               struct skb_mstamp now;
+
+               /* If the sacked packet was retransmitted, it's ambiguous
+                * whether the retransmission or the original (or the prior
+                * retransmission) was sacked.
+                *
+                * If the original is lost, there is no ambiguity. Otherwise
+                * we assume the original can be delayed up to aRTT + min_rtt.
+                * The aRTT term is bounded by the fast recovery or timeout,
+                * so it's at least one RTT (i.e., retransmission is at least
+                * an RTT later). So ignore samples younger than min_rtt.
+                */
+               skb_mstamp_get(&now);
+               if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
+                       return;
+       }
+
+       tp->rack.mstamp = *xmit_time;
+       tp->rack.advanced = 1;
+}
author	Yuchung Cheng <ycheng@google.com>
	Sat, 17 Oct 2015 04:57:46 +0000 (21:57 -0700)
committer	David S. Miller <davem@davemloft.net>
	Wed, 21 Oct 2015 14:00:48 +0000 (07:00 -0700)
include/linux/tcp.h		patch \| blob \| blame \| history
include/net/tcp.h		patch \| blob \| blame \| history
net/ipv4/Makefile		patch \| blob \| blame \| history
net/ipv4/tcp_input.c		patch \| blob \| blame \| history
net/ipv4/tcp_minisocks.c		patch \| blob \| blame \| history
net/ipv4/tcp_recovery.c	[new file with mode: 0644]	patch \| blob