tcp: don't use timestamp from repaired skb-s to calculate RTT (v2)
authorAndrey Vagin <avagin@openvz.org>
Wed, 13 Aug 2014 12:03:10 +0000 (16:03 +0400)
committerDavid S. Miller <davem@davemloft.net>
Thu, 14 Aug 2014 21:38:54 +0000 (14:38 -0700)
We don't know right timestamp for repaired skb-s. Wrong RTT estimations
isn't good, because some congestion modules heavily depends on it.

This patch adds the TCPCB_REPAIRED flag, which is included in
TCPCB_RETRANS.

Thanks to Eric for the advice how to fix this issue.

This patch fixes the warning:
[  879.562947] WARNING: CPU: 0 PID: 2825 at net/ipv4/tcp_input.c:3078 tcp_ack+0x11f5/0x1380()
[  879.567253] CPU: 0 PID: 2825 Comm: socket-tcpbuf-l Not tainted 3.16.0-next-20140811 #1
[  879.567829] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[  879.568177]  0000000000000000 00000000c532680c ffff880039643d00 ffffffff817aa2d2
[  879.568776]  0000000000000000 ffff880039643d38 ffffffff8109afbd ffff880039d6ba80
[  879.569386]  ffff88003a449800 000000002983d6bd 0000000000000000 000000002983d6bc
[  879.569982] Call Trace:
[  879.570264]  [<ffffffff817aa2d2>] dump_stack+0x4d/0x66
[  879.570599]  [<ffffffff8109afbd>] warn_slowpath_common+0x7d/0xa0
[  879.570935]  [<ffffffff8109b0ea>] warn_slowpath_null+0x1a/0x20
[  879.571292]  [<ffffffff816d0a05>] tcp_ack+0x11f5/0x1380
[  879.571614]  [<ffffffff816d10bd>] tcp_rcv_established+0x1ed/0x710
[  879.571958]  [<ffffffff816dc9da>] tcp_v4_do_rcv+0x10a/0x370
[  879.572315]  [<ffffffff81657459>] release_sock+0x89/0x1d0
[  879.572642]  [<ffffffff816c81a0>] do_tcp_setsockopt.isra.36+0x120/0x860
[  879.573000]  [<ffffffff8110a52e>] ? rcu_read_lock_held+0x6e/0x80
[  879.573352]  [<ffffffff816c8912>] tcp_setsockopt+0x32/0x40
[  879.573678]  [<ffffffff81654ac4>] sock_common_setsockopt+0x14/0x20
[  879.574031]  [<ffffffff816537b0>] SyS_setsockopt+0x80/0xf0
[  879.574393]  [<ffffffff817b40a9>] system_call_fastpath+0x16/0x1b
[  879.574730] ---[ end trace a17cbc38eb8c5c00 ]---

v2: moving setting of skb->when for repaired skb-s in tcp_write_xmit,
    where it's set for other skb-s.

Fixes: 431a91242d8d ("tcp: timestamp SYN+DATA messages")
Fixes: 740b0f1841f6 ("tcp: switch rtt estimations to usec resolution")
Cc: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_output.c

index dafa1cbc149bc54a6e86db9cc69c2c2132f86513..36f55254573f24b125a7a48196f7433d6fbbfd37 100644 (file)
@@ -705,8 +705,10 @@ struct tcp_skb_cb {
 #define TCPCB_SACKED_RETRANS   0x02    /* SKB retransmitted            */
 #define TCPCB_LOST             0x04    /* SKB is lost                  */
 #define TCPCB_TAGBITS          0x07    /* All tag bits                 */
+#define TCPCB_REPAIRED         0x10    /* SKB repaired (no skb_mstamp) */
 #define TCPCB_EVER_RETRANS     0x80    /* Ever retransmitted frame     */
-#define TCPCB_RETRANS          (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
+#define TCPCB_RETRANS          (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
+                               TCPCB_REPAIRED)
 
        __u8            ip_dsfield;     /* IPv4 tos or IPv6 dsfield     */
        /* 1 byte hole */
index 181b70ebd9641edb54afb158722e95272e4e753d..541f26a67ba28861c882f819369baf2ad2cd4724 100644 (file)
@@ -1187,13 +1187,6 @@ new_segment:
                                if (!skb)
                                        goto wait_for_memory;
 
-                               /*
-                                * All packets are restored as if they have
-                                * already been sent.
-                                */
-                               if (tp->repair)
-                                       TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
                                /*
                                 * Check whether we can use HW checksum.
                                 */
@@ -1203,6 +1196,13 @@ new_segment:
                                skb_entail(sk, skb);
                                copy = size_goal;
                                max = size_goal;
+
+                               /* All packets are restored as if they have
+                                * already been sent. skb_mstamp isn't set to
+                                * avoid wrong rtt estimation.
+                                */
+                               if (tp->repair)
+                                       TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
                        }
 
                        /* Try to append data to the end of skb. */
index ef4a051de018d36ea77b17f6dafecea0c549bee2..ff3f0f75cc6c580a48ae81a60b37106bc741e364 100644 (file)
@@ -1934,8 +1934,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                BUG_ON(!tso_segs);
 
-               if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+               if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+                       /* "when" is used as a start point for the retransmit timer */
+                       TCP_SKB_CB(skb)->when = tcp_time_stamp;
                        goto repair; /* Skip network transmission */
+               }
 
                cwnd_quota = tcp_cwnd_test(tp, skb);
                if (!cwnd_quota) {