tcp: instrument how long TCP is limited by insufficient send buffer
author Francis Yan <francisyyan@gmail.com>
Mon, 28 Nov 2016 07:07:16 +0000 (23:07 -0800)
committer David S. Miller <davem@davemloft.net>
Wed, 30 Nov 2016 15:04:24 +0000 (10:04 -0500)
This patch measures the amount of time when TCP runs out of new data
to send to the network due to insufficient send buffer, while TCP
is still busy delivering (i.e. write queue is not empty). The goal
is to indicate that either the send buffer autotuning or the user's
SO_SNDBUF setting has resulted in network under-utilization.

The measurement starts conservatively by checking various conditions
to minimize false claims (i.e. under-estimation is more likely).
The measurement stops when the SOCK_NOSPACE flag is cleared, but it
does not account for the time elapsed until the next application write.
Also, the measurement only starts if the sender is still busy sending
data, such that the limited time accounted for is part of the total
busy time.

Signed-off-by: Francis Yan <francisyyan@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c

index 913f9bbfc030b0ff476e86f4c7983e896fda0c1c..259ffb50e429bfe97a2f99f3d4e2762afd2243fb 100644 (file)
@@ -996,8 +996,11 @@ do_error:
                goto out;
 out_err:
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        return sk_stream_error(sk, flags, err);
 }
 
@@ -1331,8 +1334,11 @@ do_error:
 out_err:
        err = sk_stream_error(sk, flags, err);
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        release_sock(sk);
        return err;
 }
index a5d17276161004c1f0586d498f603be16ed86c53..56fe736fd64db52805b208678678a96cc547373d 100644 (file)
@@ -5059,8 +5059,11 @@ static void tcp_check_space(struct sock *sk)
                /* pairs with tcp_poll() */
                smp_mb__after_atomic();
                if (sk->sk_socket &&
-                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
                        tcp_new_space(sk);
+                       if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+                               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+               }
        }
 }
 
index b74444cee24d361316e180bf68e6049d3ce6492e..d3545d0cff75698d48c83b07eb908ee23d6419ad 100644 (file)
@@ -1514,6 +1514,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
                if (sysctl_tcp_slow_start_after_idle &&
                    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
                        tcp_cwnd_application_limited(sk);
+
+               /* The following conditions together indicate the starvation
+                * is caused by insufficient sender buffer:
+                * 1) just sent some data (see tcp_write_xmit)
+                * 2) not cwnd limited (this else condition)
+                * 3) no more data to send (null tcp_send_head )
+                * 4) application is hitting buffer limit (SOCK_NOSPACE)
+                */
+               if (!tcp_send_head(sk) && sk->sk_socket &&
+                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+                   (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+                       tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
        }
 }