tcp: implement RFC5682 F-RTO
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8d821e45b9178ed85287e2835eedb43507e72ed5..b2b36196b3427c015f670925bcb4ddfdbf4451cb 100644
@@ -107,6 +107,7 @@ int sysctl_tcp_early_retrans __read_mostly = 3;
 #define FLAG_DATA_SACKED       0x20 /* New SACK.                               */
 #define FLAG_ECE               0x40 /* ECE in this ACK                         */
 #define FLAG_SLOWPATH          0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ORIG_SACK_ACKED   0x200 /* Never retransmitted data are (s)acked  */
 #define FLAG_SND_UNA_ADVANCED  0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK      0x800 /* SACK blocks contained D-SACK info */
 #define FLAG_SACK_RENEGING     0x2000 /* snd_una advanced to a sacked seq */
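
The new bit fills the previously unused 0x200 slot between FLAG_SLOWPATH (0x100) and FLAG_SND_UNA_ADVANCED (0x400). A minimal standalone sketch of what the bit is meant to convey (the helper name is hypothetical; the define mirrors the hunk above):

    #include <stdbool.h>

    #define FLAG_ORIG_SACK_ACKED 0x200  /* mirrors the define above */

    /* An ACK carrying this bit proves that an original, never-retransmitted
     * copy of pre-RTO data arrived: the F-RTO evidence that the timeout was
     * spurious (RFC 5682 sec 3.1 step 3.b).
     */
    static bool ack_proves_spurious_rto(int flag)
    {
            return (flag & FLAG_ORIG_SACK_ACKED) != 0;
    }
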
@@ -1155,6 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                           tcp_highest_sack_seq(tp)))
                                        state->reord = min(fack_count,
                                                           state->reord);
+                               if (!after(end_seq, tp->high_seq))
+                                       state->flag |= FLAG_ORIG_SACK_ACKED;
                        }
 
                        if (sacked & TCPCB_LOST) {
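
tp->high_seq is snapshotted to snd_nxt when the connection enters loss (tcp_enter_loss below), so the guard !after(end_seq, tp->high_seq) marks only segments sent before the timeout. For reference, the kernel's wrap-safe sequence comparators, paraphrased from include/net/tcp.h:

    #include <stdbool.h>
    #include <stdint.h>

    /* Paraphrase of the helpers in include/net/tcp.h. */
    static bool before(uint32_t seq1, uint32_t seq2)
    {
            return (int32_t)(seq1 - seq2) < 0;      /* seq1 < seq2 mod 2^32 */
    }
    #define after(seq2, seq1)  before(seq1, seq2)   /* seq2 > seq1 mod 2^32 */

    /* So !after(end_seq, high_seq) reads "end_seq <= high_seq": the sacked
     * range lies entirely within data sent before the RTO.
     */
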
@@ -1835,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how)
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
+       bool new_recovery = false;
 
        /* Reduce ssthresh if it has not yet been made inside this window. */
-       if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+       if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+           !after(tp->high_seq, tp->snd_una) ||
            (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+               new_recovery = true;
                tp->prior_ssthresh = tcp_current_ssthresh(sk);
                tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                tcp_ca_event(sk, CA_EVENT_LOSS);
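
Two changes here: the equality test against high_seq becomes the wrap-safe !after(tp->high_seq, tp->snd_una), and the outcome is remembered in new_recovery for the F-RTO arming decision below. An annotation-only sketch of how the relaxed test behaves:

    /*
     *   relation of snd_una to high_seq   old (==)   new (!after)
     *   ---------------------------------------------------------
     *   snd_una == high_seq               true       true
     *   snd_una  > high_seq               false      true
     *   snd_una  < high_seq               false      false  (previous
     *                                               recovery still open)
     */
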
@@ -1883,6 +1889,14 @@ void tcp_enter_loss(struct sock *sk, int how)
        tcp_set_ca_state(sk, TCP_CA_Loss);
        tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
+
+       /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+        * loss recovery is underway except recurring timeout(s) on
+        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing.
+        */
+       tp->frto = sysctl_tcp_frto &&
+                  (new_recovery || icsk->icsk_retransmits) &&
+                  !inet_csk(sk)->icsk_mtup.probe_size;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
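
With tp->frto armed, a spurious timeout plays out roughly as follows (hypothetical timeline; packet names are illustrative):

    /*
     *   t0  send P1..P4; snd_nxt points past P4
     *   t1  RTO fires on delay rather than loss:
     *       tcp_enter_loss() sets high_seq = snd_nxt, arms tp->frto,
     *       and P1 is retransmitted (RFC 5682 sec 3.1 step 1)
     *   t2  an ACK of P1 arrives, produced by the ORIGINAL transmission:
     *       step 2.b in tcp_process_loss() below sends new data P5, P6
     *   t3  an ACK (s)acks P2..P4, i.e. never-retransmitted data:
     *       FLAG_ORIG_SACK_ACKED is set and step 3.b undoes the cwnd
     *       reduction via tcp_try_undo_loss(sk, true)
     */
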
@@ -2426,12 +2440,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
        return failed;
 }
 
-/* Undo during loss recovery after partial ACK. */
-static bool tcp_try_undo_loss(struct sock *sk)
+/* Undo during loss recovery after partial ACK or using F-RTO. */
+static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (tcp_may_undo(tp)) {
+       if (frto_undo || tcp_may_undo(tp)) {
                struct sk_buff *skb;
                tcp_for_write_queue(skb, sk) {
                        if (skb == tcp_send_head(sk))
@@ -2445,9 +2459,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
                tp->lost_out = 0;
                tcp_undo_cwr(sk, true);
                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+               if (frto_undo)
+                       NET_INC_STATS_BH(sock_net(sk),
+                                        LINUX_MIB_TCPSPURIOUSRTOS);
                inet_csk(sk)->icsk_retransmits = 0;
                tp->undo_marker = 0;
-               if (tcp_is_sack(tp))
+               if (frto_undo || tcp_is_sack(tp))
                        tcp_set_ca_state(sk, TCP_CA_Open);
                return true;
        }
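
For F-RTO the undo is unconditional: frto_undo bypasses tcp_may_undo(), bumps the TCPSPURIOUSRTOS counter, and forces CA_Open even for non-SACK flows. The loop body elided between the two hunks is not shown; from context it un-marks lost segments, roughly:

    /* Sketch of the elided loop body (inferred, not shown in the diff). */
    tcp_for_write_queue(skb, sk) {
            if (skb == tcp_send_head(sk))
                    break;                           /* only data already sent */
            TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;  /* no longer counted lost */
    }
    tp->lost_out = 0;   /* matches the first line of the second hunk */
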
@@ -2667,24 +2684,52 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
  * recovered or spurious. Otherwise retransmits more on partial ACKs.
  */
-static void tcp_process_loss(struct sock *sk, int flag)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
+       bool recovered = !before(tp->snd_una, tp->high_seq);
 
-       if (!before(tp->snd_una, tp->high_seq)) {
+       if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+               if (flag & FLAG_ORIG_SACK_ACKED) {
+                       /* Step 3.b. A timeout is spurious if not all data are
+                        * lost, i.e., never-retransmitted data are (s)acked.
+                        */
+                       tcp_try_undo_loss(sk, true);
+                       return;
+               }
+               if (after(tp->snd_nxt, tp->high_seq) &&
+                   (flag & FLAG_DATA_SACKED || is_dupack)) {
+                       tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+               } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+                       tp->high_seq = tp->snd_nxt;
+                       __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+                                                 TCP_NAGLE_OFF);
+                       if (after(tp->snd_nxt, tp->high_seq))
+                               return; /* Step 2.b */
+                       tp->frto = 0;
+               }
+       }
+
+       if (recovered) {
+               /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
                icsk->icsk_retransmits = 0;
                tcp_try_undo_recovery(sk);
                return;
        }
-
        if (flag & FLAG_DATA_ACKED)
                icsk->icsk_retransmits = 0;
-       if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
-               tcp_reset_reno_sack(tp);
-       if (tcp_try_undo_loss(sk))
+       if (tcp_is_reno(tp)) {
+               /* A Reno DUPACK means new data in F-RTO step 2.b above are
+                * delivered. Lower inflight to clock out (re)tranmissions.
+                */
+               if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+                       tcp_add_reno_sack(sk);
+               else if (flag & FLAG_SND_UNA_ADVANCED)
+                       tcp_reset_reno_sack(tp);
+       }
+       if (tcp_try_undo_loss(sk, false))
                return;
-       tcp_moderate_cwnd(tp);
        tcp_xmit_retransmit_queue(sk);
 }
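
Taken together, tcp_process_loss() now encodes the SACK-enhanced algorithm of RFC 5682 sec 3.1; the Reno branch counts a DUPACK for the new data sent in step 2.b so that inflight shrinks and (re)transmissions can be clocked out. An annotation-only map from the spec's steps to the branches above:

    /*
     *  step 1    tcp_enter_loss(): retransmit SND.UNA, arm tp->frto
     *  step 2.a  "recovered" path: snd_una reached high_seq, undo via
     *            tcp_try_undo_recovery()
     *  step 2.b  FLAG_SND_UNA_ADVANCED while not recovered: originals
     *            are being acked; push NEW data and raise high_seq so
     *            the next ACK can be classified
     *  step 3.a  a dupack or fresh SACK after 2.b: the loss was real;
     *            clear tp->frto and fall through to retransmission
     *  step 3.b  FLAG_ORIG_SACK_ACKED: never-retransmitted data were
     *            (s)acked; the RTO was spurious, tcp_try_undo_loss(sk, true)
     */
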
 
@@ -2764,7 +2809,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
                break;
        case TCP_CA_Loss:
-               tcp_process_loss(sk, flag);
+               tcp_process_loss(sk, flag, is_dupack);
                if (icsk->icsk_ca_state != TCP_CA_Open)
                        return;
                /* Fall through to processing in Open state. */
@@ -3003,6 +3048,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                        }
                        if (!(sacked & TCPCB_SACKED_ACKED))
                                reord = min(pkts_acked, reord);
+                       if (!after(scb->end_seq, tp->high_seq))
+                               flag |= FLAG_ORIG_SACK_ACKED;
                }
 
                if (sacked & TCPCB_SACKED_ACKED)
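
The cumulative-ACK path in tcp_clean_rtx_queue() raises the same flag, so pure-Reno connections (no SACK) can also detect a spurious RTO. From the surrounding context this sits in the branch handling never-retransmitted skbs; roughly (placement inferred, as the enclosing branch is cut off by the hunk):

    if (!(sacked & TCPCB_RETRANS)) {                   /* never retransmitted */
            /* ... RTT sampling and reordering bookkeeping ... */
            if (!after(scb->end_seq, tp->high_seq))    /* sent before the RTO */
                    flag |= FLAG_ORIG_SACK_ACKED;
    }
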