rxrpc: Only set DF=1 on initial DATA transmission
author David Howells <dhowells@redhat.com>
Wed, 4 Dec 2024 07:46:43 +0000 (07:46 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Mon, 9 Dec 2024 21:48:27 +0000 (13:48 -0800)
Change how the DF flag is managed on DATA transmissions.  Set it on initial
transmission and don't set it on retransmissions.  Then remove the handling
for EMSGSIZE in rxrpc_send_data_packet() and just pretend it didn't happen,
leaving it to the retransmission path to retry.

The path-MTU discovery using PING ACKs is then used to probe for the
maximum DATA size - though notification by ICMP will be used if one is
received.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20241204074710.990092-16-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/rxrpc/ar-internal.h
net/rxrpc/output.c
net/rxrpc/proc.c

index 55cc68dd1b403e808c5424d37c51b90f89d9b501..84efa21f176cc3f374a0f61e672bc24169f725f6 100644 (file)
@@ -98,6 +98,7 @@ struct rxrpc_net {
        atomic_t                stat_tx_data_send;
        atomic_t                stat_tx_data_send_frag;
        atomic_t                stat_tx_data_send_fail;
+       atomic_t                stat_tx_data_send_msgsize;
        atomic_t                stat_tx_data_underflow;
        atomic_t                stat_tx_data_cwnd_reset;
        atomic_t                stat_rx_data;
index ca0da5e5d27846e5c082f596f75d78cd3beda198..3d992023f80f21fb46e22fda855ae89a246d6ddc 100644 (file)
@@ -552,16 +552,11 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
        msg.msg_controllen = 0;
        msg.msg_flags   = MSG_SPLICE_PAGES;
 
-       /* Track what we've attempted to transmit at least once so that the
-        * retransmission algorithm doesn't try to resend what we haven't sent
-        * yet.
+       /* Send the packet with the don't fragment bit set unless we think it's
+        * too big or if this is a retransmission.
         */
-       if (txb->seq == call->tx_transmitted + 1)
-               call->tx_transmitted = txb->seq + n - 1;
-
-       /* send the packet with the don't fragment bit set if we currently
-        * think it's small enough */
-       if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
+       if (txb->seq == call->tx_transmitted + 1 &&
+           len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
                rxrpc_local_dont_fragment(conn->local, false);
                frag = rxrpc_tx_point_call_data_frag;
        } else {
@@ -569,6 +564,13 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
                frag = rxrpc_tx_point_call_data_nofrag;
        }
 
+       /* Track what we've attempted to transmit at least once so that the
+        * retransmission algorithm doesn't try to resend what we haven't sent
+        * yet.
+        */
+       if (txb->seq == call->tx_transmitted + 1)
+               call->tx_transmitted = txb->seq + n - 1;
+
        if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
                static int lose;
 
@@ -580,7 +582,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t
                }
        }
 
-retry:
        /* send the packet by UDP
         * - returns -EMSGSIZE if UDP would have to fragment the packet
         *   to go out of the interface
@@ -591,7 +592,11 @@ retry:
        ret = do_udp_sendmsg(conn->local->socket, &msg, len);
        conn->peer->last_tx_at = ktime_get_seconds();
 
-       if (ret < 0) {
+       if (ret == -EMSGSIZE) {
+               rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
+               trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag);
+               ret = 0;
+       } else if (ret < 0) {
                rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
                trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
        } else {
@@ -599,11 +604,6 @@ retry:
        }
 
        rxrpc_tx_backoff(call, ret);
-       if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_nofrag) {
-               rxrpc_local_dont_fragment(conn->local, false);
-               frag = rxrpc_tx_point_call_data_frag;
-               goto retry;
-       }
 
 done:
        if (ret >= 0) {
index 44722c226064871e938bc4b91ee2e53d67677eb9..249e1ed9c5c9f339b191f01a2b639a166f0933cf 100644 (file)
@@ -473,10 +473,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v)
        struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
 
        seq_printf(seq,
-                  "Data     : send=%u sendf=%u fail=%u\n",
+                  "Data     : send=%u sendf=%u fail=%u emsz=%u\n",
                   atomic_read(&rxnet->stat_tx_data_send),
                   atomic_read(&rxnet->stat_tx_data_send_frag),
-                  atomic_read(&rxnet->stat_tx_data_send_fail));
+                  atomic_read(&rxnet->stat_tx_data_send_fail),
+                  atomic_read(&rxnet->stat_tx_data_send_msgsize));
        seq_printf(seq,
                   "Data-Tx  : nr=%u retrans=%u uf=%u cwr=%u\n",
                   atomic_read(&rxnet->stat_tx_data),