dim: pass dim_sample to net_dim() by reference
author Caleb Sander Mateos <csander@purestorage.com>
Thu, 31 Oct 2024 00:23:26 +0000 (18:23 -0600)
committer Jakub Kicinski <kuba@kernel.org>
Sun, 3 Nov 2024 20:36:54 +0000 (12:36 -0800)
net_dim() is currently passed a struct dim_sample argument by value.
struct dim_sample is 24 bytes. Since this is greater than 16 bytes, x86-64
passes it on the stack. All callers have already initialized dim_sample
on the stack, so passing it by value requires pushing a duplicated copy
to the stack. Either writing to the stack and immediately reading it, or
perhaps dereferencing addresses relative to the stack pointer in a chain
of push instructions, seems to perform quite poorly.

In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time,
94% of which is attributed to the first push instruction to copy
dim_sample on the stack for the call to net_dim():
// Call ktime_get()
  0.26 |4ead2:   call   4ead7 <mlx5e_handle_rx_dim+0x47>
// Pass the address of struct dim in %rdi
       |4ead7:   lea    0x3d0(%rbx),%rdi
// Set dim_sample.pkt_ctr
       |4eade:   mov    %r13d,0x8(%rsp)
// Set dim_sample.byte_ctr
       |4eae3:   mov    %r12d,0xc(%rsp)
// Set dim_sample.event_ctr
  0.15 |4eae8:   mov    %bp,0x10(%rsp)
// Duplicate dim_sample on the stack
 94.16 |4eaed:   push   0x10(%rsp)
  2.79 |4eaf1:   push   0x10(%rsp)
  0.07 |4eaf5:   push   %rax
// Call net_dim()
  0.21 |4eaf6:   call   4eafb <mlx5e_handle_rx_dim+0x6b>

To allow the caller to reuse the struct dim_sample already on the stack,
pass the struct dim_sample by reference to net_dim().

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Arthur Kiyanovski <akiyano@amazon.com>
Reviewed-by: Louis Peens <louis.peens@corigine.com>
Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
19 files changed:
Documentation/networking/net_dim.rst
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/idpf/idpf_txrx.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/netronome/nfp/nfd3/dp.c
drivers/net/ethernet/netronome/nfp/nfdk/dp.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/virtio_net.c
drivers/soc/fsl/dpio/dpio-service.c
include/linux/dim.h
lib/dim/net_dim.c

index 8908fd7b0a8d23e7891b5dba5382e498f8854bb3..4377998e6826d1164be8716257177d520d64ad58 100644 (file)
@@ -156,7 +156,7 @@ usage is not complete but it should make the outline of the usage clear.
                          my_entity->bytes,
                          &dim_sample);
        /* Call net DIM */
-       net_dim(&my_entity->dim, dim_sample);
+       net_dim(&my_entity->dim, &dim_sample);
        ...
   }
 
index 96df20854eb95a33b760294c352961d92d58a802..63c8a2328142d6befed0ee3ed09032f2fe13313f 100644 (file)
@@ -1383,7 +1383,7 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
                          rx_ring->rx_stats.bytes,
                          &dim_sample);
 
-       net_dim(&ena_napi->dim, dim_sample);
+       net_dim(&ena_napi->dim, &dim_sample);
 
        rx_ring->per_napi_packets = 0;
 }
index caff6e87a4882e961648f0e413d8aec252391b0c..031e9e0cca5380bb98f24fe5d32c684fda220c69 100644 (file)
@@ -1029,7 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
        if (priv->dim.use_dim) {
                dim_update_sample(priv->dim.event_ctr, priv->dim.packets,
                                  priv->dim.bytes, &dim_sample);
-               net_dim(&priv->dim.dim, dim_sample);
+               net_dim(&priv->dim.dim, &dim_sample);
        }
 
        return work_done;
index 6dd6541d8619e59e0348eee20f34a9de8d921bcd..ca42b81133d76a2275f952ad40c5829385ddc048 100644 (file)
@@ -3102,7 +3102,7 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
                                  cpr->rx_packets,
                                  cpr->rx_bytes,
                                  &dim_sample);
-               net_dim(&cpr->dim, dim_sample);
+               net_dim(&cpr->dim, &dim_sample);
        }
        return work_done;
 }
@@ -3233,7 +3233,7 @@ poll_done:
                                  cpr_rx->rx_packets,
                                  cpr_rx->rx_bytes,
                                  &dim_sample);
-               net_dim(&cpr->dim, dim_sample);
+               net_dim(&cpr->dim, &dim_sample);
        }
        return work_done;
 }
index 10966ab15373088f5d5609eb9792450d1b542ad7..53a949eb91803593a4b9a830edd576b344f3bdb9 100644 (file)
@@ -2405,7 +2405,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
        if (ring->dim.use_dim) {
                dim_update_sample(ring->dim.event_ctr, ring->dim.packets,
                                  ring->dim.bytes, &dim_sample);
-               net_dim(&ring->dim.dim, dim_sample);
+               net_dim(&ring->dim.dim, &dim_sample);
        }
 
        return work_done;
index c09370eab319b239d2334b80a5bf19668b170665..05dedea6185aa8284b3068d6abaf4e3a01b8cff9 100644 (file)
@@ -718,7 +718,7 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v)
                          v->rx_ring.stats.packets,
                          v->rx_ring.stats.bytes,
                          &dim_sample);
-       net_dim(&v->rx_dim, dim_sample);
+       net_dim(&v->rx_dim, &dim_sample);
 }
 
 static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
index b09f0cca34dc68604dc00b7e7ed233a16fcdadd7..fbfd3ee5648f065807eac4faacc43714db1d66fd 100644 (file)
@@ -4478,7 +4478,7 @@ static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
 
        dim_update_sample(tqp_vector->event_cnt, rx_group->total_packets,
                          rx_group->total_bytes, &sample);
-       net_dim(&rx_group->dim, sample);
+       net_dim(&rx_group->dim, &sample);
 }
 
 static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
@@ -4491,7 +4491,7 @@ static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector)
 
        dim_update_sample(tqp_vector->event_cnt, tx_group->total_packets,
                          tx_group->total_bytes, &sample);
-       net_dim(&tx_group->dim, sample);
+       net_dim(&tx_group->dim, &sample);
 }
 
 static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
index 8208055d6e7fc59148f8c86d03b454c649234744..5d2d7736fd5f129407a2b58756b8985ef1b1eb6a 100644 (file)
@@ -1352,14 +1352,14 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
                struct dim_sample dim_sample;
 
                __ice_update_sample(q_vector, tx, &dim_sample, true);
-               net_dim(&tx->dim, dim_sample);
+               net_dim(&tx->dim, &dim_sample);
        }
 
        if (ITR_IS_DYNAMIC(rx)) {
                struct dim_sample dim_sample;
 
                __ice_update_sample(q_vector, rx, &dim_sample, false);
-               net_dim(&rx->dim, dim_sample);
+               net_dim(&rx->dim, &dim_sample);
        }
 }
 
index d4e6f0e104872db1ddb63b8192b7bb79da6c63e8..da2a5becf62f127e7fc16854c391723ce80c4f64 100644 (file)
@@ -3679,7 +3679,7 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector)
 
        idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim,
                               packets, bytes);
-       net_dim(&q_vector->tx_dim, dim_sample);
+       net_dim(&q_vector->tx_dim, &dim_sample);
 
 check_rx_itr:
        if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode))
@@ -3698,7 +3698,7 @@ check_rx_itr:
 
        idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim,
                               packets, bytes);
-       net_dim(&q_vector->rx_dim, dim_sample);
+       net_dim(&q_vector->rx_dim, &dim_sample);
 }
 
 /**
index 933e18ba2fb2c3621d20650f771f0a1efb59e885..7aaf32e9aa95e74472956e45c6ba10230dabdfe0 100644 (file)
@@ -527,7 +527,7 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p
                          rx_frames + tx_frames,
                          rx_bytes + tx_bytes,
                          &dim_sample);
-       net_dim(&cq_poll->dim, dim_sample);
+       net_dim(&cq_poll->dim, &dim_sample);
 }
 
 int otx2_napi_handler(struct napi_struct *napi, int budget)
index f01ceee5f02d699099f70d0b0cb2c729c2e8ebed..53485142938c4762b87da6e62d6aff57cd35be24 100644 (file)
@@ -2227,7 +2227,7 @@ rx_done:
        eth->rx_bytes += bytes;
        dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
                          &dim_sample);
-       net_dim(&eth->rx_dim, dim_sample);
+       net_dim(&eth->rx_dim, &dim_sample);
 
        if (xdp_flush)
                xdp_do_flush();
@@ -2377,7 +2377,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 
        dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
                          &dim_sample);
-       net_dim(&eth->tx_dim, dim_sample);
+       net_dim(&eth->tx_dim, &dim_sample);
 
        if (mtk_queue_stopped(eth) &&
            (atomic_read(&ring->free_count) > ring->thresh))
index 5873fde65c2e33645ca0621f4220b97b84f7f9db..417098f0b2bbd1563871755e12ae6714d1f18fc4 100644 (file)
@@ -55,7 +55,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
                return;
 
        dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
-       net_dim(sq->dim, dim_sample);
+       net_dim(sq->dim, &dim_sample);
 }
 
 static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
@@ -67,7 +67,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
                return;
 
        dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
-       net_dim(rq->dim, dim_sample);
+       net_dim(rq->dim, &dim_sample);
 }
 
 void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
index d215efc6cad0c28425d35b8417e1be9fb5c766d2..f1c6c47564b1766c89918183dff3b802de1fd94d 100644 (file)
@@ -1179,7 +1179,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
                } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
 
                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-               net_dim(&r_vec->rx_dim, dim_sample);
+               net_dim(&r_vec->rx_dim, &dim_sample);
        }
 
        if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
@@ -1194,7 +1194,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget)
                } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
 
                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-               net_dim(&r_vec->tx_dim, dim_sample);
+               net_dim(&r_vec->tx_dim, &dim_sample);
        }
 
        return pkts_polled;
index dae5af7d1845bae86abc1f791d6cb04c181e05d8..ebeb6ab4465c6f83eaa890cb2bd4c25ac47fa5fd 100644 (file)
@@ -1289,7 +1289,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
                } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
 
                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-               net_dim(&r_vec->rx_dim, dim_sample);
+               net_dim(&r_vec->rx_dim, &dim_sample);
        }
 
        if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
@@ -1304,7 +1304,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget)
                } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
 
                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
-               net_dim(&r_vec->tx_dim, dim_sample);
+               net_dim(&r_vec->tx_dim, &dim_sample);
        }
 
        return pkts_polled;
index 0eeda7e502db262ec17c5ec961c45e82d31e81fa..2ac59564ded188e00739fe4487f012cdb90921ef 100644 (file)
@@ -928,7 +928,7 @@ static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
        dim_update_sample(qcq->cq.bound_intr->rearm_count,
                          pkts, bytes, &dim_sample);
 
-       net_dim(&qcq->dim, dim_sample);
+       net_dim(&qcq->dim, &dim_sample);
 }
 
 int ionic_tx_napi(struct napi_struct *napi, int budget)
index 792e9eadbfc3dce4bc146b7b799d602dd32f7f82..869586c17ffd6226268e96ef679859ce69cf632f 100644 (file)
@@ -2804,7 +2804,7 @@ static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue
                          u64_stats_read(&rq->stats.bytes),
                          &cur_sample);
 
-       net_dim(&rq->dim, cur_sample);
+       net_dim(&rq->dim, &cur_sample);
        rq->packets_in_napi = 0;
 }
 
index b811446e0fa55febb51e6338c53ec62f183e9955..0b60ed16297c648da1841db1467421735dfc1940 100644 (file)
@@ -891,7 +891,7 @@ void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes)
        d->frames += frames;
 
        dim_update_sample(d->event_ctr, d->frames, d->bytes, &dim_sample);
-       net_dim(&d->rx_dim, dim_sample);
+       net_dim(&d->rx_dim, &dim_sample);
 
        spin_unlock(&d->dim_lock);
 }
index 84579a50ae7fe613d7a6f003dd7bb010b0427e36..06543fd40fccbd674f6880ebaa6ee7cf23ecde0f 100644 (file)
@@ -425,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
  * This is the main logic of the algorithm, where data is processed in order
  * to decide on next required action.
  */
-void net_dim(struct dim *dim, struct dim_sample end_sample);
+void net_dim(struct dim *dim, const struct dim_sample *end_sample);
 
 /* RDMA DIM */
 
index d7e7028e9b19523c60aca4332c300ca7b26067c6..d6aa09a979b37f5396db87e668d6e97f0592fbf4 100644 (file)
@@ -347,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
        return dim->profile_ix != prev_ix;
 }
 
-void net_dim(struct dim *dim, struct dim_sample end_sample)
+void net_dim(struct dim *dim, const struct dim_sample *end_sample)
 {
        struct dim_stats curr_stats;
        u16 nevents;
@@ -355,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample)
        switch (dim->state) {
        case DIM_MEASURE_IN_PROGRESS:
                nevents = BIT_GAP(BITS_PER_TYPE(u16),
-                                 end_sample.event_ctr,
+                                 end_sample->event_ctr,
                                  dim->start_sample.event_ctr);
                if (nevents < DIM_NEVENTS)
                        break;
-               if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
+               if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats))
                        break;
                if (net_dim_decision(&curr_stats, dim)) {
                        dim->state = DIM_APPLY_NEW_PROFILE;
@@ -368,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample)
                }
                fallthrough;
        case DIM_START_MEASURE:
-               dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
-                                 end_sample.byte_ctr, &dim->start_sample);
+               dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr,
+                                 end_sample->byte_ctr, &dim->start_sample);
                dim->state = DIM_MEASURE_IN_PROGRESS;
                break;
        case DIM_APPLY_NEW_PROFILE: