ibmvnic: Only record tx completed bytes once per handler
authorNick Child <nnac123@linux.ibm.com>
Wed, 7 Aug 2024 21:18:08 +0000 (16:18 -0500)
committerJakub Kicinski <kuba@kernel.org>
Sat, 10 Aug 2024 05:09:18 +0000 (22:09 -0700)
Byte Queue Limits (BQL) depends on dql_completed() being called once per
tx completion round in order to adjust its algorithm appropriately. The
dql->limit value is an approximation of the number of bytes that the NIC
can consume per irq interval. If this approximation is too high then the
NIC will become over-saturated; too low and the NIC will starve.
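
For reference, the driver-side BQL contract looks roughly like the
sketch below; only the netdev_tx_*_queue() helpers are real kernel API,
the surrounding function and variable names are illustrative.
netdev_tx_completed_queue() is the wrapper that ends up calling
dql_completed():

  #include <linux/netdevice.h>

  /* Sketch only: example_xmit()/example_tx_irq() are made-up names,
   * not ibmvnic functions.
   */
  static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
  {
          struct netdev_queue *txq;

          txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
          /* ... hand the frame to the NIC ... */
          netdev_tx_sent_queue(txq, skb->len); /* bytes queued to hw */
          return NETDEV_TX_OK;
  }

  static void example_tx_irq(struct net_device *dev, int queue)
  {
          struct netdev_queue *txq = netdev_get_tx_queue(dev, queue);
          unsigned int pkts = 0, bytes = 0;

          /* ... reclaim all completed descriptors, adding to pkts/bytes ... */

          /* exactly one dql_completed() per completion round */
          netdev_tx_completed_queue(txq, pkts, bytes);
  }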

The dql->limit calculation depends on the dql->prev_* stats to find an
optimal value. If dql_completed() is called more than once per irq
handler then those prev_* values become unreliable (they no longer
reflect the previous state of the NIC), resulting in a sub-optimal
limit value.
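
In schematic form (condensed from the change below; the loop body is
elided), the fix is to accumulate the totals inside the completion loop
and report them once when the loop is done:

  	unsigned int num_packets = 0, total_bytes = 0;

  	while (pending_scrq(adapter, scrq)) {
  		/* reclaim this batch of completions, adding the freed
  		 * skbs' counts to num_packets and total_bytes
  		 */

  		/* Reporting here (the old behaviour) would invoke
  		 * dql_completed() several times per interrupt, so its
  		 * prev_* snapshot would cover only a slice of the round.
  		 */
  	}

  	/* one report per handler: dql sees the whole completion round */
  	netdev_tx_completed_queue(txq, num_packets, total_bytes);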

Therefore, accumulate the completed packet and byte counts across the
whole handler and move the call to netdev_tx_completed_queue() to the
end of ibmvnic_complete_tx().

When running 150 sessions of a TCP rr (request-response, 1 byte packets)
workload, one could observe:
  PREVIOUSLY: - limit and inflight values hovering around 130
              - transaction rate of around 750k pps.

  NOW:        - limit rises and falls in response to inflight (130-900)
              - transaction rate of around 1M pps (33% improvement)

Signed-off-by: Nick Child <nnac123@linux.ibm.com>
Link: https://patch.msgid.link/20240807211809.1259563-7-nnac123@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/ibm/ibmvnic.c

index d7262674eab7a9972ec32aaf5f4d672193fa4116..b687e5396e110b6109b3faa0a3e4a302bad912ee 100644
@@ -4189,20 +4189,17 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
                               struct ibmvnic_sub_crq_queue *scrq)
 {
        struct device *dev = &adapter->vdev->dev;
+       int num_packets = 0, total_bytes = 0;
        struct ibmvnic_tx_pool *tx_pool;
        struct ibmvnic_tx_buff *txbuff;
        struct netdev_queue *txq;
        union sub_crq *next;
-       int index;
-       int i;
+       int index, i;
 
 restart_loop:
        while (pending_scrq(adapter, scrq)) {
                unsigned int pool = scrq->pool_index;
                int num_entries = 0;
-               int total_bytes = 0;
-               int num_packets = 0;
-
                next = ibmvnic_next_scrq(adapter, scrq);
                for (i = 0; i < next->tx_comp.num_comps; i++) {
                        index = be32_to_cpu(next->tx_comp.correlators[i]);
@@ -4238,8 +4235,6 @@ restart_loop:
                /* remove tx_comp scrq*/
                next->tx_comp.first = 0;
 
-               txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
-               netdev_tx_completed_queue(txq, num_packets, total_bytes);
 
                if (atomic_sub_return(num_entries, &scrq->used) <=
                    (adapter->req_tx_entries_per_subcrq / 2) &&
@@ -4264,6 +4259,9 @@ restart_loop:
                goto restart_loop;
        }
 
+       txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
+       netdev_tx_completed_queue(txq, num_packets, total_bytes);
+
        return 0;
 }