net: ethernet: ti: am65-cpsw: add sw tx/rx irq coalescing based on hrtimers
author Grygorii Strashko <grygorii.strashko@ti.com>
Tue, 19 Dec 2023 10:58:05 +0000 (12:58 +0200)
committer David S. Miller <davem@davemloft.net>
Sat, 23 Dec 2023 01:01:19 +0000 (01:01 +0000)
Add SW IRQ coalescing based on hrtimers for TX and RX data path which
can be enabled by ethtool commands:

- RX coalescing
  ethtool -C eth1 rx-usecs 50

- TX coalescing can be enabled per TX queue

  - by default enables coalescing for TX0
  ethtool -C eth1 tx-usecs 50
  - configure TX0
  ethtool -Q eth0 queue_mask 1 --coalesce tx-usecs 100
  - configure TX1
  ethtool -Q eth0 queue_mask 2 --coalesce tx-usecs 100
  - configure TX0 and TX1
  ethtool -Q eth0 queue_mask 3 --coalesce tx-usecs 100 --coalesce tx-usecs 100

  show configuration for TX0 and TX1:
  ethtool -Q eth0 queue_mask 3 --show-coalesce

Compared to gro_flush_timeout and napi_defer_hard_irqs, this patch
allows IRQ coalescing to be enabled for the RX path separately.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/ti/am65-cpsw-ethtool.c
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/ti/am65-cpsw-nuss.h

index 0a6134cdf39f6a52dd46f9c0a3d4ae359a4ba932..35fceba01ea453141602e1a79038c8fd0d3a07c6 100644 (file)
@@ -904,6 +904,80 @@ static void am65_cpsw_get_mm_stats(struct net_device *ndev,
        s->MACMergeHoldCount = readl(base + AM65_CPSW_STATN_IET_TX_HOLD);
 }
 
+/* Report device-wide pacing: the RX timeout and the TX timeout of TX channel 0.
+ * Timeouts are stored in nanoseconds and reported to ethtool in microseconds.
+ */
+static int am65_cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
+{
+       struct am65_cpsw_common *priv = am65_ndev_to_common(ndev);
+
+       coal->rx_coalesce_usecs = priv->rx_pace_timeout / 1000;
+       coal->tx_coalesce_usecs = priv->tx_chns[0].tx_pace_timeout / 1000;
+
+       return 0;
+}
+
+/* Report the TX pacing timeout of TX channel @queue in microseconds.
+ * Returns -EINVAL when @queue is not below AM65_CPSW_MAX_TX_QUEUES.
+ */
+static int am65_cpsw_get_per_queue_coalesce(struct net_device *ndev, u32 queue,
+                                           struct ethtool_coalesce *coal)
+{
+       struct am65_cpsw_common *priv = am65_ndev_to_common(ndev);
+
+       if (queue >= AM65_CPSW_MAX_TX_QUEUES)
+               return -EINVAL;
+
+       coal->tx_coalesce_usecs = priv->tx_chns[queue].tx_pace_timeout / 1000;
+
+       return 0;
+}
+
+/* Set the RX pacing timeout and the TX pacing timeout of TX channel 0.
+ * Zero is accepted as-is; other values below 20us are rejected with -EINVAL.
+ */
+static int am65_cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
+{
+       struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+
+       if (coal->rx_coalesce_usecs && coal->rx_coalesce_usecs < 20)
+               return -EINVAL;
+       if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20)
+               return -EINVAL;
+
+       /* Widen before scaling to ns: a 32-bit usecs * 1000 wraps above ~4.29 s. */
+       common->rx_pace_timeout = (unsigned long)coal->rx_coalesce_usecs * 1000;
+       common->tx_chns[0].tx_pace_timeout = (unsigned long)coal->tx_coalesce_usecs * 1000;
+
+       return 0;
+}
+
+/* Set TX channel @queue's pacing timeout; non-zero requests under 20us become
+ * 20us. usecs is widened before scaling to ns so large values do not wrap.
+ */
+static int am65_cpsw_set_per_queue_coalesce(struct net_device *ndev, u32 queue,
+                                           struct ethtool_coalesce *coal)
+{
+       struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+
+       if (queue >= AM65_CPSW_MAX_TX_QUEUES)
+               return -EINVAL;
+
+       if (coal->tx_coalesce_usecs && coal->tx_coalesce_usecs < 20) {
+               dev_info(common->dev, "defaulting to min value of 20us for tx-usecs for tx-%u\n",
+                        queue);
+               coal->tx_coalesce_usecs = 20;
+       }
+
+       common->tx_chns[queue].tx_pace_timeout = (unsigned long)coal->tx_coalesce_usecs * 1000;
+
+       return 0;
+}
+
 const struct ethtool_ops am65_cpsw_ethtool_ops_slave = {
        .begin                  = am65_cpsw_ethtool_op_begin,
        .complete               = am65_cpsw_ethtool_op_complete,
@@ -922,6 +996,11 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = {
        .get_ts_info            = am65_cpsw_get_ethtool_ts_info,
        .get_priv_flags         = am65_cpsw_get_ethtool_priv_flags,
        .set_priv_flags         = am65_cpsw_set_ethtool_priv_flags,
+       .supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+       .get_coalesce           = am65_cpsw_get_coalesce,
+       .set_coalesce           = am65_cpsw_set_coalesce,
+       .get_per_queue_coalesce = am65_cpsw_get_per_queue_coalesce,
+       .set_per_queue_coalesce = am65_cpsw_set_per_queue_coalesce,
 
        .get_link               = ethtool_op_get_link,
        .get_link_ksettings     = am65_cpsw_get_link_ksettings,
index 41e0046a52d544fdaee4768359b70253e7d98f2d..faa0561e988ecb1e8d866f2c9e9b27b109e474d0 100644 (file)
@@ -596,8 +596,10 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
                                        msecs_to_jiffies(1000));
        if (!i)
                dev_err(common->dev, "tx timeout\n");
-       for (i = 0; i < common->tx_ch_num; i++)
+       for (i = 0; i < common->tx_ch_num; i++) {
                napi_disable(&common->tx_chns[i].napi_tx);
+               hrtimer_cancel(&common->tx_chns[i].tx_hrtimer);
+       }
 
        for (i = 0; i < common->tx_ch_num; i++) {
                k3_udma_glue_reset_tx_chn(common->tx_chns[i].tx_chn,
@@ -616,6 +618,7 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
        }
 
        napi_disable(&common->napi_rx);
+       hrtimer_cancel(&common->rx_hrtimer);
 
        for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
                k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, i,
@@ -885,6 +888,15 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
        return ret;
 }
 
+/* RX pacing timer handler: re-enable the RX channel IRQ; the timer is not re-armed. */
+static enum hrtimer_restart am65_cpsw_nuss_rx_timer_callback(struct hrtimer *timer)
+{
+       struct am65_cpsw_common *priv = container_of(timer, struct am65_cpsw_common, rx_hrtimer);
+
+       enable_irq(priv->rx_chns.irq);
+       return HRTIMER_NORESTART;
+}
+
 static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
 {
        struct am65_cpsw_common *common = am65_cpsw_napi_to_common(napi_rx);
@@ -912,7 +924,13 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
        if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
                if (common->rx_irq_disabled) {
                        common->rx_irq_disabled = false;
-                       enable_irq(common->rx_chns.irq);
+                       if (unlikely(common->rx_pace_timeout)) {
+                               hrtimer_start(&common->rx_hrtimer,
+                                             ns_to_ktime(common->rx_pace_timeout),
+                                             HRTIMER_MODE_REL_PINNED);
+                       } else {
+                               enable_irq(common->rx_chns.irq);
+                       }
                }
        }
 
@@ -968,7 +986,7 @@ static void am65_cpsw_nuss_tx_wake(struct am65_cpsw_tx_chn *tx_chn, struct net_d
 }
 
 static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
-                                          int chn, unsigned int budget)
+                                          int chn, unsigned int budget, bool *tdown)
 {
        struct device *dev = common->dev;
        struct am65_cpsw_tx_chn *tx_chn;
@@ -991,6 +1009,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
                if (cppi5_desc_is_tdcm(desc_dma)) {
                        if (atomic_dec_and_test(&common->tdown_cnt))
                                complete(&common->tdown_complete);
+                       *tdown = true;
                        break;
                }
 
@@ -1013,7 +1032,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
 }
 
 static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common,
-                                             int chn, unsigned int budget)
+                                             int chn, unsigned int budget, bool *tdown)
 {
        struct device *dev = common->dev;
        struct am65_cpsw_tx_chn *tx_chn;
@@ -1034,6 +1053,7 @@ static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common,
                if (cppi5_desc_is_tdcm(desc_dma)) {
                        if (atomic_dec_and_test(&common->tdown_cnt))
                                complete(&common->tdown_complete);
+                       *tdown = true;
                        break;
                }
 
@@ -1059,21 +1079,40 @@ static int am65_cpsw_nuss_tx_compl_packets_2g(struct am65_cpsw_common *common,
        return num_tx;
 }
 
+/* TX pacing timer handler: re-enable the owning TX channel's IRQ; no re-arm. */
+static enum hrtimer_restart am65_cpsw_nuss_tx_timer_callback(struct hrtimer *timer)
+{
+       struct am65_cpsw_tx_chn *chn = container_of(timer, struct am65_cpsw_tx_chn, tx_hrtimer);
+
+       enable_irq(chn->irq);
+       return HRTIMER_NORESTART;
+}
+
 static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
 {
        struct am65_cpsw_tx_chn *tx_chn = am65_cpsw_napi_to_tx_chn(napi_tx);
+       bool tdown = false; /* set via the completion handlers' tdown argument */
        int num_tx;
 
        if (AM65_CPSW_IS_CPSW2G(tx_chn->common))
-               num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id, budget);
+               num_tx = am65_cpsw_nuss_tx_compl_packets_2g(tx_chn->common, tx_chn->id,
+                                                           budget, &tdown);
        else
-               num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
+               num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common,
+                                                        tx_chn->id, budget, &tdown);
 
        if (num_tx >= budget)
                return budget;
 
-       if (napi_complete_done(napi_tx, num_tx))
-               enable_irq(tx_chn->irq);
+       if (napi_complete_done(napi_tx, num_tx)) {
+               if (unlikely(tx_chn->tx_pace_timeout && !tdown)) { /* pacing set, tdown not flagged */
+                       hrtimer_start(&tx_chn->tx_hrtimer,
+                                     ns_to_ktime(tx_chn->tx_pace_timeout),
+                                     HRTIMER_MODE_REL_PINNED); /* timer callback re-enables the IRQ */
+               } else {
+                       enable_irq(tx_chn->irq); /* otherwise re-enable the IRQ right away */
+               }
+       }
 
        return 0;
 }
@@ -1705,6 +1744,8 @@ static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
 
                netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx,
                                  am65_cpsw_nuss_tx_poll);
+               hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+               tx_chn->tx_hrtimer.function = &am65_cpsw_nuss_tx_timer_callback;
 
                ret = devm_request_irq(dev, tx_chn->irq,
                                       am65_cpsw_nuss_tx_irq,
@@ -1930,6 +1971,8 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 
        netif_napi_add(common->dma_ndev, &common->napi_rx,
                       am65_cpsw_nuss_rx_poll);
+       hrtimer_init(&common->rx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+       common->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;
 
        ret = devm_request_irq(dev, rx_chn->irq,
                               am65_cpsw_nuss_rx_irq,
index 1e4a045057fc1183737e617a0a242bc8c016044a..7da0492dc09112ee67983935a64a05473c245103 100644 (file)
@@ -75,6 +75,8 @@ struct am65_cpsw_tx_chn {
        struct k3_cppi_desc_pool *desc_pool;
        struct k3_udma_glue_tx_channel *tx_chn;
        spinlock_t lock; /* protect TX rings in multi-port mode */
+       struct hrtimer tx_hrtimer;
+       unsigned long tx_pace_timeout;
        int irq;
        u32 id;
        u32 descs_num;
@@ -138,6 +140,8 @@ struct am65_cpsw_common {
        struct napi_struct      napi_rx;
 
        bool                    rx_irq_disabled;
+       struct hrtimer          rx_hrtimer;
+       unsigned long           rx_pace_timeout;
 
        u32                     nuss_ver;
        u32                     cpsw_ver;