net: hibmcge: Add support for abnormal irq handling feature
author: Jijie Shao <shaojijie@huawei.com>
Fri, 28 Feb 2025 11:54:08 +0000 (19:54 +0800)
committer: Paolo Abeni <pabeni@redhat.com>
Tue, 4 Mar 2025 12:45:33 +0000 (13:45 +0100)
Hardware errors are reported by interrupt and have to be
recovered by doing a function reset. The whole reset flow
takes a long time, so it must not run in the irq handler;
the irq handler only requests the reset, and the reset
itself is done in a scheduled task.

Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h
drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c
drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c
drivers/net/ethernet/hisilicon/hibmcge/hbg_err.h
drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c
drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c
drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c
drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h

index 920514a8e29a530fb90f31b74a6e4be16cc46e4c..4e4d33d2832a7fa7326c5a6ec9a30699ec26249e 100644 (file)
@@ -36,6 +36,7 @@ enum hbg_nic_state {
        HBG_NIC_STATE_EVENT_HANDLING = 0,
        HBG_NIC_STATE_RESETTING,
        HBG_NIC_STATE_RESET_FAIL,
+       HBG_NIC_STATE_NEED_RESET, /* trigger a reset in scheduled task */
 };
 
 enum hbg_reset_type {
@@ -104,6 +105,7 @@ struct hbg_irq_info {
        u32 mask;
        bool re_enable;
        bool need_print;
+       bool need_reset;
        u64 count;
 
        void (*irq_handle)(struct hbg_priv *priv, struct hbg_irq_info *info);
@@ -220,6 +222,7 @@ struct hbg_stats {
        u64 rx_fail_comma_cnt;
 
        u64 rx_dma_err_cnt;
+       u64 rx_fifo_less_empty_thrsld_cnt;
 
        u64 tx_octets_total_ok_cnt;
        u64 tx_uc_pkt_cnt;
@@ -268,4 +271,6 @@ struct hbg_priv {
        struct delayed_work service_task;
 };
 
+void hbg_err_reset_task_schedule(struct hbg_priv *priv);
+
 #endif
index 8473c43d171a915e00ac3359e27c987d5f9f7c13..55ce90b4319aa7ea6cd1b6c9b721040c27ffe662 100644 (file)
@@ -67,10 +67,11 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused)
        for (i = 0; i < priv->vectors.info_array_len; i++) {
                info = &priv->vectors.info_array[i];
                seq_printf(s,
-                          "%-20s: enabled: %-5s, logged: %-5s, count: %llu\n",
+                          "%-20s: enabled: %-5s, reset: %-5s, logged: %-5s, count: %llu\n",
                           info->name,
                           str_true_false(hbg_hw_irq_is_enabled(priv,
                                                                info->mask)),
+                          str_true_false(info->need_reset),
                           str_true_false(info->need_print),
                           info->count);
        }
@@ -114,6 +115,8 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused)
                   state_str_true_false(priv, HBG_NIC_STATE_RESET_FAIL));
        seq_printf(s, "last reset type: %s\n",
                   reset_type_str[priv->reset_type]);
+       seq_printf(s, "need reset state: %s\n",
+                  state_str_true_false(priv, HBG_NIC_STATE_NEED_RESET));
 
        return 0;
 }
index 4d1f4a33391a8bd7e40398341f8981a4ee5af096..4e8cb66f601c0e1ad21ab794bbbf79051d8191e3 100644 (file)
@@ -105,6 +105,62 @@ int hbg_reset(struct hbg_priv *priv)
        return hbg_reset_done(priv, HBG_RESET_TYPE_FUNCTION);
 }
 
+void hbg_err_reset(struct hbg_priv *priv)
+{
+       bool running;
+
+       rtnl_lock(); /* held across the whole close/reset/attach/open sequence */
+       running = netif_running(priv->netdev);
+       if (running)
+               dev_close(priv->netdev);
+       /* NOTE(review): the int result of hbg_reset() is ignored here. */
+       hbg_reset(priv);
+
+       /* hbg_pci_err_detected() detaches the netdev before recovery starts,
+        * so it has to be attached again before it can be reopened
+        */
+       if (!netif_device_present(priv->netdev))
+               netif_device_attach(priv->netdev);
+       /* Only bring the interface back up if it was up before the reset. */
+       if (running)
+               dev_open(priv->netdev, NULL);
+       rtnl_unlock();
+}
+
+static pci_ers_result_t hbg_pci_err_detected(struct pci_dev *pdev,
+                                            pci_channel_state_t state)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+
+       netif_device_detach(netdev); /* attached again in hbg_err_reset() */
+       /* A permanent failure is not recovered here: report disconnect. */
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+       /* Otherwise disable the device and ask for a slot reset. */
+       pci_disable_device(pdev);
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct hbg_priv *priv = netdev_priv(netdev);
+
+       if (pci_enable_device(pdev)) { /* was disabled in hbg_pci_err_detected() */
+               dev_err(&pdev->dev,
+                       "failed to re-enable PCI device after reset\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+       /* Re-enable bus mastering, reload the saved PCI state and save it again. */
+       pci_set_master(pdev);
+       pci_restore_state(pdev);
+       pci_save_state(pdev);
+       /* Function reset; reopens the netdev if it was running before. */
+       hbg_err_reset(priv);
+       netif_device_attach(netdev); /* NOTE(review): hbg_err_reset() already attaches; looks redundant */
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
 static void hbg_pci_err_reset_prepare(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
@@ -124,6 +180,8 @@ static void hbg_pci_err_reset_done(struct pci_dev *pdev)
 }
 
 static const struct pci_error_handlers hbg_pci_err_handler = {
+       .error_detected = hbg_pci_err_detected, /* detach netdev, request slot reset */
+       .slot_reset = hbg_pci_err_slot_reset, /* re-enable device, then hbg_err_reset() */
        .reset_prepare = hbg_pci_err_reset_prepare,
        .reset_done = hbg_pci_err_reset_done,
 };
index d7828e446308f5f3a48b43b8612070573a58238c..fb9fbe7004e8f357f5f341e6605fe7ad3acd04de 100644 (file)
@@ -9,5 +9,6 @@
 void hbg_set_pci_err_handler(struct pci_driver *pdrv);
 int hbg_reset(struct hbg_priv *priv);
 int hbg_rebuild(struct hbg_priv *priv);
+void hbg_err_reset(struct hbg_priv *priv);
 
 #endif
index f5be8d0ef611658d2a89cc47cf26abf758dc78de..8f1107b85fbb09cc6101bae2f0313c8fcd823510 100644 (file)
@@ -69,6 +69,7 @@ static const struct hbg_ethtool_stats hbg_ethtool_stats_info[] = {
                        HBG_REG_RX_LENGTHFIELD_ERR_CNT_ADDR),
        HBG_STATS_REG_I(rx_fail_comma_cnt, HBG_REG_RX_FAIL_COMMA_CNT_ADDR),
        HBG_STATS_I(rx_dma_err_cnt),
+       HBG_STATS_I(rx_fifo_less_empty_thrsld_cnt),
 
        HBG_STATS_REG_I(tx_uc_pkt_cnt, HBG_REG_TX_UC_PKTS_ADDR),
        HBG_STATS_REG_I(tx_vlan_pkt_cnt, HBG_REG_TX_TAGGED_ADDR),
index 25dd25f096fe0e9f65242c485760b20430bb1aa6..e79e9ab3e5308e6241d342fb517e68023c72f926 100644 (file)
@@ -11,6 +11,9 @@ static void hbg_irq_handle_err(struct hbg_priv *priv,
        if (irq_info->need_print)
                dev_err(&priv->pdev->dev,
                        "receive error interrupt: %s\n", irq_info->name);
+
+       if (irq_info->need_reset)
+               hbg_err_reset_task_schedule(priv);
 }
 
 static void hbg_irq_handle_tx(struct hbg_priv *priv,
@@ -25,30 +28,38 @@ static void hbg_irq_handle_rx(struct hbg_priv *priv,
        napi_schedule(&priv->rx_ring.napi);
 }
 
-#define HBG_TXRX_IRQ_I(name, handle) \
-       {#name, HBG_INT_MSK_##name##_B, false, false, 0, handle}
-#define HBG_ERR_IRQ_I(name, need_print) \
-       {#name, HBG_INT_MSK_##name##_B, true, need_print, 0, hbg_irq_handle_err}
+static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv,
+                                     struct hbg_irq_info *irq_info)
+{ /* irq_info is unused; it is kept to match the irq_handle callback signature */
+       priv->stats.rx_fifo_less_empty_thrsld_cnt++; /* RX_BUF_AVL is only counted */
+}
+/* Positional struct hbg_irq_info initializers; keep them in field order. */
+#define HBG_IRQ_I(name, handle) \
+       {#name, HBG_INT_MSK_##name##_B, false, false, false, 0, handle}
+#define HBG_ERR_IRQ_I(name, need_print, need_reset) \
+       {#name, HBG_INT_MSK_##name##_B, true, need_print, \
+       need_reset, 0, hbg_irq_handle_err}
 
 static struct hbg_irq_info hbg_irqs[] = {
-       HBG_TXRX_IRQ_I(RX, hbg_irq_handle_rx),
-       HBG_TXRX_IRQ_I(TX, hbg_irq_handle_tx),
-       HBG_ERR_IRQ_I(MAC_MII_FIFO_ERR, true),
-       HBG_ERR_IRQ_I(MAC_PCS_RX_FIFO_ERR, true),
-       HBG_ERR_IRQ_I(MAC_PCS_TX_FIFO_ERR, true),
-       HBG_ERR_IRQ_I(MAC_APP_RX_FIFO_ERR, true),
-       HBG_ERR_IRQ_I(MAC_APP_TX_FIFO_ERR, true),
-       HBG_ERR_IRQ_I(SRAM_PARITY_ERR, true),
-       HBG_ERR_IRQ_I(TX_AHB_ERR, true),
-       HBG_ERR_IRQ_I(RX_BUF_AVL, false),
-       HBG_ERR_IRQ_I(REL_BUF_ERR, true),
-       HBG_ERR_IRQ_I(TXCFG_AVL, false),
-       HBG_ERR_IRQ_I(TX_DROP, false),
-       HBG_ERR_IRQ_I(RX_DROP, false),
-       HBG_ERR_IRQ_I(RX_AHB_ERR, true),
-       HBG_ERR_IRQ_I(MAC_FIFO_ERR, false),
-       HBG_ERR_IRQ_I(RBREQ_ERR, false),
-       HBG_ERR_IRQ_I(WE_ERR, false),
+       HBG_IRQ_I(RX, hbg_irq_handle_rx), /* HBG_IRQ_I: no print, no reset, custom handler */
+       HBG_IRQ_I(TX, hbg_irq_handle_tx),
+       HBG_ERR_IRQ_I(TX_PKT_CPL, true, true), /* HBG_ERR_IRQ_I args: name, print?, reset? */
+       HBG_ERR_IRQ_I(MAC_MII_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(MAC_PCS_RX_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(MAC_PCS_TX_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(MAC_APP_RX_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(MAC_APP_TX_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(SRAM_PARITY_ERR, true, false),
+       HBG_ERR_IRQ_I(TX_AHB_ERR, true, true),
+       HBG_IRQ_I(RX_BUF_AVL, hbg_irq_handle_rx_buf_val), /* only bumps a stats counter */
+       HBG_ERR_IRQ_I(REL_BUF_ERR, true, false),
+       HBG_ERR_IRQ_I(TXCFG_AVL, false, false),
+       HBG_ERR_IRQ_I(TX_DROP, false, false),
+       HBG_ERR_IRQ_I(RX_DROP, false, false),
+       HBG_ERR_IRQ_I(RX_AHB_ERR, true, false),
+       HBG_ERR_IRQ_I(MAC_FIFO_ERR, true, true),
+       HBG_ERR_IRQ_I(RBREQ_ERR, true, true),
+       HBG_ERR_IRQ_I(WE_ERR, true, true),
 };
 
 static irqreturn_t hbg_irq_handle(int irq_num, void *p)
index 688f408de84cd61c9bb8b07e153865fc43ba71cf..40f62db33feda792bbcc6e65f7b72c31958772a8 100644 (file)
@@ -283,6 +283,9 @@ static void hbg_service_task(struct work_struct *work)
        struct hbg_priv *priv = container_of(work, struct hbg_priv,
                                             service_task.work);
 
+       if (test_and_clear_bit(HBG_NIC_STATE_NEED_RESET, &priv->state))
+               hbg_err_reset(priv);
+
        /* The type of statistics register is u32,
         * To prevent the statistics register from overflowing,
         * the driver dumps the statistics every 30 seconds.
@@ -292,6 +295,12 @@ static void hbg_service_task(struct work_struct *work)
                              msecs_to_jiffies(30 * MSEC_PER_SEC));
 }
 
+/* Flag that a reset is needed and kick the service task, which performs it. */
+void hbg_err_reset_task_schedule(struct hbg_priv *priv)
+{
+       set_bit(HBG_NIC_STATE_NEED_RESET, &priv->state);
+       /* The service task may already be queued with a long delay, in which
+        * case schedule_delayed_work() would do nothing; pull it forward instead.
+        */
+       mod_delayed_work(system_wq, &priv->service_task, 0);
+}
+
 static void hbg_cancel_delayed_work_sync(void *data)
 {
        cancel_delayed_work_sync(data);
index 106d0e0408ba4050c769589c14728f44bc1ba924..c45450ab608c91a96cc59e3b9b3cc01269fc44c1 100644 (file)
 #define HBG_INT_MSK_MAC_PCS_TX_FIFO_ERR_B      BIT(17)
 #define HBG_INT_MSK_MAC_PCS_RX_FIFO_ERR_B      BIT(16)
 #define HBG_INT_MSK_MAC_MII_FIFO_ERR_B         BIT(15)
+#define HBG_INT_MSK_TX_PKT_CPL_B               BIT(14)
 #define HBG_INT_MSK_TX_B                       BIT(1) /* just used in driver */
 #define HBG_INT_MSK_RX_B                       BIT(0) /* just used in driver */
 #define HBG_REG_CF_INTRPT_STAT_ADDR            (HBG_REG_SGMII_BASE + 0x0434)