i40evf: refactor reset handling
authorMitch Williams <mitch.a.williams@intel.com>
Thu, 13 Feb 2014 11:48:53 +0000 (03:48 -0800)
committerDavid S. Miller <davem@davemloft.net>
Thu, 13 Feb 2014 22:27:30 +0000 (17:27 -0500)
Respond better to a VF reset event. When a reset is signaled by the
PF, or detected by the watchdog task, prevent the watchdog from
processing admin queue requests, and schedule the reset task.

In the reset task, wait first for the reset to start, then for it to
complete, then reinit the driver.

If the reset never appears to complete after a long, long time (>10
seconds is possible depending on what's going on with the PF driver),
then set a flag to indicate that PF communications have failed.

If this flag is set, check for the reset to complete in the watchdog,
and  attempt to do a full reinitialization of the driver from scratch.

With these changes the VF driver correctly handles a PF reset event
while running on bare metal, or in a VM.

Also update copyrights.

Change-ID: I93513efd0b50523a8345e7f6a33a5e4f8a2a5996
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c

index 5e0a3440c064cc2b909e8c2e11474b765402d788..a30c4a9125a4ca11d0a729c399a80b35d6d1949f 100644 (file)
@@ -164,15 +164,14 @@ struct i40evf_vlan_filter {
 /* Driver state. The order of these is important! */
 enum i40evf_state_t {
        __I40EVF_STARTUP,               /* driver loaded, probe complete */
-       __I40EVF_FAILED,                /* PF communication failed. Fatal. */
        __I40EVF_REMOVE,                /* driver is being unloaded */
        __I40EVF_INIT_VERSION_CHECK,    /* aq msg sent, awaiting reply */
        __I40EVF_INIT_GET_RESOURCES,    /* aq msg sent, awaiting reply */
        __I40EVF_INIT_SW,               /* got resources, setting up structs */
+       __I40EVF_RESETTING,             /* in reset */
        /* Below here, watchdog is running */
        __I40EVF_DOWN,                  /* ready, can be opened */
        __I40EVF_TESTING,               /* in ethtool self-test */
-       __I40EVF_RESETTING,             /* in reset */
        __I40EVF_RUNNING,               /* opened, working */
 };
 
@@ -214,6 +213,8 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_IMIR_ENABLED                 (u32)(1 << 5)
 #define I40EVF_FLAG_MQ_CAPABLE                   (u32)(1 << 6)
 #define I40EVF_FLAG_NEED_LINK_UPDATE             (u32)(1 << 7)
+#define I40EVF_FLAG_PF_COMMS_FAILED              (u32)(1 << 8)
+#define I40EVF_FLAG_RESET_PENDING                (u32)(1 << 9)
 /* duplcates for common code */
 #define I40E_FLAG_FDIR_ATR_ENABLED              0
 #define I40E_FLAG_DCB_ENABLED                   0
@@ -231,6 +232,7 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES                (u32)(1 << 6)
 #define I40EVF_FLAG_AQ_MAP_VECTORS             (u32)(1 << 7)
 #define I40EVF_FLAG_AQ_HANDLE_RESET            (u32)(1 << 8)
+
        /* OS defined structs */
        struct net_device *netdev;
        struct pci_dev *pdev;
index d271d3a5ae28524552c6c1ba2bc33501cc234421..fe2271e19423a70b7dae20dd4b3b0b5f93aa5d8b 100644 (file)
@@ -964,16 +964,18 @@ void i40evf_down(struct i40evf_adapter *adapter)
        struct net_device *netdev = adapter->netdev;
        struct i40evf_mac_filter *f;
 
-       /* remove all MAC filters from the VSI */
+       /* remove all MAC filters */
        list_for_each_entry(f, &adapter->mac_filter_list, list) {
                f->remove = true;
        }
-       adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-       /* disable receives */
-       adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
-       mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-       msleep(20);
-
+       if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
+           adapter->state != __I40EVF_RESETTING) {
+               adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+               /* disable receives */
+               adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
+               mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
+               msleep(20);
+       }
        netif_tx_disable(netdev);
 
        netif_tx_stop_all_queues(netdev);
@@ -1292,19 +1294,47 @@ static void i40evf_watchdog_task(struct work_struct *work)
                                          watchdog_task);
        struct i40e_hw *hw = &adapter->hw;
 
-       if (adapter->state < __I40EVF_DOWN)
+       if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
+               goto restart_watchdog;
+
+       if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+               dev_info(&adapter->pdev->dev, "Checking for redemption\n");
+               if ((rd32(hw, I40E_VFGEN_RSTAT) & 0x3) == I40E_VFR_VFACTIVE) {
+                       /* A chance for redemption! */
+                       dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
+                       adapter->state = __I40EVF_STARTUP;
+                       adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+                       schedule_delayed_work(&adapter->init_task, 10);
+                       clear_bit(__I40EVF_IN_CRITICAL_TASK,
+                                 &adapter->crit_section);
+                       /* Don't reschedule the watchdog, since we've restarted
+                        * the init task. When init_task contacts the PF and
+                        * gets everything set up again, it'll restart the
+                        * watchdog for us. Down, boy. Sit. Stay. Woof.
+                        */
+                       return;
+               }
+               adapter->aq_pending = 0;
+               adapter->aq_required = 0;
+               adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
                goto watchdog_done;
+       }
 
-       if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
+       if ((adapter->state < __I40EVF_DOWN) ||
+           (adapter->flags & I40EVF_FLAG_RESET_PENDING))
                goto watchdog_done;
 
-       /* check for unannounced reset */
-       if ((adapter->state != __I40EVF_RESETTING) &&
+       /* check for reset */
+       if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING) &&
            (rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) {
                adapter->state = __I40EVF_RESETTING;
+               adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+               dev_err(&adapter->pdev->dev, "Hardware reset detected.\n");
+               dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
                schedule_work(&adapter->reset_task);
-               dev_info(&adapter->pdev->dev, "%s: hardware reset detected\n",
-                        __func__);
+               adapter->aq_pending = 0;
+               adapter->aq_required = 0;
+               adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
                goto watchdog_done;
        }
 
@@ -1359,13 +1389,15 @@ static void i40evf_watchdog_task(struct work_struct *work)
 
        i40evf_irq_enable(adapter, true);
        i40evf_fire_sw_int(adapter, 0xFF);
+
 watchdog_done:
+       clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+restart_watchdog:
        if (adapter->aq_required)
                mod_timer(&adapter->watchdog_timer,
                          jiffies + msecs_to_jiffies(20));
        else
                mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
-       clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
        schedule_work(&adapter->adminq_task);
 }
 
@@ -1412,6 +1444,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
        i40e_flush(hw);
 }
 
+#define I40EVF_RESET_WAIT_MS 100
+#define I40EVF_RESET_WAIT_COUNT 200
 /**
  * i40evf_reset_task - Call-back task to handle hardware reset
  * @work: pointer to work_struct
@@ -1422,8 +1456,9 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
  **/
 static void i40evf_reset_task(struct work_struct *work)
 {
-       struct i40evf_adapter *adapter =
-                       container_of(work, struct i40evf_adapter, reset_task);
+       struct i40evf_adapter *adapter = container_of(work,
+                                                     struct i40evf_adapter,
+                                                     reset_task);
        struct i40e_hw *hw = &adapter->hw;
        int i = 0, err;
        uint32_t rstat_val;
@@ -1431,22 +1466,56 @@ static void i40evf_reset_task(struct work_struct *work)
        while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
                                &adapter->crit_section))
                udelay(500);
+       /* poll until we see the reset actually happen */
+       for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
+               rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
+                           I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+               if (rstat_val != I40E_VFR_VFACTIVE) {
+                       dev_info(&adapter->pdev->dev, "Reset now occurring\n");
+                       break;
+               } else {
+                       msleep(I40EVF_RESET_WAIT_MS);
+               }
+       }
+       if (i == I40EVF_RESET_WAIT_COUNT) {
+               dev_err(&adapter->pdev->dev, "Reset was not detected\n");
+               adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+               goto continue_reset; /* act like the reset happened */
+       }
 
-       /* wait until the reset is complete */
-       for (i = 0; i < 20; i++) {
+       /* wait until the reset is complete and the PF is responding to us */
+       for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
                rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
                            I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-               if (rstat_val == I40E_VFR_COMPLETED)
+               if (rstat_val == I40E_VFR_VFACTIVE) {
+                       dev_info(&adapter->pdev->dev, "Reset is complete. Reinitializing.\n");
                        break;
-               else
-                       mdelay(100);
+               } else {
+                       msleep(I40EVF_RESET_WAIT_MS);
+               }
        }
-       if (i == 20) {
+       if (i == I40EVF_RESET_WAIT_COUNT) {
                /* reset never finished */
-               dev_info(&adapter->pdev->dev, "%s: reset never finished: %x\n",
-                       __func__, rstat_val);
-               /* carry on anyway */
+               dev_err(&adapter->pdev->dev, "Reset never finished (%x). PF driver is dead, and so am I.\n",
+                       rstat_val);
+               adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
+
+               if (netif_running(adapter->netdev))
+                       i40evf_close(adapter->netdev);
+
+               i40evf_free_misc_irq(adapter);
+               i40evf_reset_interrupt_capability(adapter);
+               i40evf_free_queues(adapter);
+               kfree(adapter->vf_res);
+               i40evf_shutdown_adminq(hw);
+               adapter->netdev->flags &= ~IFF_UP;
+               clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+               return; /* Do not attempt to reinit. It's dead, Jim. */
        }
+
+continue_reset:
+       adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
        i40evf_down(adapter);
        adapter->state = __I40EVF_RESETTING;
 
@@ -1506,6 +1575,9 @@ static void i40evf_adminq_task(struct work_struct *work)
        i40e_status ret;
        u16 pending;
 
+       if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+               return;
+
        event.msg_size = I40EVF_MAX_AQ_BUF_SIZE;
        event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
        if (!event.msg_buf) {
@@ -1637,6 +1709,10 @@ static int i40evf_open(struct net_device *netdev)
        struct i40evf_adapter *adapter = netdev_priv(netdev);
        int err;
 
+       if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+               dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
+               return -EIO;
+       }
        if (adapter->state != __I40EVF_DOWN)
                return -EBUSY;
 
@@ -1691,8 +1767,12 @@ static int i40evf_close(struct net_device *netdev)
 {
        struct i40evf_adapter *adapter = netdev_priv(netdev);
 
+       if (adapter->state <= __I40EVF_DOWN)
+               return 0;
+
        /* signal that we are down to the interrupt handler */
        adapter->state = __I40EVF_DOWN;
+
        set_bit(__I40E_DOWN, &adapter->vsi.state);
 
        i40evf_down(adapter);
@@ -1843,6 +1923,8 @@ static void i40evf_init_task(struct work_struct *work)
        switch (adapter->state) {
        case __I40EVF_STARTUP:
                /* driver loaded, probe complete */
+               adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+               adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
                err = i40e_set_mac_type(hw);
                if (err) {
                        dev_info(&pdev->dev, "%s: set_mac_type failed: %d\n",
@@ -2006,9 +2088,11 @@ static void i40evf_init_task(struct work_struct *work)
        adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
        adapter->vsi.netdev = adapter->netdev;
 
-       err = register_netdev(netdev);
-       if (err)
-               goto err_register;
+       if (!adapter->netdev_registered) {
+               err = register_netdev(netdev);
+               if (err)
+                       goto err_register;
+       }
 
        adapter->netdev_registered = true;
 
@@ -2032,17 +2116,16 @@ err_register:
        i40evf_free_misc_irq(adapter);
 err_sw_init:
        i40evf_reset_interrupt_capability(adapter);
-       adapter->state = __I40EVF_FAILED;
 err_alloc:
        kfree(adapter->vf_res);
        adapter->vf_res = NULL;
 err:
+       if (hw->aq.asq.count)
+               i40evf_shutdown_adminq(hw); /* ignore error */
        /* Things went into the weeds, so try again later */
        if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
                dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n");
-               if (hw->aq.asq.count)
-                       i40evf_shutdown_adminq(hw); /* ignore error */
-               adapter->state = __I40EVF_FAILED;
+               adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
                return; /* do not reschedule */
        }
        schedule_delayed_work(&adapter->init_task, HZ * 3);
@@ -2272,6 +2355,7 @@ static void i40evf_remove(struct pci_dev *pdev)
        struct i40e_hw *hw = &adapter->hw;
 
        cancel_delayed_work_sync(&adapter->init_task);
+       cancel_work_sync(&adapter->reset_task);
 
        if (adapter->netdev_registered) {
                unregister_netdev(netdev);
index e6978d79e62bb691a0434b3935b58d870f98f2f4..93891a114d3f08c018c38dd7f6c4672a775dc402 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -43,6 +43,9 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
        struct i40e_hw *hw = &adapter->hw;
        i40e_status err;
 
+       if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+               return 0; /* nothing to see here, move along */
+
        err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
        if (err)
                dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, error %d, aq status %d\n",
@@ -689,10 +692,12 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                        }
                        break;
                case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
-                       adapter->state = __I40EVF_RESETTING;
-                       schedule_work(&adapter->reset_task);
-                       dev_info(&adapter->pdev->dev,
-                                "%s: hardware reset pending\n", __func__);
+                       dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+                       if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
+                               adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+                               dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+                               schedule_work(&adapter->reset_task);
+                       }
                        break;
                default:
                        dev_err(&adapter->pdev->dev,