scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out
authorKaran Tilak Kumar <kartilak@cisco.com>
Wed, 18 Jun 2025 00:34:28 +0000 (17:34 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Fri, 20 Jun 2025 03:06:27 +0000 (23:06 -0400)
When both the RHBA and RPA FDMI requests time out, fnic reuses a frame to
send ABTS for each of them. On send completion, this causes an attempt to
free the same frame twice that leads to a crash.

Fix crash by allocating separate frames for RHBA and RPA, and modify ABTS
logic accordingly.

Tested by checking MDS for FDMI information.

Tested by using instrumented driver to:

 - Drop PLOGI response
 - Drop RHBA response
 - Drop RPA response
 - Drop RHBA and RPA response
 - Drop PLOGI response + ABTS response
 - Drop RHBA response + ABTS response
 - Drop RPA response + ABTS response
 - Drop RHBA and RPA response + ABTS response for both of them

Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
Tested-by: Arun Easi <aeasi@cisco.com>
Co-developed-by: Arun Easi <aeasi@cisco.com>
Signed-off-by: Arun Easi <aeasi@cisco.com>
Tested-by: Karan Tilak Kumar <kartilak@cisco.com>
Cc: stable@vger.kernel.org
Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
Link: https://lore.kernel.org/r/20250618003431.6314-1-kartilak@cisco.com
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/fnic/fdls_disc.c
drivers/scsi/fnic/fnic.h
drivers/scsi/fnic/fnic_fdls.h

index f8ab69c51dab5c21c9c0d325b57a7ea278306525..36b498ad55b4b676e9fa675a84f59f60b2562ac9 100644 (file)
@@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
        iport->fabric.timer_pending = 1;
 }
 
-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
+               uint16_t oxid)
 {
-       uint8_t *frame;
+       struct fc_frame_header *pfdmi_abts;
        uint8_t d_id[3];
+       uint8_t *frame;
        struct fnic *fnic = iport->fnic;
-       struct fc_frame_header *pfabric_abts;
-       unsigned long fdmi_tov;
-       uint16_t oxid;
-       uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
-                       sizeof(struct fc_frame_header);
 
        frame = fdls_alloc_frame(iport);
        if (frame == NULL) {
                FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
                                "Failed to allocate frame to send FDMI ABTS");
-               return;
+               return NULL;
        }
 
-       pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
+       pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
        fdls_init_fabric_abts_frame(frame, iport);
 
        hton24(d_id, FC_FID_MGMT_SERV);
-       FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
+       FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
+       FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
+
+       return frame;
+}
+
+static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+{
+       uint8_t *frame;
+       unsigned long fdmi_tov;
+       uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
+                       sizeof(struct fc_frame_header);
 
        if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
-               oxid = iport->active_oxid_fdmi_plogi;
-               FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+               frame = fdls_alloc_init_fdmi_abts_frame(iport,
+                                               iport->active_oxid_fdmi_plogi);
+               if (frame == NULL)
+                       return;
+
                fnic_send_fcoe_frame(iport, frame, frame_size);
        } else {
                if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
-                       oxid = iport->active_oxid_fdmi_rhba;
-                       FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+                       frame = fdls_alloc_init_fdmi_abts_frame(iport,
+                                               iport->active_oxid_fdmi_rhba);
+                       if (frame == NULL)
+                               return;
+
                        fnic_send_fcoe_frame(iport, frame, frame_size);
                }
                if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
-                       oxid = iport->active_oxid_fdmi_rpa;
-                       FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+                       frame = fdls_alloc_init_fdmi_abts_frame(iport,
+                                               iport->active_oxid_fdmi_rpa);
+                       if (frame == NULL) {
+                               if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+                                       goto arm_timer;
+                               else
+                                       return;
+                       }
+
                        fnic_send_fcoe_frame(iport, frame, frame_size);
                }
        }
 
+arm_timer:
        fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
        mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
        iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
@@ -2245,6 +2267,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
        spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 }
 
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
+{
+       struct fnic *fnic = iport->fnic;
+
+       iport->fabric.fdmi_pending = 0;
+       /* If max retries not exhausted, start over from fdmi plogi */
+       if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
+               iport->fabric.fdmi_retry++;
+               FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+                                        "Retry FDMI PLOGI. FDMI retry: %d",
+                                        iport->fabric.fdmi_retry);
+               fdls_send_fdmi_plogi(iport);
+       }
+}
+
 void fdls_fdmi_timer_callback(struct timer_list *t)
 {
        struct fnic_fdls_fabric_s *fabric = timer_container_of(fabric, t,
@@ -2291,14 +2328,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
        FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
                "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
 
-       iport->fabric.fdmi_pending = 0;
-       /* If max retries not exhaused, start over from fdmi plogi */
-       if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
-               iport->fabric.fdmi_retry++;
-               FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-                                        "retry fdmi timer %d", iport->fabric.fdmi_retry);
-               fdls_send_fdmi_plogi(iport);
-       }
+       fdls_fdmi_retry_plogi(iport);
        FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
                "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
        spin_unlock_irqrestore(&fnic->fnic_lock, flags);
@@ -3716,11 +3746,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
        switch (FNIC_FRAME_TYPE(oxid)) {
        case FNIC_FRAME_TYPE_FDMI_PLOGI:
                fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
+
+               iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
+               iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
                break;
        case FNIC_FRAME_TYPE_FDMI_RHBA:
+               iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
+
+               /* If RPA is still pending, don't turn off ABORT PENDING.
+                * We count on the timer to detect the ABTS timeout and take
+                * corrective action.
+                */
+               if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
+                       iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
                fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
                break;
        case FNIC_FRAME_TYPE_FDMI_RPA:
+               iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
+
+               /* If RHBA is still pending, don't turn off ABORT PENDING.
+                * We count on the timer to detect the ABTS timeout and take
+                * corrective action.
+                */
+               if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
+                       iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
                fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
                break;
        default:
@@ -3730,10 +3781,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
                break;
        }
 
-       timer_delete_sync(&iport->fabric.fdmi_timer);
-       iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
-
-       fdls_send_fdmi_plogi(iport);
+       /*
+        * Only if ABORT PENDING is off, delete the timer, and if no other
+        * operations are pending, retry FDMI.
+        * Otherwise, let the timer pop and take the appropriate action.
+        */
+       if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
+               timer_delete_sync(&iport->fabric.fdmi_timer);
+               if (!iport->fabric.fdmi_pending)
+                       fdls_fdmi_retry_plogi(iport);
+       }
 }
 
 static void
index 6c5f6046b1f5b20d4416dacf09700efba44d1bf5..86e293ce530d13fe6fff0dd391c07cbea09a927e 100644 (file)
@@ -30,7 +30,7 @@
 
 #define DRV_NAME               "fnic"
 #define DRV_DESCRIPTION                "Cisco FCoE HBA Driver"
-#define DRV_VERSION            "1.8.0.0"
+#define DRV_VERSION            "1.8.0.1"
 #define PFX                    DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
index 8e610b65ad57d3f45740529264f6d846809d6d2c..531d0b37e450f1426666de525bade8af0cec452b 100644 (file)
@@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
 bool fdls_delete_tport(struct fnic_iport_s *iport,
                       struct fnic_tport_s *tport);
 void fdls_fdmi_timer_callback(struct timer_list *t);
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
 
 /* fnic_fcs.c */
 void fnic_fdls_init(struct fnic *fnic, int usefip);