scsi: qla2xxx: Fix hang on NVMe command timeouts
author Arun Easi <aeasi@marvell.com>
Tue, 17 Aug 2021 05:13:11 +0000 (22:13 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 24 Aug 2021 02:36:54 +0000 (22:36 -0400)
The abort callback gets called only when the abort gets posted to firmware.
The refcounting is done properly in the callback. On internal errors, the
callback is not invoked, leading to a hung I/O. Fix this by having a separate
error code for when the command gets returned from firmware.

Link: https://lore.kernel.org/r/20210817051315.2477-9-njavali@marvell.com
Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_nvme.c

index ddc6932f05fae56846540249b2a4b91e368c9e29..cb5bf2585cb76d0f8aeb4a1631753bbfd65ae28d 100644 (file)
@@ -5166,6 +5166,9 @@ struct secure_flash_update_block_pk {
 #define QLA_BUSY                       0x107
 #define QLA_ALREADY_REGISTERED         0x109
 #define QLA_OS_TIMER_EXPIRED           0x10a
+#define QLA_ERR_NO_QPAIR               0x10b
+#define QLA_ERR_NOT_FOUND              0x10c
+#define QLA_ERR_FROM_FW                        0x10d
 
 #define NVRAM_DELAY()          udelay(10)
 
index a70c68bb1d2db6a4f94181dd5a5608f71b7bd945..255f3a8884db196fe1159c1d82c5b088d881ae38 100644 (file)
@@ -157,7 +157,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
        sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
                                  GFP_ATOMIC);
        if (!sp)
-               return rval;
+               return QLA_MEMORY_ALLOC_FAILED;
 
        abt_iocb = &sp->u.iocb_cmd;
        sp->type = SRB_ABT_CMD;
@@ -190,7 +190,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
        if (wait) {
                wait_for_completion(&abt_iocb->u.abt.comp);
                rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
-                       QLA_SUCCESS : QLA_FUNCTION_FAILED;
+                       QLA_SUCCESS : QLA_ERR_FROM_FW;
                sp->free(sp);
        }
 
@@ -1988,7 +1988,7 @@ qla24xx_async_abort_command(srb_t *sp)
 
        if (handle == req->num_outstanding_cmds) {
                /* Command not found. */
-               return QLA_FUNCTION_FAILED;
+               return QLA_ERR_NOT_FOUND;
        }
        if (sp->type == SRB_FXIOCB_DCMD)
                return qlafx00_fx_disc(vha, &vha->hw->mr.fcport,
index 2964f5280bedfc4f9322c6eefff090abc2ab5665..fcc219172aa938c43ef04d4b1e5876f4dcad2c93 100644 (file)
@@ -3245,7 +3245,7 @@ qla24xx_abort_command(srb_t *sp)
        if (sp->qpair)
                req = sp->qpair->req;
        else
-               return QLA_FUNCTION_FAILED;
+               return QLA_ERR_NO_QPAIR;
 
        if (ql2xasynctmfenable)
                return qla24xx_async_abort_command(sp);
@@ -3258,7 +3258,7 @@ qla24xx_abort_command(srb_t *sp)
        spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
        if (handle == req->num_outstanding_cmds) {
                /* Command not found. */
-               return QLA_FUNCTION_FAILED;
+               return QLA_ERR_NOT_FOUND;
        }
 
        abt = dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &abt_dma);
index d294b590581e365b26341aab819272d24c71ed2b..1c5da2dbd6f97897cceea5f72754307ac608a79e 100644 (file)
@@ -227,11 +227,11 @@ static void qla_nvme_abort_work(struct work_struct *work)
        srb_t *sp = priv->sp;
        fc_port_t *fcport = sp->fcport;
        struct qla_hw_data *ha = fcport->vha->hw;
-       int rval;
+       int rval, abts_done_called = 1;
 
        ql_dbg(ql_dbg_io, fcport->vha, 0xffff,
-              "%s called for sp=%p, hndl=%x on fcport=%p deleted=%d\n",
-              __func__, sp, sp->handle, fcport, fcport->deleted);
+              "%s called for sp=%p, hndl=%x on fcport=%p desc=%p deleted=%d\n",
+              __func__, sp, sp->handle, fcport, sp->u.iocb_cmd.u.nvme.desc, fcport->deleted);
 
        if (!ha->flags.fw_started || fcport->deleted == QLA_SESS_DELETED)
                goto out;
@@ -251,12 +251,20 @@ static void qla_nvme_abort_work(struct work_struct *work)
            __func__, (rval != QLA_SUCCESS) ? "Failed to abort" : "Aborted",
            sp, sp->handle, fcport, rval);
 
+       /*
+        * If async tmf is enabled, the abort callback is called only on
+        * return codes QLA_SUCCESS and QLA_ERR_FROM_FW.
+        */
+       if (ql2xasynctmfenable &&
+           rval != QLA_SUCCESS && rval != QLA_ERR_FROM_FW)
+               abts_done_called = 0;
+
        /*
         * Returned before decreasing kref so that I/O requests
         * are waited until ABTS complete. This kref is decreased
         * at qla24xx_abort_sp_done function.
         */
-       if (ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
+       if (abts_done_called && ql2xabts_wait_nvme && QLA_ABTS_WAIT_ENABLED(sp))
                return;
 out:
        /* kref_get was done before work was schedule. */
@@ -804,14 +812,14 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
        case CS_PORT_LOGGED_OUT:
        /* BA_RJT was received for the ABTS */
        case CS_PORT_CONFIG_CHG:
-               ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09d,
+               ql_dbg(ql_dbg_async, vha, 0xf09d,
                       "Abort I/O IOCB completed with error, comp_status=%x\n",
                comp_status);
                break;
 
        /* BA_RJT was received for the ABTS */
        case CS_REJECT_RECEIVED:
-               ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
+               ql_dbg(ql_dbg_async, vha, 0xf09e,
                       "BA_RJT was received for the ABTS rjt_vendorUnique = %u",
                        abt->fw.ba_rjt_vendorUnique);
                ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09e,
@@ -820,18 +828,18 @@ void qla_nvme_abort_process_comp_status(struct abort_entry_24xx *abt, srb_t *ori
                break;
 
        case CS_COMPLETE:
-               ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf09f,
+               ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0xf09f,
                       "IOCB request is completed successfully comp_status=%x\n",
                comp_status);
                break;
 
        case CS_IOCB_ERROR:
-               ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a0,
+               ql_dbg(ql_dbg_async, vha, 0xf0a0,
                       "IOCB request is failed, comp_status=%x\n", comp_status);
                break;
 
        default:
-               ql_dbg(ql_dbg_async + ql_dbg_mbx, vha, 0xf0a1,
+               ql_dbg(ql_dbg_async, vha, 0xf0a1,
                       "Invalid Abort IO IOCB Completion Status %x\n",
                comp_status);
                break;