scsi: lpfc: Coordinate adapter error handling with offline handling
author James Smart <jsmart2021@gmail.com>
Tue, 12 Mar 2019 23:30:14 +0000 (16:30 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 19 Mar 2019 16:57:02 +0000 (12:57 -0400)
The driver periodically checks for adapter error in a background thread. If
the thread detects an error, the adapter will be reset including the
deletion and reallocation of workqueues on the adapter.  Simultaneously,
there may be a user-space request to offline the adapter which may try to
do many of the same steps, in parallel, on a different thread. Because
memory was deallocated unexpectedly, the parallel offline request hit a bad
pointer.

Add coordination between the two threads.  The error recovery thread has
precedence. So, when an error is detected, a flag is set on the adapter to
indicate the error thread is terminating the adapter. But, before doing
that work, it will look for a flag that is set by the offline flow, and if
set, will wait for it to complete before then processing the error handling
path.  Similarly, the offline thread first checks whether the
error thread is resetting the adapter, and if so, will then wait for the
error thread to finish. Only after it has finished, will it set its flag
and offline the adapter.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli.h

index 5d6c874c44e7b986635cf4489d62e600b7349abb..61745f59091623e514f0c0ea205bb5dbee2c331e 100644 (file)
@@ -1204,6 +1204,20 @@ lpfc_do_offline(struct lpfc_hba *phba, uint32_t type)
 
        psli = &phba->sli;
 
+       /*
+        * If freeing of the queues has already started, don't access them.
+        * Otherwise set FREE_WAIT to indicate that queues are being used
+        * to hold the freeing process until we finish.
+        */
+       spin_lock_irq(&phba->hbalock);
+       if (!(psli->sli_flag & LPFC_QUEUE_FREE_INIT)) {
+               psli->sli_flag |= LPFC_QUEUE_FREE_WAIT;
+       } else {
+               spin_unlock_irq(&phba->hbalock);
+               goto skip_wait;
+       }
+       spin_unlock_irq(&phba->hbalock);
+
        /* Wait a little for things to settle down, but not
         * long enough for dev loss timeout to expire.
         */
@@ -1225,6 +1239,11 @@ lpfc_do_offline(struct lpfc_hba *phba, uint32_t type)
                }
        }
 out:
+       spin_lock_irq(&phba->hbalock);
+       psli->sli_flag &= ~LPFC_QUEUE_FREE_WAIT;
+       spin_unlock_irq(&phba->hbalock);
+
+skip_wait:
        init_completion(&online_compl);
        rc = lpfc_workq_post_event(phba, &status, &online_compl, type);
        if (rc == 0)
index 4644cea2e5ce4ecfdc3ec9652f59ddb0c290feee..05fbb4765d706a1f2261efef211cdcfdc6e23d37 100644 (file)
@@ -9135,6 +9135,20 @@ lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
 void
 lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 {
+       /*
+        * Set FREE_INIT before beginning to free the queues.
+        * Wait for the users of the queues to acknowledge the
+        * release of the queues by clearing FREE_WAIT.
+        */
+       spin_lock_irq(&phba->hbalock);
+       phba->sli.sli_flag |= LPFC_QUEUE_FREE_INIT;
+       while (phba->sli.sli_flag & LPFC_QUEUE_FREE_WAIT) {
+               spin_unlock_irq(&phba->hbalock);
+               msleep(20);
+               spin_lock_irq(&phba->hbalock);
+       }
+       spin_unlock_irq(&phba->hbalock);
+
        /* Release HBA eqs */
        if (phba->sli4_hba.hdwq)
                lpfc_sli4_release_hdwq(phba);
@@ -9173,6 +9187,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
 
        /* Everything on this list has been freed */
        INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list);
+
+       /* Done with freeing the queues */
+       spin_lock_irq(&phba->hbalock);
+       phba->sli.sli_flag &= ~LPFC_QUEUE_FREE_INIT;
+       spin_unlock_irq(&phba->hbalock);
 }
 
 int
index 32ded3e9b32e15e50420736a39ad55ff763986b4..4b084a408e71302745eb30974e8967ce9c9afa37 100644 (file)
@@ -14417,6 +14417,9 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
        if (!queue)
                return;
 
+       if (!list_empty(&queue->wq_list))
+               list_del(&queue->wq_list);
+
        while (!list_empty(&queue->page_list)) {
                list_remove_head(&queue->page_list, dmabuf, struct lpfc_dmabuf,
                                 list);
@@ -14432,9 +14435,6 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
        if (!list_empty(&queue->cpu_list))
                list_del(&queue->cpu_list);
 
-       if (!list_empty(&queue->wq_list))
-               list_del(&queue->wq_list);
-
        kfree(queue);
        return;
 }
index 1153a6c91bde69bfec76101a184f12f6a891148d..467b8270f7fdd0230c5e3dcc3e0c55b5995204a6 100644 (file)
@@ -327,6 +327,10 @@ struct lpfc_sli {
 #define LPFC_SLI_ASYNC_MBX_BLK    0x2000 /* Async mailbox is blocked */
 #define LPFC_SLI_SUPPRESS_RSP     0x4000 /* Suppress RSP feature is supported */
 #define LPFC_SLI_USE_EQDR         0x8000 /* EQ Delay Register is supported */
+#define LPFC_QUEUE_FREE_INIT     0x10000 /* Queue freeing is in progress */
+#define LPFC_QUEUE_FREE_WAIT     0x20000 /* Hold Queue free as it is being
+                                          * used outside worker thread
+                                          */
 
        struct lpfc_sli_ring *sli3_ring;