[SCSI] zfcp: Fix hang when offlining device with offline chpid
author Christof Schmitt <christof.schmitt@de.ibm.com>
Thu, 24 Sep 2009 08:23:25 +0000 (10:23 +0200)
committer James Bottomley <James.Bottomley@suse.de>
Fri, 2 Oct 2009 14:50:21 +0000 (09:50 -0500)
Running chchp --vary 0 and chccwdev -d on a FCP device with scsi
devices attached can lead to this thread hanging:

================================================================
STACK TRACE FOR TASK: 0x2fbfcc00 (kslowcrw)

 STACK:
 0 schedule+1136 [0x45f99c]
 1 schedule_timeout+534 [0x46054e]
 2 wait_for_common+374 [0x45f442]
 3 blk_execute_rq+160 [0x217a2c]
 4 scsi_execute+278 [0x26daf2]
 5 scsi_execute_req+150 [0x26dc86]
 6 sd_sync_cache+138 [0x28460a]
 7 sd_shutdown+130 [0x28486a]
 8 sd_remove+104 [0x284c84]
 9 __device_release_driver+152 [0x257430]
10 device_release_driver+56 [0x2575c8]
11 bus_remove_device+214 [0x25672a]
12 device_del+352 [0x25456c]
13 __scsi_remove_device+108 [0x272630]
14 scsi_remove_device+66 [0x2726ba]
15 zfcp_ccw_remove+824 [0x335558]
16 ccw_device_remove+62 [0x2b3f2a]
17 __device_release_driver+152 [0x257430]
18 device_release_driver+56 [0x2575c8]
19 bus_remove_device+214 [0x25672a]
20 device_del+352 [0x25456c]
21 ccw_device_unregister+92 [0x2b48c4]
22 io_subchannel_remove+108 [0x2b4950]
23 css_remove+62 [0x2af7ee]
24 __device_release_driver+152 [0x257430]
25 device_release_driver+56 [0x2575c8]
26 bus_remove_device+214 [0x25672a]
27 device_del+352 [0x25456c]
28 device_unregister+38 [0x25464a]
29 css_sch_device_unregister+68 [0x2af97c]
30 ccw_device_call_sch_unregister+78 [0x2b581e]
31 worker_thread+604 [0x69eb0]
32 kthread+154 [0x6ff42]
33 kernel_thread_starter+6 [0x1c952]
================================================================

The problem is that the chchp --vary 0 leads to zfcp first calling
fc_remote_port_delete which blocks all scsi devices on the remote
port. Calling scsi_remove_device later lets the sd driver issue a
SYNCHRONIZE_CACHE command. This command stays on the "stopped" request
queue because the SCSI device is blocked. Fix this by removing the scsi
and fc hosts first, which removes all scsi devices, and by no longer
calling scsi_remove_device.

Reviewed-by: Felix Beck <felix.beck@de.ibm.com>
Signed-off-by: Christof Schmitt <christof.schmitt@de.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
drivers/s390/scsi/zfcp_aux.c
drivers/s390/scsi/zfcp_ccw.c

index 5dcac24a7a1bf24b8fb193e7406a1dc5df7c789e..0f79f3af4f54ce58be2aa43b92a9f2a15d1bdd37 100644 (file)
@@ -604,7 +604,6 @@ void zfcp_adapter_dequeue(struct zfcp_adapter *adapter)
 
        cancel_work_sync(&adapter->stat_work);
        zfcp_fc_wka_ports_force_offline(adapter->gs);
-       zfcp_adapter_scsi_unregister(adapter);
        sysfs_remove_group(&adapter->ccw_device->dev.kobj,
                           &zfcp_sysfs_adapter_attrs);
        dev_set_drvdata(&adapter->ccw_device->dev, NULL);
index 9fe32f7ec8d2f732f554c7df3fb3c6b68dba7697..e08339428ecf38eb0e3341289a282c732d2f99c8 100644 (file)
@@ -107,6 +107,10 @@ static void zfcp_ccw_remove(struct ccw_device *ccw_device)
        cancel_work_sync(&adapter->scan_work);
 
        mutex_lock(&zfcp_data.config_mutex);
+
+       /* this also removes the scsi devices, so call it first */
+       zfcp_adapter_scsi_unregister(adapter);
+
        write_lock_irq(&zfcp_data.config_lock);
        list_for_each_entry_safe(port, p, &adapter->port_list_head, list) {
                list_for_each_entry_safe(unit, u, &port->unit_list_head, list) {
@@ -121,11 +125,8 @@ static void zfcp_ccw_remove(struct ccw_device *ccw_device)
        write_unlock_irq(&zfcp_data.config_lock);
 
        list_for_each_entry_safe(port, p, &port_remove_lh, list) {
-               list_for_each_entry_safe(unit, u, &unit_remove_lh, list) {
-                       if (unit->device)
-                               scsi_remove_device(unit->device);
+               list_for_each_entry_safe(unit, u, &unit_remove_lh, list)
                        zfcp_unit_dequeue(unit);
-               }
                zfcp_port_dequeue(port);
        }
        wait_event(adapter->remove_wq, atomic_read(&adapter->refcount) == 0);