1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic SCSI-3 ALUA SCSI Device Handler
5 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
8 #include <linux/slab.h>
9 #include <linux/delay.h>
10 #include <linux/module.h>
11 #include <asm/unaligned.h>
12 #include <scsi/scsi.h>
13 #include <scsi/scsi_proto.h>
14 #include <scsi/scsi_dbg.h>
15 #include <scsi/scsi_eh.h>
16 #include <scsi/scsi_dh.h>
/* Handler name (used for registration and as log prefix) and module version. */
18 #define ALUA_DH_NAME "alua"
19 #define ALUA_DH_VER "2.0"
/*
 * Bits describing which access states a port group supports. alua_rtpg()
 * copies descriptor byte 1 into pg->valid_states and tests these bits when
 * logging; TPGS_SUPPORT_ALL is the OR of the seven individual bits below.
 */
21 #define TPGS_SUPPORT_NONE 0x00
22 #define TPGS_SUPPORT_OPTIMIZED 0x01
23 #define TPGS_SUPPORT_NONOPTIMIZED 0x02
24 #define TPGS_SUPPORT_STANDBY 0x04
25 #define TPGS_SUPPORT_UNAVAILABLE 0x08
26 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10
27 #define TPGS_SUPPORT_OFFLINE 0x40
28 #define TPGS_SUPPORT_TRANSITION 0x80
29 #define TPGS_SUPPORT_ALL 0xdf
/* Applied to byte 4 of the RTPG response to detect the extended header format. */
31 #define RTPG_FMT_MASK 0x70
32 #define RTPG_FMT_EXT_HDR 0x10
/*
 * TPGS modes as compared against scsi_device_tpgs() in alua_check_tpgs().
 * IMPLICIT and EXPLICIT are bit flags and may be combined.
 */
34 #define TPGS_MODE_UNINITIALIZED -1
35 #define TPGS_MODE_NONE 0x0
36 #define TPGS_MODE_IMPLICIT 0x1
37 #define TPGS_MODE_EXPLICIT 0x2
/*
 * ALUA_RTPG_SIZE: initial RTPG response buffer size in bytes.
 * ALUA_FAILOVER_TIMEOUT: command/transition timeout in seconds (scaled by HZ).
 * ALUA_FAILOVER_RETRIES: retry count handed to scsi_execute() and
 * scsi_test_unit_ready().
 * ALUA_RTPG_DELAY_MSECS: delay before newly queued RTPG work runs.
 */
39 #define ALUA_RTPG_SIZE 128
40 #define ALUA_FAILOVER_TIMEOUT 60
41 #define ALUA_FAILOVER_RETRIES 5
42 #define ALUA_RTPG_DELAY_MSECS 5
44 /* device handler flags */
45 #define ALUA_OPTIMIZE_STPG 0x01
46 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02
47 /* State machine flags */
/*
 * RUN_RTPG / RUN_STPG request the respective command from alua_rtpg_work();
 * RUNNING is set while that worker is executing.
 */
48 #define ALUA_PG_RUN_RTPG 0x10
49 #define ALUA_PG_RUN_STPG 0x20
50 #define ALUA_PG_RUNNING 0x40
/* Module parameter, readable by all and writable by the owner (S_IRUGO|S_IWUSR). */
52 static uint optimize_stpg;
53 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
54 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
/* Global list of all known port groups; modified under port_group_lock. */
56 static LIST_HEAD(port_group_list);
57 static DEFINE_SPINLOCK(port_group_lock);
/* Workqueue on which the delayed RTPG/STPG work items are queued. */
58 static struct workqueue_struct *kaluad_wq;
/*
 * Per target port group state, shared by all devices attached to the group.
 * NOTE(review): this listing has lost lines. Fields used elsewhere in the file
 * (kref, lock, group_id, tpgs, state, pref, valid_states, device_id_len,
 * expiry) are not visible here, and the closing of this struct together with
 * the opening of struct alua_dh_data is missing: the members from the second
 * 'node' onwards are accessed through struct alua_dh_data pointers
 * (h->node, h->pg, h->sdev, h->init_mutex) in the rest of the file.
 */
60 struct alua_port_group {
/* Link in the global port_group_list. */
63 struct list_head node;
/* List of alua_dh_data entries (linked via their 'node') using this group. */
64 struct list_head dh_list;
65 unsigned char device_id_str[256];
72 unsigned flags; /* used for optimizing STPG */
/* Transition timeout in seconds (multiplied by HZ where it is used). */
73 unsigned char transition_tmo;
75 unsigned long interval;
76 struct delayed_work rtpg_work;
/* Pending alua_queue_data entries whose callbacks the worker invokes. */
78 struct list_head rtpg_list;
/* Device the worker issues commands to; NULL when no work is pending. */
79 struct scsi_device *rtpg_sdev;
83 struct list_head node;
84 struct alua_port_group __rcu *pg;
87 struct scsi_device *sdev;
/* Serializes alua_initialize() against alua_activate(). */
89 struct mutex init_mutex;
/*
 * Completion request queued on pg->rtpg_list by alua_activate().
 * NOTE(review): the 'callback_data' member used by alua_activate() and
 * alua_rtpg_work() and the closing brace are missing from this listing.
 */
92 struct alua_queue_data {
93 struct list_head entry;
94 activate_complete callback_fn;
/* Policy selectors; not referenced by any code visible in this listing. */
98 #define ALUA_POLICY_SWITCH_CURRENT 0
99 #define ALUA_POLICY_SWITCH_ALL 1
/* Forward declarations for functions used before their definition. */
101 static void alua_rtpg_work(struct work_struct *work);
102 static bool alua_rtpg_queue(struct alua_port_group *pg,
103 struct scsi_device *sdev,
104 struct alua_queue_data *qdata, bool force);
105 static void alua_check(struct scsi_device *sdev, bool force);
/*
 * kref release callback for a port group: recovers the alua_port_group from
 * its embedded kref, flushes the delayed RTPG work and then takes
 * port_group_lock.
 * NOTE(review): the statement executed under port_group_lock and the final
 * release of @pg are not present in this listing - presumably the group is
 * unlinked from port_group_list and freed; confirm against the full file.
 */
107 static void release_port_group(struct kref *kref)
109 struct alua_port_group *pg;
111 pg = container_of(kref, struct alua_port_group, kref);
/* Make sure no work item is still pending or running for this group. */
113 flush_delayed_work(&pg->rtpg_work);
114 spin_lock(&port_group_lock);
116 spin_unlock(&port_group_lock);
121 * submit_rtpg - Issue a REPORT TARGET PORT GROUPS command
122 * @sdev: sdev the command should be sent to
/*
 * Build and execute a MAINTENANCE IN / REPORT TARGET PORT GROUPS command.
 * @buff/@bufflen: response buffer and its size (also written into CDB bytes
 *                 6..9 as a big-endian 32-bit value).
 * @sshdr:         receives decoded sense data from scsi_execute().
 * @flags:         port group flags; the extended header format is requested
 *                 unless ALUA_RTPG_EXT_HDR_UNSUPP is set.
 * Returns the result of scsi_execute().
 * NOTE(review): the continuation of the req_flags initializer and the 'else'
 * between the two cdb[1] assignments are missing from this listing.
 */
124 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
125 int bufflen, struct scsi_sense_hdr *sshdr, int flags)
127 u8 cdb[MAX_COMMAND_SIZE];
128 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
131 /* Prepare the command. */
132 memset(cdb, 0x0, MAX_COMMAND_SIZE);
133 cdb[0] = MAINTENANCE_IN;
134 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
135 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
137 cdb[1] = MI_REPORT_TARGET_PGS;
138 put_unaligned_be32(bufflen, &cdb[6]);
140 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL,
141 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
142 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
146 * submit_stpg - Issue a SET TARGET PORT GROUP command
148 * Currently we're only setting the current target port group state
149 * to 'active/optimized' and let the array firmware figure out
150 * the states of the remaining groups.
/*
 * Build and execute a MAINTENANCE OUT / SET TARGET PORT GROUPS command that
 * asks for @group_id to be put into the active/optimized state.
 * @sshdr: receives decoded sense data from scsi_execute().
 * Returns the result of scsi_execute().
 * NOTE(review): the declaration of 'stpg_len' and the continuation of the
 * req_flags initializer are missing from this listing; stpg_len presumably
 * equals the size of stpg_data - confirm against the full file.
 */
152 static int submit_stpg(struct scsi_device *sdev, int group_id,
153 struct scsi_sense_hdr *sshdr)
155 u8 cdb[MAX_COMMAND_SIZE];
156 unsigned char stpg_data[8];
158 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
161 /* Prepare the data buffer */
162 memset(stpg_data, 0, stpg_len);
/* Byte 4 carries the requested state, bytes 6..7 the big-endian group id. */
163 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
164 put_unaligned_be16(group_id, &stpg_data[6]);
166 /* Prepare the command. */
167 memset(cdb, 0x0, MAX_COMMAND_SIZE);
168 cdb[0] = MAINTENANCE_OUT;
169 cdb[1] = MO_SET_TARGET_PGS;
170 put_unaligned_be32(stpg_len, &cdb[6]);
172 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL,
173 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
174 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
/*
 * Look up a port group on the global port_group_list by device id string and
 * group id and try to take a reference on the match.
 * An entry is rejected when its group_id differs, when its stored device id
 * is empty or its length differs from @id_size, or when the id strings
 * differ. kref_get_unless_zero() guards against entries whose reference
 * count has already dropped to zero.
 * Both callers in this file hold port_group_lock around the call.
 * NOTE(review): the remainder of the parameter list, the bodies of the 'if'
 * statements and the return statements are missing from this listing.
 */
177 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
180 struct alua_port_group *pg;
/* An absent or empty id string can never match. */
182 if (!id_str || !id_size || !strlen(id_str))
185 list_for_each_entry(pg, &port_group_list, node) {
186 if (pg->group_id != group_id)
188 if (!pg->device_id_len || pg->device_id_len != id_size)
190 if (strncmp(pg->device_id_str, id_str, id_size))
192 if (!kref_get_unless_zero(&pg->kref))
201 * alua_alloc_pg - Allocate a new port_group structure
203 * @group_id: port group id
204 * @tpgs: target port group settings
206 * Allocate a new port_group structure for a given
/*
 * Allocate and initialize a port group for @sdev / @group_id and add it to
 * port_group_list, unless a matching group is found there first.
 * The device id comes from scsi_vpd_lun_id(); when that yields nothing the id
 * is left empty. A new group starts in the active/optimized state with all
 * states marked as supported.
 * Returns ERR_PTR(-ENOMEM) when the allocation fails.
 * NOTE(review): several lines are missing from this listing: the NULL check
 * after kzalloc(), the condition guarding the ALUA_OPTIMIZE_STPG assignment
 * (presumably the optimize_stpg module parameter), the handling of an already
 * existing group returned by alua_find_get_pg(), and the final return.
 */
209 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
210 int group_id, int tpgs)
212 struct alua_port_group *pg, *tmp_pg;
214 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
216 return ERR_PTR(-ENOMEM);
218 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
219 sizeof(pg->device_id_str));
220 if (pg->device_id_len <= 0) {
222 * TPGS supported but no device identification found.
223 * Generate private device identification.
225 sdev_printk(KERN_INFO, sdev,
226 "%s: No device descriptors found\n",
228 pg->device_id_str[0] = '\0';
229 pg->device_id_len = 0;
231 pg->group_id = group_id;
233 pg->state = SCSI_ACCESS_STATE_OPTIMAL;
234 pg->valid_states = TPGS_SUPPORT_ALL;
236 pg->flags |= ALUA_OPTIMIZE_STPG;
237 kref_init(&pg->kref);
238 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
239 INIT_LIST_HEAD(&pg->rtpg_list);
240 INIT_LIST_HEAD(&pg->node);
241 INIT_LIST_HEAD(&pg->dh_list);
242 spin_lock_init(&pg->lock);
/* Lookup and insertion happen under the same lock to avoid duplicates. */
244 spin_lock(&port_group_lock);
245 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
248 spin_unlock(&port_group_lock);
253 list_add(&pg->node, &port_group_list);
254 spin_unlock(&port_group_lock);
260 * alua_check_tpgs - Evaluate TPGS setting
261 * @sdev: device to be checked
263 * Examine the TPGS setting of the sdev to find out if ALUA
/*
 * Determine the TPGS mode of @sdev via scsi_device_tpgs() and log it.
 * Non-disk devices are reported as unsupported; an unrecognized TPGS value
 * is logged and mapped to TPGS_MODE_NONE.
 * NOTE(review): the switch statement header, some case labels, the break
 * statements, the ALUA_DH_NAME log arguments and the final return are
 * missing from this listing; the caller uses the return value as the TPGS
 * mode.
 */
266 static int alua_check_tpgs(struct scsi_device *sdev)
268 int tpgs = TPGS_MODE_NONE;
271 * ALUA support for non-disk devices is fraught with
272 * difficulties, so disable it for now.
274 if (sdev->type != TYPE_DISK) {
275 sdev_printk(KERN_INFO, sdev,
276 "%s: disable for non-disk devices\n",
281 tpgs = scsi_device_tpgs(sdev);
283 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
284 sdev_printk(KERN_INFO, sdev,
285 "%s: supports implicit and explicit TPGS\n",
288 case TPGS_MODE_EXPLICIT:
289 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
292 case TPGS_MODE_IMPLICIT:
293 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
297 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
301 sdev_printk(KERN_INFO, sdev,
302 "%s: unsupported TPGS setting %d\n",
304 tpgs = TPGS_MODE_NONE;
312 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
313 * @sdev: device to be checked
315 * Extract the relative target port and the target port group
316 * descriptor from the list of identifiers.
/*
 * Bind @h to the port group that @sdev belongs to.
 * Reads the target port group id and relative port via scsi_vpd_tpg_id(),
 * obtains the port group from alua_alloc_pg(), and then, under h->pg_lock,
 * replaces h->pg, moves h->node from the old group's dh_list to the new
 * one and queues an RTPG for the group. The reference on a previous group
 * is dropped at the end.
 * Returns SCSI_DH_DEV_UNSUPP when no target port descriptor is found or the
 * group cannot be set up, SCSI_DH_NOMEM on allocation failure.
 * NOTE(review): the rest of the parameter list, the 'flags' declaration,
 * several conditions (e.g. the checks on group_id, IS_ERR(pg), old_pg != pg)
 * and the success return are missing from this listing.
 */
318 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
321 int rel_port = -1, group_id;
322 struct alua_port_group *pg, *old_pg = NULL;
323 bool pg_updated = false;
326 group_id = scsi_vpd_tpg_id(sdev, &rel_port);
329 * Internal error; TPGS supported but required
330 * VPD identification descriptors not present.
331 * Disable ALUA support
333 sdev_printk(KERN_INFO, sdev,
334 "%s: No target port descriptors found\n",
336 return SCSI_DH_DEV_UNSUPP;
339 pg = alua_alloc_pg(sdev, group_id, tpgs);
341 if (PTR_ERR(pg) == -ENOMEM)
342 return SCSI_DH_NOMEM;
343 return SCSI_DH_DEV_UNSUPP;
345 if (pg->device_id_len)
346 sdev_printk(KERN_INFO, sdev,
347 "%s: device %s port group %x rel port %x\n",
348 ALUA_DH_NAME, pg->device_id_str,
351 sdev_printk(KERN_INFO, sdev,
352 "%s: port group %x rel port %x\n",
353 ALUA_DH_NAME, group_id, rel_port);
355 /* Check for existing port group references */
356 spin_lock(&h->pg_lock);
357 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
359 /* port group has changed. Update to new port group */
361 spin_lock_irqsave(&old_pg->lock, flags);
362 list_del_rcu(&h->node);
363 spin_unlock_irqrestore(&old_pg->lock, flags);
365 rcu_assign_pointer(h->pg, pg);
369 spin_lock_irqsave(&pg->lock, flags);
371 list_add_rcu(&h->node, &pg->dh_list);
372 spin_unlock_irqrestore(&pg->lock, flags);
/* Trigger an asynchronous state evaluation for the current group. */
374 alua_rtpg_queue(rcu_dereference_protected(h->pg,
375 lockdep_is_held(&h->pg_lock)),
377 spin_unlock(&h->pg_lock);
380 kref_put(&old_pg->kref, release_port_group);
/*
 * Map an ALUA access state to a single character; alua_rtpg() prints the
 * result with a "%c" conversion.
 * NOTE(review): the switch header, the per-case return statements (i.e. the
 * actual characters) and the default case are missing from this listing.
 */
385 static char print_alua_state(unsigned char state)
388 case SCSI_ACCESS_STATE_OPTIMAL:
390 case SCSI_ACCESS_STATE_ACTIVE:
392 case SCSI_ACCESS_STATE_STANDBY:
394 case SCSI_ACCESS_STATE_UNAVAILABLE:
396 case SCSI_ACCESS_STATE_LBA:
398 case SCSI_ACCESS_STATE_OFFLINE:
400 case SCSI_ACCESS_STATE_TRANSITIONING:
/*
 * check_sense callback: decide how a command that completed with sense data
 * should be handled, based on sense key / ASC / ASCQ.
 * Several conditions schedule a re-evaluation of the port group through
 * alua_check(); most matched conditions return ADD_TO_MLQUEUE so that the
 * command is requeued. Anything not matched returns SCSI_RETURN_NOT_HANDLED.
 * NOTE(review): the case labels of the switch on sense_key, the return value
 * of the 04h/0Ah branch, the break statements and the comment for the
 * 2Ah/06h branch are missing from this listing.
 */
407 static int alua_check_sense(struct scsi_device *sdev,
408 struct scsi_sense_hdr *sense_hdr)
410 switch (sense_hdr->sense_key) {
412 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
414 * LUN Not Accessible - ALUA state transition
416 alua_check(sdev, false);
421 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
423 * Power On, Reset, or Bus Device Reset.
424 * Might have obscured a state transition,
425 * so schedule a recheck.
427 alua_check(sdev, true);
428 return ADD_TO_MLQUEUE;
430 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
432 * Device internal reset
434 return ADD_TO_MLQUEUE;
435 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
437 * Mode Parameters Changed
439 return ADD_TO_MLQUEUE;
440 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
444 alua_check(sdev, true);
445 return ADD_TO_MLQUEUE;
447 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
449 * Implicit ALUA state transition failed
451 alua_check(sdev, true);
452 return ADD_TO_MLQUEUE;
454 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
456 * Inquiry data has changed
458 return ADD_TO_MLQUEUE;
459 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
461 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
462 * when switching controllers on targets like
463 * Intel Multi-Flex. We can just retry.
465 return ADD_TO_MLQUEUE;
469 return SCSI_RETURN_NOT_HANDLED;
473 * alua_tur - Send a TEST UNIT READY
474 * @sdev: device to which the TEST UNIT READY command should be sent
476 * Send a TEST UNIT READY to @sdev to figure out the device state
477 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
478 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
/*
 * Probe @sdev with TEST UNIT READY, using the ALUA failover timeout and
 * retry count. Sense NOT READY with ASC/ASCQ 04h/0Ah yields SCSI_DH_RETRY.
 * NOTE(review): the declaration of 'retval' and the remaining return
 * statements are missing from this listing.
 * NOTE(review): sense_hdr is inspected regardless of retval - confirm that
 * scsi_test_unit_ready() always initializes it.
 */
480 static int alua_tur(struct scsi_device *sdev)
482 struct scsi_sense_hdr sense_hdr;
485 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
486 ALUA_FAILOVER_RETRIES, &sense_hdr);
487 if (sense_hdr.sense_key == NOT_READY &&
488 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
489 return SCSI_DH_RETRY;
497 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
498 * @sdev: the device to be evaluated.
500 * Evaluate the Target Port Group State.
501 * Returns SCSI_DH_DEV_OFFLINED if the path is
502 * found to be unusable.
/*
 * Issue an RTPG to @sdev and update the state of every known port group
 * that appears in the response.
 *
 * Visible steps:
 *  - arm pg->expiry from the transition timeout (pg->transition_tmo, or
 *    ALUA_FAILOVER_TIMEOUT when that is zero);
 *  - send the command with an ALUA_RTPG_SIZE byte buffer; if the returned
 *    length (first four bytes, big endian, plus 4) requires it, resubmit
 *    with a larger buffer;
 *  - on failure, either ignore the error (target supports nothing but
 *    active/optimized), fall back to the non-extended header format, retry,
 *    or fail, depending on the sense data;
 *  - take the transition timeout from byte 5 of an extended header;
 *  - walk the descriptors (8 bytes plus 4 per target port, per desc[7]),
 *    look each group up by device id and group id, and store state,
 *    preferred bit and supported states;
 *  - log the state of @pg and derive the return code from it.
 *
 * NOTE(review): many lines are missing from this listing (the 'flags'
 * declaration, labels, several conditions, goto/return statements and the
 * loop bound), so the exact control flow cannot be confirmed from here.
 */
504 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
506 struct scsi_sense_hdr sense_hdr;
507 struct alua_port_group *tmp_pg;
508 int len, k, off, bufflen = ALUA_RTPG_SIZE;
509 unsigned char *desc, *buff;
510 unsigned err, retval;
511 unsigned int tpg_desc_tbl_off;
512 unsigned char orig_transition_tmo;
516 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
518 if (pg->transition_tmo)
519 transition_tmo = pg->transition_tmo * HZ;
521 pg->expiry = round_jiffies_up(jiffies + transition_tmo);
524 buff = kzalloc(bufflen, GFP_KERNEL);
526 return SCSI_DH_DEV_TEMP_BUSY;
530 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
534 * Some (broken) implementations have a habit of returning
535 * an error during things like firmware update etc.
536 * But if the target only supports active/optimized there's
537 * not much we can do; it's not that we can switch paths
539 * So ignore any errors to avoid spurious failures during
542 if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
543 sdev_printk(KERN_INFO, sdev,
544 "%s: ignoring rtpg result %d\n",
545 ALUA_DH_NAME, retval);
549 if (!scsi_sense_valid(&sense_hdr)) {
550 sdev_printk(KERN_INFO, sdev,
551 "%s: rtpg failed, result %d\n",
552 ALUA_DH_NAME, retval);
554 if (driver_byte(retval) == DRIVER_ERROR)
555 return SCSI_DH_DEV_TEMP_BUSY;
560 * submit_rtpg() has failed on existing arrays
561 * when requesting extended header info, and
562 * the array doesn't support extended headers,
563 * even though it shouldn't according to T10.
564 * The retry without rtpg_ext_hdr_req set
567 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
568 sense_hdr.sense_key == ILLEGAL_REQUEST &&
569 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
570 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
574 * Retry on ALUA state transition or if any
575 * UNIT ATTENTION occurred.
577 if (sense_hdr.sense_key == NOT_READY &&
578 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
580 else if (sense_hdr.sense_key == UNIT_ATTENTION)
/* Retries are only attempted until pg->expiry has passed. */
582 if (err == SCSI_DH_RETRY &&
583 pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
584 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
586 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
590 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
592 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
/* Total response size: 4-byte length field plus the length it announces. */
598 len = get_unaligned_be32(&buff[0]) + 4;
601 /* Resubmit with the correct length */
604 buff = kmalloc(bufflen, GFP_KERNEL);
606 sdev_printk(KERN_WARNING, sdev,
607 "%s: kmalloc buffer failed\n",__func__);
608 /* Temporary failure, bypass */
610 return SCSI_DH_DEV_TEMP_BUSY;
615 orig_transition_tmo = pg->transition_tmo;
616 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
617 pg->transition_tmo = buff[5];
619 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
621 if (orig_transition_tmo != pg->transition_tmo) {
622 sdev_printk(KERN_INFO, sdev,
623 "%s: transition timeout set to %d seconds\n",
624 ALUA_DH_NAME, pg->transition_tmo);
625 pg->expiry = jiffies + pg->transition_tmo * HZ;
/* Descriptors start after an 8-byte extended or a 4-byte plain header. */
628 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
629 tpg_desc_tbl_off = 8;
631 tpg_desc_tbl_off = 4;
633 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
635 k += off, desc += off) {
636 u16 group_id = get_unaligned_be16(&desc[2]);
638 spin_lock_irqsave(&port_group_lock, flags);
639 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
641 spin_unlock_irqrestore(&port_group_lock, flags);
643 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
/* Do not overwrite another group's state while its own worker runs. */
644 if ((tmp_pg == pg) ||
645 !(tmp_pg->flags & ALUA_PG_RUNNING)) {
646 struct alua_dh_data *h;
/* desc[0]: low nibble is the access state, top bit the preferred flag. */
648 tmp_pg->state = desc[0] & 0x0f;
649 tmp_pg->pref = desc[0] >> 7;
651 list_for_each_entry_rcu(h,
652 &tmp_pg->dh_list, node) {
653 /* h->sdev should always be valid */
655 h->sdev->access_state = desc[0];
660 tmp_pg->valid_states = desc[1];
661 spin_unlock_irqrestore(&tmp_pg->lock, flags);
663 kref_put(&tmp_pg->kref, release_port_group);
665 off = 8 + (desc[7] * 4);
668 spin_lock_irqsave(&pg->lock, flags);
669 sdev_printk(KERN_INFO, sdev,
670 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
671 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
672 pg->pref ? "preferred" : "non-preferred",
673 pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
674 pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
675 pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
676 pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
677 pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
678 pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
679 pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
682 case SCSI_ACCESS_STATE_TRANSITIONING:
683 if (time_before(jiffies, pg->expiry)) {
684 /* State transition, retry */
688 struct alua_dh_data *h;
690 /* Transitioning time exceeded, set port to standby */
692 pg->state = SCSI_ACCESS_STATE_STANDBY;
695 list_for_each_entry_rcu(h, &pg->dh_list, node) {
697 h->sdev->access_state =
698 (pg->state & SCSI_ACCESS_STATE_MASK);
700 h->sdev->access_state |=
701 SCSI_ACCESS_STATE_PREFERRED;
706 case SCSI_ACCESS_STATE_OFFLINE:
708 err = SCSI_DH_DEV_OFFLINED;
712 /* Useable path if active */
717 spin_unlock_irqrestore(&pg->lock, flags);
723 * alua_stpg - Issue a SET TARGET PORT GROUP command
725 * Issue a SET TARGET PORT GROUP command and evaluate the
726 * response. Returns SCSI_DH_RETRY per default to trigger
727 * a re-evaluation of the target group state or SCSI_DH_OK
728 * if no further action needs to be taken.
/*
 * Issue a SET TARGET PORT GROUPS for @pg through @sdev if the current state
 * of the group calls for it.
 * Returns SCSI_DH_RETRY when only implicit ALUA is supported and as the
 * default result after the command was submitted, SCSI_DH_NOSYS for an
 * unhandled state, and SCSI_DH_DEV_TEMP_BUSY when the command failed with a
 * driver error and no valid sense data.
 * NOTE(review): the 'retval' declaration, the switch header, the per-state
 * return/break statements, part of the ALUA_OPTIMIZE_STPG condition and the
 * check of retval are missing from this listing.
 * NOTE(review): the two "stpg failed, ..." format strings below lack a
 * trailing newline, unlike the other messages in this file.
 */
730 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
733 struct scsi_sense_hdr sense_hdr;
735 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
736 /* Only implicit ALUA supported, retry */
737 return SCSI_DH_RETRY;
740 case SCSI_ACCESS_STATE_OPTIMAL:
742 case SCSI_ACCESS_STATE_ACTIVE:
743 if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
745 (pg->tpgs & TPGS_MODE_IMPLICIT))
748 case SCSI_ACCESS_STATE_STANDBY:
749 case SCSI_ACCESS_STATE_UNAVAILABLE:
751 case SCSI_ACCESS_STATE_OFFLINE:
753 case SCSI_ACCESS_STATE_TRANSITIONING:
756 sdev_printk(KERN_INFO, sdev,
757 "%s: stpg failed, unhandled TPGS state %d",
758 ALUA_DH_NAME, pg->state);
759 return SCSI_DH_NOSYS;
761 retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
764 if (!scsi_sense_valid(&sense_hdr)) {
765 sdev_printk(KERN_INFO, sdev,
766 "%s: stpg failed, result %d",
767 ALUA_DH_NAME, retval);
768 if (driver_byte(retval) == DRIVER_ERROR)
769 return SCSI_DH_DEV_TEMP_BUSY;
771 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
773 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
/* Ask the caller to re-evaluate the group state with a fresh RTPG. */
777 return SCSI_DH_RETRY;
/*
 * Delayed-work handler that runs the RTPG/STPG state machine for one port
 * group.
 *
 * Visible behaviour:
 *  - pg->rtpg_sdev is the device used for all commands; there is an early
 *    exit path that warns if RUN_RTPG/RUN_STPG are set and drops the
 *    reference on @pg;
 *  - ALUA_PG_RUNNING is set for the duration of the run;
 *  - with ALUA_PG_RUN_RTPG set, a group in the transitioning state is first
 *    probed with alua_tur(); SCSI_DH_RETRY requeues the work. Otherwise
 *    alua_rtpg() runs and the work is requeued when it asks for a retry or
 *    when RUN_RTPG was set again in the meantime;
 *  - with ALUA_PG_RUN_STPG still set, alua_stpg() runs; a retry result sets
 *    RUN_RTPG again and requeues the work;
 *  - finally the queued alua_queue_data entries are moved to a local list
 *    and their callbacks are invoked with the last result, ALUA_PG_RUNNING
 *    is cleared and the references on the device and the port group are
 *    dropped.
 *
 * NOTE(review): the 'flags' declaration, the condition of the early exit,
 * the delay arguments of queue_delayed_work(), several return statements
 * and closing braces are missing from this listing.
 */
780 static void alua_rtpg_work(struct work_struct *work)
782 struct alua_port_group *pg =
783 container_of(work, struct alua_port_group, rtpg_work.work);
784 struct scsi_device *sdev;
785 LIST_HEAD(qdata_list);
786 int err = SCSI_DH_OK;
787 struct alua_queue_data *qdata, *tmp;
790 spin_lock_irqsave(&pg->lock, flags);
791 sdev = pg->rtpg_sdev;
793 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
794 WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
795 spin_unlock_irqrestore(&pg->lock, flags);
796 kref_put(&pg->kref, release_port_group);
799 pg->flags |= ALUA_PG_RUNNING;
800 if (pg->flags & ALUA_PG_RUN_RTPG) {
/* Sample the state under the lock; commands are sent with it released. */
801 int state = pg->state;
803 pg->flags &= ~ALUA_PG_RUN_RTPG;
804 spin_unlock_irqrestore(&pg->lock, flags);
805 if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
806 if (alua_tur(sdev) == SCSI_DH_RETRY) {
807 spin_lock_irqsave(&pg->lock, flags);
808 pg->flags &= ~ALUA_PG_RUNNING;
809 pg->flags |= ALUA_PG_RUN_RTPG;
810 spin_unlock_irqrestore(&pg->lock, flags);
811 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
815 /* Send RTPG on failure or if TUR indicates SUCCESS */
817 err = alua_rtpg(sdev, pg);
818 spin_lock_irqsave(&pg->lock, flags);
819 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
820 pg->flags &= ~ALUA_PG_RUNNING;
821 pg->flags |= ALUA_PG_RUN_RTPG;
822 spin_unlock_irqrestore(&pg->lock, flags);
823 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
/* A failed RTPG cancels any pending STPG request. */
827 if (err != SCSI_DH_OK)
828 pg->flags &= ~ALUA_PG_RUN_STPG;
830 if (pg->flags & ALUA_PG_RUN_STPG) {
831 pg->flags &= ~ALUA_PG_RUN_STPG;
832 spin_unlock_irqrestore(&pg->lock, flags);
833 err = alua_stpg(sdev, pg);
834 spin_lock_irqsave(&pg->lock, flags);
835 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
836 pg->flags |= ALUA_PG_RUN_RTPG;
838 pg->flags &= ~ALUA_PG_RUNNING;
839 spin_unlock_irqrestore(&pg->lock, flags);
840 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
/* Detach the waiters so that their callbacks run without pg->lock held. */
846 list_splice_init(&pg->rtpg_list, &qdata_list);
847 pg->rtpg_sdev = NULL;
848 spin_unlock_irqrestore(&pg->lock, flags);
850 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
851 list_del(&qdata->entry);
852 if (qdata->callback_fn)
853 qdata->callback_fn(qdata->callback_data, err);
856 spin_lock_irqsave(&pg->lock, flags);
857 pg->flags &= ~ALUA_PG_RUNNING;
858 spin_unlock_irqrestore(&pg->lock, flags);
859 scsi_device_put(sdev);
860 kref_put(&pg->kref, release_port_group);
864 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
865 * @pg: ALUA port group associated with @sdev.
866 * @sdev: SCSI device for which to submit an RTPG.
867 * @qdata: Information about the callback to invoke after the RTPG.
868 * @force: Whether or not to submit an RTPG if a work item that will submit an
869 * RTPG already has been scheduled.
871 * Returns true if and only if alua_rtpg_work() will be called asynchronously.
872 * That function is responsible for calling @qdata->callback_fn().
/*
 * Request an asynchronous RTPG (and, when @qdata is given, an STPG) for @pg.
 * Bails out before touching any state when @pg is NULL or no reference on
 * @sdev can be obtained. Otherwise, under pg->lock, @qdata is appended to
 * pg->rtpg_list and the RUN_RTPG/RUN_STPG flags are set; if no device is
 * recorded in pg->rtpg_sdev yet, @sdev becomes the device used by the worker.
 * The work item is queued with a delay of ALUA_RTPG_DELAY_MSECS; if it turns
 * out to be queued already, the port group reference is dropped again. The
 * device reference is dropped at the end on a path whose condition is not
 * visible here.
 * NOTE(review): the local declarations, the 'if (qdata)' guard, the
 * kref_get() calls that presumably pair with the kref_put() below, the
 * conditions around the final put calls and the return statements are
 * missing from this listing.
 */
874 static bool alua_rtpg_queue(struct alua_port_group *pg,
875 struct scsi_device *sdev,
876 struct alua_queue_data *qdata, bool force)
880 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
883 spin_lock_irqsave(&pg->lock, flags);
885 list_add_tail(&qdata->entry, &pg->rtpg_list);
886 pg->flags |= ALUA_PG_RUN_STPG;
889 if (pg->rtpg_sdev == NULL) {
891 pg->flags |= ALUA_PG_RUN_RTPG;
893 pg->rtpg_sdev = sdev;
895 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
896 pg->flags |= ALUA_PG_RUN_RTPG;
897 /* Do not queue if the worker is already running */
898 if (!(pg->flags & ALUA_PG_RUNNING)) {
904 spin_unlock_irqrestore(&pg->lock, flags);
907 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
908 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
911 kref_put(&pg->kref, release_port_group);
914 scsi_device_put(sdev);
920 * alua_initialize - Initialize ALUA state
921 * @sdev: the device to be initialized
923 * For the prep_fn to work correctly we have
924 * to initialize the ALUA state for the device.
/*
 * Determine the TPGS mode of @sdev and, unless it is TPGS_MODE_NONE, bind
 * @h to its port group via alua_check_vpd(). The whole sequence runs under
 * h->init_mutex. The result defaults to SCSI_DH_DEV_UNSUPP.
 * NOTE(review): the statements between alua_check_vpd() and the unlock
 * (presumably recording the result in h->init_error, which alua_activate()
 * reads) and the final return are missing from this listing.
 */
926 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
928 int err = SCSI_DH_DEV_UNSUPP, tpgs;
930 mutex_lock(&h->init_mutex);
931 tpgs = alua_check_tpgs(sdev);
932 if (tpgs != TPGS_MODE_NONE)
933 err = alua_check_vpd(sdev, h, tpgs);
935 mutex_unlock(&h->init_mutex);
939 * alua_set_params - set/unset the optimize flag
940 * @sdev: device on the path to be activated
941 * @params: parameters in the following format
942 * "no_of_params\0param1\0param2\0param3\0...\0"
943 * For example, to set the flag pass the following parameters
944 * from multipath.conf
945 * hardware_handler "2 alua 1"
/*
 * set_params callback: parse the parameter string and set or clear
 * ALUA_OPTIMIZE_STPG on the port group of @sdev.
 * The first number is the argument count and must be 1; the second is the
 * flag value and must be 0 or 1. The flag is changed under pg->lock.
 * NOTE(review): the 'flags' declaration, the error returns for malformed
 * input, the code that advances 'p' to the second field, the RCU read-side
 * locking, the NULL check on pg, the 'if (optimize)' / 'else' around the two
 * flag assignments and the final return are missing from this listing.
 */
947 static int alua_set_params(struct scsi_device *sdev, const char *params)
949 struct alua_dh_data *h = sdev->handler_data;
950 struct alua_port_group *pg = NULL;
951 unsigned int optimize = 0, argc;
952 const char *p = params;
953 int result = SCSI_DH_OK;
956 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
961 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
965 pg = rcu_dereference(h->pg);
970 spin_lock_irqsave(&pg->lock, flags);
972 pg->flags |= ALUA_OPTIMIZE_STPG;
974 pg->flags &= ~ALUA_OPTIMIZE_STPG;
975 spin_unlock_irqrestore(&pg->lock, flags);
982 * alua_activate - activate a path
983 * @sdev: device on the path to be activated
985 * We're currently switching the port group to be activated only and
986 * let the array figure out the rest.
987 * There may be other arrays which require us to switch all port groups
988 * based on a certain policy. But until we actually encounter them it
991 static int alua_activate(struct scsi_device *sdev,
992 activate_complete fn, void *data)
994 struct alua_dh_data *h = sdev->handler_data;
995 int err = SCSI_DH_OK;
996 struct alua_queue_data *qdata;
997 struct alua_port_group *pg;
999 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
1001 err = SCSI_DH_RES_TEMP_UNAVAIL;
1004 qdata->callback_fn = fn;
1005 qdata->callback_data = data;
1007 mutex_lock(&h->init_mutex);
1009 pg = rcu_dereference(h->pg);
1010 if (!pg || !kref_get_unless_zero(&pg->kref)) {
1013 err = h->init_error;
1014 mutex_unlock(&h->init_mutex);
1018 mutex_unlock(&h->init_mutex);
1020 if (alua_rtpg_queue(pg, sdev, qdata, true))
1023 err = SCSI_DH_DEV_OFFLINED;
1024 kref_put(&pg->kref, release_port_group);
1032 * alua_check - check path status
1033 * @sdev: device on the path to be checked
1035 * Check the device status
/*
 * Queue a re-evaluation of the port group state for @sdev without a
 * completion callback. A reference on the port group is taken with
 * kref_get_unless_zero() for the duration of the call; @force is passed
 * through to alua_rtpg_queue().
 * NOTE(review): the RCU read-side lock/unlock calls and the early return
 * for a missing port group are not present in this listing.
 */
1037 static void alua_check(struct scsi_device *sdev, bool force)
1039 struct alua_dh_data *h = sdev->handler_data;
1040 struct alua_port_group *pg;
1043 pg = rcu_dereference(h->pg);
1044 if (!pg || !kref_get_unless_zero(&pg->kref)) {
1050 alua_rtpg_queue(pg, sdev, NULL, force);
1051 kref_put(&pg->kref, release_port_group);
1055 * alua_prep_fn - request callback
1057 * Fail I/O to all paths not in state
1058 * active/optimized or active/non-optimized.
/*
 * prep_fn callback: decide from the port group state whether @req may be
 * sent to @sdev.
 * A transitioning group yields BLK_STS_RESOURCE; states not listed in the
 * switch mark the request RQF_QUIET and fail it with BLK_STS_IOERR.
 * 'state' defaults to active/optimized.
 * NOTE(review): the RCU locking, the assignment of 'state' from the port
 * group, the switch header, the return for the optimal/active/LBA-dependent
 * cases (presumably success) and the default label are missing from this
 * listing.
 */
1060 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
1062 struct alua_dh_data *h = sdev->handler_data;
1063 struct alua_port_group *pg;
1064 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
1067 pg = rcu_dereference(h->pg);
1073 case SCSI_ACCESS_STATE_OPTIMAL:
1074 case SCSI_ACCESS_STATE_ACTIVE:
1075 case SCSI_ACCESS_STATE_LBA:
1077 case SCSI_ACCESS_STATE_TRANSITIONING:
1078 return BLK_STS_RESOURCE;
1080 req->rq_flags |= RQF_QUIET;
1081 return BLK_STS_IOERR;
/*
 * rescan callback: re-run alua_initialize() for @sdev with its existing
 * handler data; the result of the initialization is not checked here.
 */
1085 static void alua_rescan(struct scsi_device *sdev)
1087 struct alua_dh_data *h = sdev->handler_data;
1089 alua_initialize(sdev, h);
1093 * alua_bus_attach - Attach device handler
1094 * @sdev: device to be attached to
/*
 * attach callback: allocate and initialize the per-device handler data,
 * run alua_initialize() and publish the data in sdev->handler_data.
 * Initialization results other than SCSI_DH_OK and SCSI_DH_DEV_OFFLINED
 * take a failure path. Returns SCSI_DH_NOMEM when the allocation fails.
 * NOTE(review): the 'err' declaration, the NULL check after kzalloc(), the
 * assignment of h->sdev, the failure path (presumably freeing 'h') and the
 * return statements are missing from this listing.
 */
1096 static int alua_bus_attach(struct scsi_device *sdev)
1098 struct alua_dh_data *h;
1101 h = kzalloc(sizeof(*h) , GFP_KERNEL);
1103 return SCSI_DH_NOMEM;
1104 spin_lock_init(&h->pg_lock);
1105 rcu_assign_pointer(h->pg, NULL);
1106 h->init_error = SCSI_DH_OK;
1108 INIT_LIST_HEAD(&h->node);
1110 mutex_init(&h->init_mutex);
1111 err = alua_initialize(sdev, h);
1112 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
1115 sdev->handler_data = h;
1123 * alua_bus_detach - Detach device handler
1124 * @sdev: device to be detached from
/*
 * detach callback: unhook the handler data of @sdev from its port group.
 * h->pg is cleared under h->pg_lock; afterwards h->node is removed from the
 * group's device list under pg->lock, the reference on the group is dropped
 * and sdev->handler_data is reset.
 * NOTE(review): one statement inside the pg_lock section, the NULL check
 * that presumably guards the use of 'pg', and the release of 'h' are
 * missing from this listing.
 */
1126 static void alua_bus_detach(struct scsi_device *sdev)
1128 struct alua_dh_data *h = sdev->handler_data;
1129 struct alua_port_group *pg;
1131 spin_lock(&h->pg_lock);
1132 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
1133 rcu_assign_pointer(h->pg, NULL);
1135 spin_unlock(&h->pg_lock);
1137 spin_lock_irq(&pg->lock);
1138 list_del_rcu(&h->node);
1139 spin_unlock_irq(&pg->lock);
1140 kref_put(&pg->kref, release_port_group);
1142 sdev->handler_data = NULL;
/*
 * Device handler descriptor passed to scsi_register_device_handler() and
 * scsi_unregister_device_handler(); wires the callbacks defined in this
 * file into the SCSI device handler interface.
 * NOTE(review): the closing "};" is missing from this listing.
 */
1146 static struct scsi_device_handler alua_dh = {
1147 .name = ALUA_DH_NAME,
1148 .module = THIS_MODULE,
1149 .attach = alua_bus_attach,
1150 .detach = alua_bus_detach,
1151 .prep_fn = alua_prep_fn,
1152 .check_sense = alua_check_sense,
1153 .activate = alua_activate,
1154 .rescan = alua_rescan,
1155 .set_params = alua_set_params,
1158 static int __init alua_init(void)
1162 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
1164 /* Temporary failure, bypass */
1165 return SCSI_DH_DEV_TEMP_BUSY;
1168 r = scsi_register_device_handler(&alua_dh);
1170 printk(KERN_ERR "%s: Failed to register scsi device handler",
1172 destroy_workqueue(kaluad_wq);
/*
 * Module exit: unregister the device handler first, then destroy the
 * workqueue its work items are queued on.
 */
1177 static void __exit alua_exit(void)
1179 scsi_unregister_device_handler(&alua_dh);
1180 destroy_workqueue(kaluad_wq);
/* Module entry/exit hooks and metadata. */
1183 module_init(alua_init);
1184 module_exit(alua_exit);
1186 MODULE_DESCRIPTION("DM Multipath ALUA support");
1187 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
1188 MODULE_LICENSE("GPL");
1189 MODULE_VERSION(ALUA_DH_VER);