scsi_dh_alua: Use workqueue for RTPG
[linux-2.6-block.git] / drivers / scsi / device_handler / scsi_dh_alua.c
CommitLineData
057ea7c9
HR
1/*
2 * Generic SCSI-3 ALUA SCSI Device Handler
3 *
69723d17 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
057ea7c9
HR
5 * All rights reserved.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 */
5a0e3ad6 22#include <linux/slab.h>
69723d17 23#include <linux/delay.h>
acf3368f 24#include <linux/module.h>
a7089770 25#include <asm/unaligned.h>
057ea7c9 26#include <scsi/scsi.h>
80bd68d6 27#include <scsi/scsi_dbg.h>
057ea7c9
HR
28#include <scsi/scsi_eh.h>
29#include <scsi/scsi_dh.h>
30
#define ALUA_DH_NAME "alua"
#define ALUA_DH_VER "1.3"

/*
 * Asymmetric access states. alua_rtpg() stores the low nibble of the
 * matching RTPG descriptor's first byte in pg->state and the rest of
 * the handler compares it against these values.
 */
#define TPGS_STATE_OPTIMIZED		0x0
#define TPGS_STATE_NONOPTIMIZED		0x1
#define TPGS_STATE_STANDBY		0x2
#define TPGS_STATE_UNAVAILABLE		0x3
#define TPGS_STATE_LBA_DEPENDENT	0x4
#define TPGS_STATE_OFFLINE		0xe
#define TPGS_STATE_TRANSITIONING	0xf

/*
 * "Supported states" bits; alua_rtpg() tests them against the second
 * byte of the matching RTPG descriptor when logging the port group.
 */
#define TPGS_SUPPORT_NONE		0x00
#define TPGS_SUPPORT_OPTIMIZED		0x01
#define TPGS_SUPPORT_NONOPTIMIZED	0x02
#define TPGS_SUPPORT_STANDBY		0x04
#define TPGS_SUPPORT_UNAVAILABLE	0x08
#define TPGS_SUPPORT_LBA_DEPENDENT	0x10
#define TPGS_SUPPORT_OFFLINE		0x40
#define TPGS_SUPPORT_TRANSITION		0x80

/* RTPG response format field, extracted from byte 4 of the returned data */
#define RTPG_FMT_MASK			0x70
#define RTPG_FMT_EXT_HDR		0x10

/* TPGS modes; alua_check_tpgs() matches scsi_device_tpgs() against these */
#define TPGS_MODE_UNINITIALIZED		(-1)
#define TPGS_MODE_NONE			0x0
#define TPGS_MODE_IMPLICIT		0x1
#define TPGS_MODE_EXPLICIT		0x2

#define ALUA_RTPG_SIZE			128	/* initial RTPG buffer size in bytes */
#define ALUA_FAILOVER_TIMEOUT		60	/* seconds; multiplied by HZ where used */
#define ALUA_FAILOVER_RETRIES		5	/* retry count passed with RTPG/STPG */
#define ALUA_RTPG_DELAY_MSECS		5	/* delay before a freshly queued RTPG runs */

/* device handler flags */
#define ALUA_OPTIMIZE_STPG		0x01
#define ALUA_RTPG_EXT_HDR_UNSUPP	0x02
/* State machine flags */
#define ALUA_PG_RUN_RTPG		0x10
#define ALUA_PG_RUN_STPG		0x20
#define ALUA_PG_RUNNING			0x40
4335d092 71
aa90f490
HR
72static uint optimize_stpg;
73module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
74MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
75
43394c67
HR
76static LIST_HEAD(port_group_list);
77static DEFINE_SPINLOCK(port_group_lock);
03197b61 78static struct workqueue_struct *kaluad_wq;
43394c67
HR
79
80struct alua_port_group {
81 struct kref kref;
03197b61 82 struct rcu_head rcu;
43394c67 83 struct list_head node;
0047220c
HR
84 unsigned char device_id_str[256];
85 int device_id_len;
057ea7c9 86 int group_id;
057ea7c9
HR
87 int tpgs;
88 int state;
dcd3a754 89 int pref;
4335d092 90 unsigned flags; /* used for optimizing STPG */
3588c5a2 91 unsigned char transition_tmo;
03197b61
HR
92 unsigned long expiry;
93 unsigned long interval;
94 struct delayed_work rtpg_work;
95 spinlock_t lock;
96 struct list_head rtpg_list;
97 struct scsi_device *rtpg_sdev;
43394c67
HR
98};
99
100struct alua_dh_data {
101 struct alua_port_group *pg;
102 int group_id;
03197b61 103 spinlock_t pg_lock;
96e65865 104 struct scsi_device *sdev;
03197b61
HR
105 int init_error;
106 struct mutex init_mutex;
107};
108
109struct alua_queue_data {
110 struct list_head entry;
96e65865
CS
111 activate_complete callback_fn;
112 void *callback_data;
057ea7c9
HR
113};
114
#define ALUA_POLICY_SWITCH_CURRENT	0
#define ALUA_POLICY_SWITCH_ALL		1

/* Defined further down; needed earlier by alua_alloc_pg()/alua_check_vpd() */
static void alua_rtpg_work(struct work_struct *work);
static void alua_rtpg_queue(struct alua_port_group *pg,
			    struct scsi_device *sdev,
			    struct alua_queue_data *qdata);
96e65865 122
43394c67
HR
123static void release_port_group(struct kref *kref)
124{
125 struct alua_port_group *pg;
126
127 pg = container_of(kref, struct alua_port_group, kref);
03197b61
HR
128 if (pg->rtpg_sdev)
129 flush_delayed_work(&pg->rtpg_work);
43394c67
HR
130 spin_lock(&port_group_lock);
131 list_del(&pg->node);
132 spin_unlock(&port_group_lock);
03197b61 133 kfree_rcu(pg, rcu);
43394c67
HR
134}
135
057ea7c9
HR
136/*
137 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
138 * @sdev: sdev the command should be sent to
139 */
40bb61a7
HR
140static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
141 int bufflen, struct scsi_sense_hdr *sshdr, int flags)
057ea7c9 142{
40bb61a7
HR
143 u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)];
144 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
145 REQ_FAILFAST_DRIVER;
057ea7c9
HR
146
147 /* Prepare the command. */
40bb61a7
HR
148 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN));
149 cdb[0] = MAINTENANCE_IN;
d42ae5f3 150 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
40bb61a7 151 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
8e67ce60 152 else
40bb61a7
HR
153 cdb[1] = MI_REPORT_TARGET_PGS;
154 put_unaligned_be32(bufflen, &cdb[6]);
155
156 return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE,
157 buff, bufflen, sshdr,
158 ALUA_FAILOVER_TIMEOUT * HZ,
159 ALUA_FAILOVER_RETRIES, NULL, req_flags);
057ea7c9
HR
160}
161
96e65865 162/*
b2460756 163 * submit_stpg - Issue a SET TARGET PORT GROUP command
057ea7c9
HR
164 *
165 * Currently we're only setting the current target port group state
166 * to 'active/optimized' and let the array firmware figure out
167 * the states of the remaining groups.
168 */
40bb61a7
HR
169static int submit_stpg(struct scsi_device *sdev, int group_id,
170 struct scsi_sense_hdr *sshdr)
057ea7c9 171{
40bb61a7 172 u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)];
b2460756 173 unsigned char stpg_data[8];
057ea7c9 174 int stpg_len = 8;
40bb61a7
HR
175 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
176 REQ_FAILFAST_DRIVER;
057ea7c9
HR
177
178 /* Prepare the data buffer */
b2460756
HR
179 memset(stpg_data, 0, stpg_len);
180 stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f;
181 put_unaligned_be16(group_id, &stpg_data[6]);
057ea7c9 182
057ea7c9 183 /* Prepare the command. */
40bb61a7
HR
184 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT));
185 cdb[0] = MAINTENANCE_OUT;
186 cdb[1] = MO_SET_TARGET_PGS;
187 put_unaligned_be32(stpg_len, &cdb[6]);
188
189 return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
190 stpg_data, stpg_len,
191 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
192 ALUA_FAILOVER_RETRIES, NULL, req_flags);
057ea7c9
HR
193}
194
0047220c
HR
195struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
196 int group_id)
197{
198 struct alua_port_group *pg;
199
200 list_for_each_entry(pg, &port_group_list, node) {
201 if (pg->group_id != group_id)
202 continue;
203 if (pg->device_id_len != id_size)
204 continue;
205 if (strncmp(pg->device_id_str, id_str, id_size))
206 continue;
207 if (!kref_get_unless_zero(&pg->kref))
208 continue;
209 return pg;
210 }
211
212 return NULL;
213}
214
43394c67
HR
215/*
216 * alua_alloc_pg - Allocate a new port_group structure
217 * @sdev: scsi device
218 * @h: alua device_handler data
219 * @group_id: port group id
220 *
221 * Allocate a new port_group structure for a given
222 * device.
223 */
224struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
225 int group_id, int tpgs)
226{
0047220c 227 struct alua_port_group *pg, *tmp_pg;
43394c67
HR
228
229 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
230 if (!pg)
0047220c 231 return ERR_PTR(-ENOMEM);
43394c67 232
0047220c
HR
233 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
234 sizeof(pg->device_id_str));
235 if (pg->device_id_len <= 0) {
236 /*
237 * Internal error: TPGS supported but no device
238 * identifcation found. Disable ALUA support.
239 */
240 kfree(pg);
241 sdev_printk(KERN_INFO, sdev,
242 "%s: No device descriptors found\n",
243 ALUA_DH_NAME);
244 return ERR_PTR(-ENXIO);
245 }
43394c67
HR
246 pg->group_id = group_id;
247 pg->tpgs = tpgs;
248 pg->state = TPGS_STATE_OPTIMIZED;
aa90f490
HR
249 if (optimize_stpg)
250 pg->flags |= ALUA_OPTIMIZE_STPG;
43394c67 251 kref_init(&pg->kref);
03197b61
HR
252 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
253 INIT_LIST_HEAD(&pg->rtpg_list);
254 INIT_LIST_HEAD(&pg->node);
255 spin_lock_init(&pg->lock);
0047220c 256
43394c67 257 spin_lock(&port_group_lock);
0047220c
HR
258 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
259 group_id);
260 if (tmp_pg) {
261 spin_unlock(&port_group_lock);
262 kfree(pg);
263 return tmp_pg;
264 }
265
43394c67
HR
266 list_add(&pg->node, &port_group_list);
267 spin_unlock(&port_group_lock);
268
269 return pg;
270}
271
057ea7c9 272/*
d7c48feb 273 * alua_check_tpgs - Evaluate TPGS setting
057ea7c9
HR
274 * @sdev: device to be checked
275 *
d7c48feb 276 * Examine the TPGS setting of the sdev to find out if ALUA
057ea7c9
HR
277 * is supported.
278 */
ad0ea64c 279static int alua_check_tpgs(struct scsi_device *sdev)
057ea7c9 280{
ad0ea64c 281 int tpgs = TPGS_MODE_NONE;
057ea7c9 282
db5a6a60
HR
283 /*
284 * ALUA support for non-disk devices is fraught with
285 * difficulties, so disable it for now.
286 */
287 if (sdev->type != TYPE_DISK) {
db5a6a60
HR
288 sdev_printk(KERN_INFO, sdev,
289 "%s: disable for non-disk devices\n",
290 ALUA_DH_NAME);
ad0ea64c 291 return tpgs;
db5a6a60
HR
292 }
293
ad0ea64c
HR
294 tpgs = scsi_device_tpgs(sdev);
295 switch (tpgs) {
057ea7c9
HR
296 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
297 sdev_printk(KERN_INFO, sdev,
298 "%s: supports implicit and explicit TPGS\n",
299 ALUA_DH_NAME);
300 break;
301 case TPGS_MODE_EXPLICIT:
302 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
303 ALUA_DH_NAME);
304 break;
305 case TPGS_MODE_IMPLICIT:
306 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
307 ALUA_DH_NAME);
308 break;
6cc05d45 309 case TPGS_MODE_NONE:
057ea7c9
HR
310 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
311 ALUA_DH_NAME);
057ea7c9 312 break;
6cc05d45
HR
313 default:
314 sdev_printk(KERN_INFO, sdev,
315 "%s: unsupported TPGS setting %d\n",
ad0ea64c
HR
316 ALUA_DH_NAME, tpgs);
317 tpgs = TPGS_MODE_NONE;
6cc05d45 318 break;
057ea7c9
HR
319 }
320
ad0ea64c 321 return tpgs;
057ea7c9
HR
322}
323
324/*
9b80dcec 325 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
057ea7c9
HR
326 * @sdev: device to be checked
327 *
328 * Extract the relative target port and the target port group
329 * descriptor from the list of identificators.
330 */
a4253fde
HR
331static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
332 int tpgs)
057ea7c9 333{
83ea0e5e 334 int rel_port = -1, group_id;
03197b61 335 struct alua_port_group *pg, *old_pg = NULL;
057ea7c9 336
83ea0e5e
HR
337 group_id = scsi_vpd_tpg_id(sdev, &rel_port);
338 if (group_id < 0) {
057ea7c9
HR
339 /*
340 * Internal error; TPGS supported but required
341 * VPD identification descriptors not present.
342 * Disable ALUA support
343 */
344 sdev_printk(KERN_INFO, sdev,
345 "%s: No target port descriptors found\n",
346 ALUA_DH_NAME);
9b80dcec 347 return SCSI_DH_DEV_UNSUPP;
057ea7c9 348 }
a4253fde 349
03197b61
HR
350 pg = alua_alloc_pg(sdev, group_id, tpgs);
351 if (IS_ERR(pg)) {
352 if (PTR_ERR(pg) == -ENOMEM)
a4253fde
HR
353 return SCSI_DH_NOMEM;
354 return SCSI_DH_DEV_UNSUPP;
355 }
9b80dcec 356 sdev_printk(KERN_INFO, sdev,
a4253fde 357 "%s: device %s port group %x rel port %x\n",
03197b61
HR
358 ALUA_DH_NAME, pg->device_id_str, group_id, rel_port);
359
360 /* Check for existing port group references */
361 spin_lock(&h->pg_lock);
362 old_pg = h->pg;
363 if (old_pg != pg) {
364 /* port group has changed. Update to new port group */
365 rcu_assign_pointer(h->pg, pg);
366 }
367 alua_rtpg_queue(h->pg, sdev, NULL);
368 spin_unlock(&h->pg_lock);
369
370 if (old_pg)
371 kref_put(&old_pg->kref, release_port_group);
057ea7c9 372
03197b61 373 return SCSI_DH_OK;
057ea7c9
HR
374}
375
376static char print_alua_state(int state)
377{
378 switch (state) {
379 case TPGS_STATE_OPTIMIZED:
380 return 'A';
381 case TPGS_STATE_NONOPTIMIZED:
382 return 'N';
383 case TPGS_STATE_STANDBY:
384 return 'S';
385 case TPGS_STATE_UNAVAILABLE:
386 return 'U';
69723d17
HR
387 case TPGS_STATE_LBA_DEPENDENT:
388 return 'L';
057ea7c9
HR
389 case TPGS_STATE_OFFLINE:
390 return 'O';
391 case TPGS_STATE_TRANSITIONING:
392 return 'T';
393 default:
394 return 'X';
395 }
396}
397
398static int alua_check_sense(struct scsi_device *sdev,
399 struct scsi_sense_hdr *sense_hdr)
400{
401 switch (sense_hdr->sense_key) {
402 case NOT_READY:
403 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
404 /*
405 * LUN Not Accessible - ALUA state transition
406 */
c7dbb627 407 return ADD_TO_MLQUEUE;
057ea7c9
HR
408 break;
409 case UNIT_ATTENTION:
410 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
411 /*
412 * Power On, Reset, or Bus Device Reset, just retry.
413 */
c7dbb627 414 return ADD_TO_MLQUEUE;
c20ee7b5
SS
415 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
416 /*
417 * Device internal reset
418 */
419 return ADD_TO_MLQUEUE;
410f02d8
MB
420 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
421 /*
422 * Mode Parameters Changed
423 */
424 return ADD_TO_MLQUEUE;
bf81973a 425 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
057ea7c9
HR
426 /*
427 * ALUA state changed
428 */
c7dbb627 429 return ADD_TO_MLQUEUE;
bf81973a 430 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
057ea7c9
HR
431 /*
432 * Implicit ALUA state transition failed
433 */
c7dbb627 434 return ADD_TO_MLQUEUE;
bf81973a
MB
435 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
436 /*
437 * Inquiry data has changed
438 */
439 return ADD_TO_MLQUEUE;
440 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
4d086f6b
IH
441 /*
442 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
443 * when switching controllers on targets like
444 * Intel Multi-Flex. We can just retry.
445 */
446 return ADD_TO_MLQUEUE;
057ea7c9
HR
447 break;
448 }
449
450 return SCSI_RETURN_NOT_HANDLED;
451}
452
057ea7c9
HR
453/*
454 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
455 * @sdev: the device to be evaluated.
456 *
457 * Evaluate the Target Port Group State.
458 * Returns SCSI_DH_DEV_OFFLINED if the path is
25985edc 459 * found to be unusable.
057ea7c9 460 */
28261402 461static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
057ea7c9
HR
462{
463 struct scsi_sense_hdr sense_hdr;
c49c8345
HR
464 int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE;
465 unsigned char *ucp, *buff;
5597cafc 466 unsigned err, retval;
3588c5a2
RE
467 unsigned int tpg_desc_tbl_off;
468 unsigned char orig_transition_tmo;
469
03197b61
HR
470 if (!pg->expiry) {
471 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
472
473 if (pg->transition_tmo)
474 transition_tmo = pg->transition_tmo * HZ;
475
476 pg->expiry = round_jiffies_up(jiffies + transition_tmo);
477 }
057ea7c9 478
c49c8345
HR
479 buff = kzalloc(bufflen, GFP_KERNEL);
480 if (!buff)
481 return SCSI_DH_DEV_TEMP_BUSY;
482
057ea7c9 483 retry:
43394c67 484 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
40bb61a7 485
5597cafc 486 if (retval) {
40bb61a7 487 if (!scsi_sense_valid(&sense_hdr)) {
5597cafc
HR
488 sdev_printk(KERN_INFO, sdev,
489 "%s: rtpg failed, result %d\n",
490 ALUA_DH_NAME, retval);
c49c8345 491 kfree(buff);
40bb61a7 492 if (driver_byte(retval) == DRIVER_ERROR)
5597cafc 493 return SCSI_DH_DEV_TEMP_BUSY;
057ea7c9 494 return SCSI_DH_IO;
5597cafc 495 }
057ea7c9 496
8e67ce60
RE
497 /*
498 * submit_rtpg() has failed on existing arrays
499 * when requesting extended header info, and
500 * the array doesn't support extended headers,
501 * even though it shouldn't according to T10.
502 * The retry without rtpg_ext_hdr_req set
503 * handles this.
504 */
43394c67 505 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
8e67ce60
RE
506 sense_hdr.sense_key == ILLEGAL_REQUEST &&
507 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
43394c67 508 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
8e67ce60
RE
509 goto retry;
510 }
e2d817db
HR
511 /*
512 * Retry on ALUA state transition or if any
513 * UNIT ATTENTION occurred.
514 */
515 if (sense_hdr.sense_key == NOT_READY &&
516 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
517 err = SCSI_DH_RETRY;
518 else if (sense_hdr.sense_key == UNIT_ATTENTION)
519 err = SCSI_DH_RETRY;
03197b61
HR
520 if (err == SCSI_DH_RETRY &&
521 pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
80bd68d6
HR
522 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
523 ALUA_DH_NAME);
524 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
03197b61 525 return err;
80bd68d6
HR
526 }
527 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
528 ALUA_DH_NAME);
529 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
c49c8345 530 kfree(buff);
03197b61 531 pg->expiry = 0;
80bd68d6 532 return SCSI_DH_IO;
057ea7c9 533 }
057ea7c9 534
c49c8345 535 len = get_unaligned_be32(&buff[0]) + 4;
057ea7c9 536
c49c8345 537 if (len > bufflen) {
057ea7c9 538 /* Resubmit with the correct length */
c49c8345
HR
539 kfree(buff);
540 bufflen = len;
541 buff = kmalloc(bufflen, GFP_KERNEL);
542 if (!buff) {
057ea7c9 543 sdev_printk(KERN_WARNING, sdev,
cadbd4a5 544 "%s: kmalloc buffer failed\n",__func__);
057ea7c9 545 /* Temporary failure, bypass */
03197b61 546 pg->expiry = 0;
057ea7c9
HR
547 return SCSI_DH_DEV_TEMP_BUSY;
548 }
549 goto retry;
550 }
551
43394c67 552 orig_transition_tmo = pg->transition_tmo;
c49c8345 553 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
43394c67 554 pg->transition_tmo = buff[5];
3588c5a2 555 else
43394c67 556 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
3588c5a2 557
28261402 558 if (orig_transition_tmo != pg->transition_tmo) {
3588c5a2
RE
559 sdev_printk(KERN_INFO, sdev,
560 "%s: transition timeout set to %d seconds\n",
43394c67 561 ALUA_DH_NAME, pg->transition_tmo);
03197b61 562 pg->expiry = jiffies + pg->transition_tmo * HZ;
3588c5a2
RE
563 }
564
c49c8345 565 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
3588c5a2
RE
566 tpg_desc_tbl_off = 8;
567 else
568 tpg_desc_tbl_off = 4;
569
c49c8345 570 for (k = tpg_desc_tbl_off, ucp = buff + tpg_desc_tbl_off;
3588c5a2
RE
571 k < len;
572 k += off, ucp += off) {
573
43394c67
HR
574 if (pg->group_id == get_unaligned_be16(&ucp[2])) {
575 pg->state = ucp[0] & 0x0f;
576 pg->pref = ucp[0] >> 7;
057ea7c9
HR
577 valid_states = ucp[1];
578 }
579 off = 8 + (ucp[7] * 4);
580 }
581
582 sdev_printk(KERN_INFO, sdev,
dcd3a754 583 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
43394c67
HR
584 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
585 pg->pref ? "preferred" : "non-preferred",
057ea7c9
HR
586 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
587 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
69723d17 588 valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
057ea7c9
HR
589 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
590 valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
591 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
592 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
593
43394c67 594 switch (pg->state) {
69723d17 595 case TPGS_STATE_TRANSITIONING:
03197b61 596 if (time_before(jiffies, pg->expiry)) {
28261402 597 /* State transition, retry */
03197b61
HR
598 pg->interval = 2;
599 err = SCSI_DH_RETRY;
600 } else {
601 /* Transitioning time exceeded, set port to standby */
602 err = SCSI_DH_IO;
603 pg->state = TPGS_STATE_STANDBY;
604 pg->expiry = 0;
057ea7c9 605 }
69723d17
HR
606 break;
607 case TPGS_STATE_OFFLINE:
e47f8976 608 /* Path unusable */
69723d17 609 err = SCSI_DH_DEV_OFFLINED;
03197b61 610 pg->expiry = 0;
69723d17
HR
611 break;
612 default:
613 /* Useable path if active */
614 err = SCSI_DH_OK;
03197b61 615 pg->expiry = 0;
69723d17 616 break;
057ea7c9 617 }
c49c8345 618 kfree(buff);
057ea7c9
HR
619 return err;
620}
621
f2ecf13a
HR
622/*
623 * alua_stpg - Issue a SET TARGET PORT GROUP command
624 *
625 * Issue a SET TARGET PORT GROUP command and evaluate the
b2460756
HR
626 * response. Returns SCSI_DH_RETRY per default to trigger
627 * a re-evaluation of the target group state or SCSI_DH_OK
628 * if no further action needs to be taken.
f2ecf13a 629 */
43394c67 630static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
f2ecf13a 631{
b2460756
HR
632 int retval;
633 struct scsi_sense_hdr sense_hdr;
f2ecf13a 634
43394c67 635 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
b2460756
HR
636 /* Only implicit ALUA supported, retry */
637 return SCSI_DH_RETRY;
638 }
43394c67 639 switch (pg->state) {
b2460756
HR
640 case TPGS_STATE_OPTIMIZED:
641 return SCSI_DH_OK;
f2ecf13a 642 case TPGS_STATE_NONOPTIMIZED:
43394c67
HR
643 if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
644 !pg->pref &&
645 (pg->tpgs & TPGS_MODE_IMPLICIT))
b2460756 646 return SCSI_DH_OK;
f2ecf13a
HR
647 break;
648 case TPGS_STATE_STANDBY:
649 case TPGS_STATE_UNAVAILABLE:
f2ecf13a
HR
650 break;
651 case TPGS_STATE_OFFLINE:
b2460756 652 return SCSI_DH_IO;
f2ecf13a 653 case TPGS_STATE_TRANSITIONING:
f2ecf13a
HR
654 break;
655 default:
b2460756
HR
656 sdev_printk(KERN_INFO, sdev,
657 "%s: stpg failed, unhandled TPGS state %d",
43394c67 658 ALUA_DH_NAME, pg->state);
b2460756 659 return SCSI_DH_NOSYS;
f2ecf13a 660 }
43394c67 661 retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
f2ecf13a 662
b2460756 663 if (retval) {
40bb61a7 664 if (!scsi_sense_valid(&sense_hdr)) {
b2460756
HR
665 sdev_printk(KERN_INFO, sdev,
666 "%s: stpg failed, result %d",
667 ALUA_DH_NAME, retval);
40bb61a7 668 if (driver_byte(retval) == DRIVER_ERROR)
b2460756
HR
669 return SCSI_DH_DEV_TEMP_BUSY;
670 } else {
43394c67 671 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
b2460756
HR
672 ALUA_DH_NAME);
673 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
674 }
f2ecf13a 675 }
b2460756
HR
676 /* Retry RTPG */
677 return SCSI_DH_RETRY;
f2ecf13a
HR
678}
679
03197b61
HR
680static void alua_rtpg_work(struct work_struct *work)
681{
682 struct alua_port_group *pg =
683 container_of(work, struct alua_port_group, rtpg_work.work);
684 struct scsi_device *sdev;
685 LIST_HEAD(qdata_list);
686 int err = SCSI_DH_OK;
687 struct alua_queue_data *qdata, *tmp;
688 unsigned long flags;
689
690 spin_lock_irqsave(&pg->lock, flags);
691 sdev = pg->rtpg_sdev;
692 if (!sdev) {
693 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
694 WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
695 spin_unlock_irqrestore(&pg->lock, flags);
696 return;
697 }
698 pg->flags |= ALUA_PG_RUNNING;
699 if (pg->flags & ALUA_PG_RUN_RTPG) {
700 pg->flags &= ~ALUA_PG_RUN_RTPG;
701 spin_unlock_irqrestore(&pg->lock, flags);
702 err = alua_rtpg(sdev, pg);
703 spin_lock_irqsave(&pg->lock, flags);
704 if (err == SCSI_DH_RETRY) {
705 pg->flags &= ~ALUA_PG_RUNNING;
706 pg->flags |= ALUA_PG_RUN_RTPG;
707 spin_unlock_irqrestore(&pg->lock, flags);
708 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
709 pg->interval * HZ);
710 return;
711 }
712 if (err != SCSI_DH_OK)
713 pg->flags &= ~ALUA_PG_RUN_STPG;
714 }
715 if (pg->flags & ALUA_PG_RUN_STPG) {
716 pg->flags &= ~ALUA_PG_RUN_STPG;
717 spin_unlock_irqrestore(&pg->lock, flags);
718 err = alua_stpg(sdev, pg);
719 spin_lock_irqsave(&pg->lock, flags);
720 if (err == SCSI_DH_RETRY) {
721 pg->flags |= ALUA_PG_RUN_RTPG;
722 pg->interval = 0;
723 pg->flags &= ~ALUA_PG_RUNNING;
724 spin_unlock_irqrestore(&pg->lock, flags);
725 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
726 pg->interval * HZ);
727 return;
728 }
729 }
730
731 list_splice_init(&pg->rtpg_list, &qdata_list);
732 pg->rtpg_sdev = NULL;
733 spin_unlock_irqrestore(&pg->lock, flags);
734
735 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
736 list_del(&qdata->entry);
737 if (qdata->callback_fn)
738 qdata->callback_fn(qdata->callback_data, err);
739 kfree(qdata);
740 }
741 spin_lock_irqsave(&pg->lock, flags);
742 pg->flags &= ~ALUA_PG_RUNNING;
743 spin_unlock_irqrestore(&pg->lock, flags);
744 scsi_device_put(sdev);
745 kref_put(&pg->kref, release_port_group);
746}
747
748static void alua_rtpg_queue(struct alua_port_group *pg,
749 struct scsi_device *sdev,
750 struct alua_queue_data *qdata)
751{
752 int start_queue = 0;
753 unsigned long flags;
754
755 if (!pg)
756 return;
757
758 spin_lock_irqsave(&pg->lock, flags);
759 if (qdata) {
760 list_add_tail(&qdata->entry, &pg->rtpg_list);
761 pg->flags |= ALUA_PG_RUN_STPG;
762 }
763 if (pg->rtpg_sdev == NULL) {
764 pg->interval = 0;
765 pg->flags |= ALUA_PG_RUN_RTPG;
766 kref_get(&pg->kref);
767 pg->rtpg_sdev = sdev;
768 scsi_device_get(sdev);
769 start_queue = 1;
770 }
771 spin_unlock_irqrestore(&pg->lock, flags);
772
773 if (start_queue &&
774 !queue_delayed_work(kaluad_wq, &pg->rtpg_work,
775 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
776 scsi_device_put(sdev);
777 kref_put(&pg->kref, release_port_group);
778 }
779}
780
057ea7c9
HR
781/*
782 * alua_initialize - Initialize ALUA state
783 * @sdev: the device to be initialized
784 *
785 * For the prep_fn to work correctly we have
786 * to initialize the ALUA state for the device.
787 */
788static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
789{
43394c67 790 int err = SCSI_DH_DEV_UNSUPP, tpgs;
057ea7c9 791
03197b61 792 mutex_lock(&h->init_mutex);
43394c67 793 tpgs = alua_check_tpgs(sdev);
a4253fde
HR
794 if (tpgs != TPGS_MODE_NONE)
795 err = alua_check_vpd(sdev, h, tpgs);
03197b61
HR
796 h->init_error = err;
797 mutex_unlock(&h->init_mutex);
057ea7c9
HR
798 return err;
799}
4335d092
MB
800/*
801 * alua_set_params - set/unset the optimize flag
802 * @sdev: device on the path to be activated
803 * params - parameters in the following format
804 * "no_of_params\0param1\0param2\0param3\0...\0"
805 * For example, to set the flag pass the following parameters
806 * from multipath.conf
807 * hardware_handler "2 alua 1"
808 */
809static int alua_set_params(struct scsi_device *sdev, const char *params)
810{
ee14c674 811 struct alua_dh_data *h = sdev->handler_data;
03197b61 812 struct alua_port_group __rcu *pg = NULL;
4335d092
MB
813 unsigned int optimize = 0, argc;
814 const char *p = params;
815 int result = SCSI_DH_OK;
03197b61 816 unsigned long flags;
4335d092
MB
817
818 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
819 return -EINVAL;
820
821 while (*p++)
822 ;
823 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
824 return -EINVAL;
825
03197b61
HR
826 rcu_read_lock();
827 pg = rcu_dereference(h->pg);
828 if (!pg) {
829 rcu_read_unlock();
43394c67 830 return -ENXIO;
03197b61
HR
831 }
832 spin_lock_irqsave(&pg->lock, flags);
4335d092 833 if (optimize)
43394c67 834 pg->flags |= ALUA_OPTIMIZE_STPG;
4335d092 835 else
43394c67 836 pg->flags &= ~ALUA_OPTIMIZE_STPG;
03197b61
HR
837 spin_unlock_irqrestore(&pg->lock, flags);
838 rcu_read_unlock();
4335d092
MB
839
840 return result;
841}
057ea7c9
HR
842
843/*
844 * alua_activate - activate a path
845 * @sdev: device on the path to be activated
846 *
847 * We're currently switching the port group to be activated only and
848 * let the array figure out the rest.
849 * There may be other arrays which require us to switch all port groups
850 * based on a certain policy. But until we actually encounter them it
851 * should be okay.
852 */
3ae31f6a
CS
853static int alua_activate(struct scsi_device *sdev,
854 activate_complete fn, void *data)
057ea7c9 855{
ee14c674 856 struct alua_dh_data *h = sdev->handler_data;
057ea7c9 857 int err = SCSI_DH_OK;
03197b61
HR
858 struct alua_queue_data *qdata;
859 struct alua_port_group __rcu *pg;
057ea7c9 860
03197b61
HR
861 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
862 if (!qdata) {
863 err = SCSI_DH_RES_TEMP_UNAVAIL;
46ccf6b5 864 goto out;
03197b61
HR
865 }
866 qdata->callback_fn = fn;
867 qdata->callback_data = data;
868
869 mutex_lock(&h->init_mutex);
870 rcu_read_lock();
871 pg = rcu_dereference(h->pg);
872 if (!pg || !kref_get_unless_zero(&pg->kref)) {
873 rcu_read_unlock();
874 kfree(qdata);
875 err = h->init_error;
876 mutex_unlock(&h->init_mutex);
43394c67
HR
877 goto out;
878 }
03197b61
HR
879 fn = NULL;
880 rcu_read_unlock();
881 mutex_unlock(&h->init_mutex);
882
883 alua_rtpg_queue(pg, sdev, qdata);
884 kref_put(&pg->kref, release_port_group);
057ea7c9 885out:
b2460756 886 if (fn)
3ae31f6a
CS
887 fn(data, err);
888 return 0;
057ea7c9
HR
889}
890
891/*
892 * alua_prep_fn - request callback
893 *
894 * Fail I/O to all paths not in state
895 * active/optimized or active/non-optimized.
896 */
897static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
898{
ee14c674 899 struct alua_dh_data *h = sdev->handler_data;
03197b61
HR
900 struct alua_port_group __rcu *pg;
901 int state = TPGS_STATE_OPTIMIZED;
057ea7c9
HR
902 int ret = BLKPREP_OK;
903
03197b61
HR
904 rcu_read_lock();
905 pg = rcu_dereference(h->pg);
906 if (pg)
907 state = pg->state;
908 rcu_read_unlock();
43394c67 909 if (state == TPGS_STATE_TRANSITIONING)
69723d17 910 ret = BLKPREP_DEFER;
43394c67
HR
911 else if (state != TPGS_STATE_OPTIMIZED &&
912 state != TPGS_STATE_NONOPTIMIZED &&
913 state != TPGS_STATE_LBA_DEPENDENT) {
057ea7c9
HR
914 ret = BLKPREP_KILL;
915 req->cmd_flags |= REQ_QUIET;
916 }
917 return ret;
918
919}
920
057ea7c9
HR
921/*
922 * alua_bus_attach - Attach device handler
923 * @sdev: device to be attached to
924 */
ee14c674 925static int alua_bus_attach(struct scsi_device *sdev)
057ea7c9 926{
057ea7c9 927 struct alua_dh_data *h;
43394c67 928 int err, ret = -EINVAL;
057ea7c9 929
cd37743f 930 h = kzalloc(sizeof(*h) , GFP_KERNEL);
1d520328 931 if (!h)
ee14c674 932 return -ENOMEM;
03197b61
HR
933 spin_lock_init(&h->pg_lock);
934 rcu_assign_pointer(h->pg, NULL);
935 h->init_error = SCSI_DH_OK;
96e65865 936 h->sdev = sdev;
057ea7c9 937
03197b61 938 mutex_init(&h->init_mutex);
057ea7c9 939 err = alua_initialize(sdev, h);
43394c67
HR
940 if (err == SCSI_DH_NOMEM)
941 ret = -ENOMEM;
1d520328 942 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
057ea7c9
HR
943 goto failed;
944
ee14c674
CH
945 sdev->handler_data = h;
946 return 0;
057ea7c9 947failed:
cd37743f 948 kfree(h);
43394c67 949 return ret;
057ea7c9
HR
950}
951
952/*
953 * alua_bus_detach - Detach device handler
954 * @sdev: device to be detached from
955 */
956static void alua_bus_detach(struct scsi_device *sdev)
957{
ee14c674 958 struct alua_dh_data *h = sdev->handler_data;
03197b61
HR
959 struct alua_port_group *pg;
960
961 spin_lock(&h->pg_lock);
962 pg = h->pg;
963 rcu_assign_pointer(h->pg, NULL);
964 h->sdev = NULL;
965 spin_unlock(&h->pg_lock);
966 if (pg)
967 kref_put(&pg->kref, release_port_group);
057ea7c9 968
ee14c674 969 sdev->handler_data = NULL;
cd37743f 970 kfree(h);
057ea7c9
HR
971}
972
1d520328
CH
973static struct scsi_device_handler alua_dh = {
974 .name = ALUA_DH_NAME,
975 .module = THIS_MODULE,
976 .attach = alua_bus_attach,
977 .detach = alua_bus_detach,
978 .prep_fn = alua_prep_fn,
979 .check_sense = alua_check_sense,
980 .activate = alua_activate,
981 .set_params = alua_set_params,
1d520328
CH
982};
983
057ea7c9
HR
984static int __init alua_init(void)
985{
986 int r;
987
03197b61
HR
988 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
989 if (!kaluad_wq) {
990 /* Temporary failure, bypass */
991 return SCSI_DH_DEV_TEMP_BUSY;
992 }
057ea7c9 993 r = scsi_register_device_handler(&alua_dh);
03197b61 994 if (r != 0) {
057ea7c9
HR
995 printk(KERN_ERR "%s: Failed to register scsi device handler",
996 ALUA_DH_NAME);
03197b61
HR
997 destroy_workqueue(kaluad_wq);
998 }
057ea7c9
HR
999 return r;
1000}
1001
1002static void __exit alua_exit(void)
1003{
1004 scsi_unregister_device_handler(&alua_dh);
03197b61 1005 destroy_workqueue(kaluad_wq);
057ea7c9
HR
1006}
1007
1008module_init(alua_init);
1009module_exit(alua_exit);
1010
1011MODULE_DESCRIPTION("DM Multipath ALUA support");
1012MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
1013MODULE_LICENSE("GPL");
1014MODULE_VERSION(ALUA_DH_VER);