2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
47 #include <linux/atomic.h>
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
54 #include <scsi/scsi_transport_srp.h>
58 #define DRV_NAME "ib_srp"
59 #define PFX DRV_NAME ": "
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
65 #if !defined(CONFIG_DYNAMIC_DEBUG)
66 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
67 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 static unsigned int srp_sg_tablesize;
71 static unsigned int cmd_sg_entries;
72 static unsigned int indirect_sg_entries;
73 static bool allow_ext_sg;
74 static bool prefer_fr = true;
75 static bool register_always = true;
76 static bool never_register;
77 static int topspin_workarounds = 1;
79 module_param(srp_sg_tablesize, uint, 0444);
80 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
82 module_param(cmd_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(cmd_sg_entries,
84 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
86 module_param(indirect_sg_entries, uint, 0444);
87 MODULE_PARM_DESC(indirect_sg_entries,
88 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
90 module_param(allow_ext_sg, bool, 0444);
91 MODULE_PARM_DESC(allow_ext_sg,
92 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
94 module_param(topspin_workarounds, int, 0444);
95 MODULE_PARM_DESC(topspin_workarounds,
96 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
98 module_param(prefer_fr, bool, 0444);
99 MODULE_PARM_DESC(prefer_fr,
100 "Whether to use fast registration if both FMR and fast registration are supported");
102 module_param(register_always, bool, 0444);
103 MODULE_PARM_DESC(register_always,
104 "Use memory registration even for contiguous memory regions");
106 module_param(never_register, bool, 0444);
107 MODULE_PARM_DESC(never_register, "Never register memory");
109 static const struct kernel_param_ops srp_tmo_ops;
111 static int srp_reconnect_delay = 10;
112 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
114 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
116 static int srp_fast_io_fail_tmo = 15;
117 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
119 MODULE_PARM_DESC(fast_io_fail_tmo,
120 "Number of seconds between the observation of a transport"
121 " layer error and failing all I/O. \"off\" means that this"
122 " functionality is disabled.");
124 static int srp_dev_loss_tmo = 600;
125 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
127 MODULE_PARM_DESC(dev_loss_tmo,
128 "Maximum number of seconds that the SRP transport should"
129 " insulate transport layer errors. After this time has been"
130 " exceeded the SCSI host is removed. Should be"
131 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
132 " if fast_io_fail_tmo has not been set. \"off\" means that"
133 " this functionality is disabled.");
135 static bool srp_use_imm_data = true;
136 module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
137 MODULE_PARM_DESC(use_imm_data,
138 "Whether or not to request permission to use immediate data during SRP login.");
140 static unsigned int srp_max_imm_data = 8 * 1024;
141 module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
142 MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
144 static unsigned ch_count;
145 module_param(ch_count, uint, 0444);
146 MODULE_PARM_DESC(ch_count,
147 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
149 static void srp_add_one(struct ib_device *device);
150 static void srp_remove_one(struct ib_device *device, void *client_data);
151 static void srp_rename_dev(struct ib_device *device, void *client_data);
152 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
153 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
155 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
156 const struct ib_cm_event *event);
157 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
158 struct rdma_cm_event *event);
160 static struct scsi_transport_template *ib_srp_transport_template;
161 static struct workqueue_struct *srp_remove_wq;
163 static struct ib_client srp_client = {
166 .remove = srp_remove_one,
167 .rename = srp_rename_dev
170 static struct ib_sa_client srp_sa_client;
/*
 * Module-parameter "get" callback shared by the three SRP timeout knobs.
 * Prints the timeout as a decimal string, or "off" for a disabled timeout.
 * NOTE(review): the conditional selecting between the two sprintf() calls
 * is missing from this excerpt — presumably tmo >= 0 picks "%d"; confirm
 * against the full source.
 */
172 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
174 int tmo = *(int *)kp->arg;
177 return sprintf(buffer, "%d", tmo);
179 return sprintf(buffer, "off");
/*
 * Module-parameter "set" callback: parse the user-supplied timeout and
 * cross-validate it against the other two SRP timeouts (reconnect_delay,
 * fast_io_fail_tmo, dev_loss_tmo must stay mutually consistent) before
 * committing the new value to *kp->arg.
 */
182 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
186 res = srp_parse_tmo(&tmo, val);
/* Which pair to validate against depends on which parameter is being set. */
190 if (kp->arg == &srp_reconnect_delay)
191 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
193 else if (kp->arg == &srp_fast_io_fail_tmo)
194 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
196 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
200 *(int *)kp->arg = tmo;
206 static const struct kernel_param_ops srp_tmo_ops = {
/* Map a Scsi_Host to the srp_target_port stored in its hostdata area. */
211 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
213 return (struct srp_target_port *) host->hostdata;
/* scsi_host_template .info callback: return the target's name string. */
216 static const char *srp_target_info(struct Scsi_Host *host)
218 return host_to_target(host)->target_name;
/*
 * Return nonzero when workarounds are enabled and the target's IOC GUID
 * starts with a Topspin or Cisco OUI; such targets need the initiator
 * port-ID workaround applied in srp_send_req().
 */
221 static int srp_target_is_topspin(struct srp_target_port *target)
223 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
224 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
226 return topspin_workarounds &&
227 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
228 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
/*
 * Allocate an SRP information unit: the srp_iu descriptor, a zeroed data
 * buffer of @size bytes, and a DMA mapping of that buffer for @direction.
 * NOTE(review): allocation-failure branches and the success/error returns
 * are elided from this excerpt.
 */
231 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
233 enum dma_data_direction direction)
237 iu = kmalloc(sizeof *iu, gfp_mask);
241 iu->buf = kzalloc(size, gfp_mask);
245 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
247 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
251 iu->direction = direction;
/*
 * Release an information unit: unmap its DMA buffer (kfree() of buf/iu is
 * elided from this excerpt).
 */
263 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
268 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
/* QP asynchronous event handler: log the event name and number only. */
274 static void srp_qp_event(struct ib_event *event, void *context)
276 pr_debug("QP event %s (%d)\n",
277 ib_event_msg(event->event), event->event);
/*
 * Transition a freshly created IB QP to the INIT state: resolve the P_Key
 * index from the cached table, grant remote read/write access and bind the
 * QP to the host port.  Error paths and the kfree(attr) are elided here.
 */
280 static int srp_init_ib_qp(struct srp_target_port *target,
283 struct ib_qp_attr *attr;
286 attr = kmalloc(sizeof *attr, GFP_KERNEL);
290 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
291 target->srp_host->port,
292 be16_to_cpu(target->ib_cm.pkey),
297 attr->qp_state = IB_QPS_INIT;
298 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
299 IB_ACCESS_REMOTE_WRITE);
300 attr->port_num = target->srp_host->port;
302 ret = ib_modify_qp(qp, attr,
/*
 * Create a fresh IB CM id for @ch, destroy the previous one (if any) and
 * pre-fill the path record (OPA vs. IB record type, SGID/DGID, P_Key and
 * service id) used by the subsequent path query.
 */
313 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
315 struct srp_target_port *target = ch->target;
316 struct ib_cm_id *new_cm_id;
318 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
319 srp_ib_cm_handler, ch);
320 if (IS_ERR(new_cm_id))
321 return PTR_ERR(new_cm_id);
/* Replace any previous CM id only after the new one was created. */
324 ib_destroy_cm_id(ch->ib_cm.cm_id);
325 ch->ib_cm.cm_id = new_cm_id;
326 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
327 target->srp_host->port))
328 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
330 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
331 ch->ib_cm.path.sgid = target->sgid;
332 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
333 ch->ib_cm.path.pkey = target->ib_cm.pkey;
334 ch->ib_cm.path.service_id = target->ib_cm.service_id;
/*
 * Create an RDMA CM id for @ch and synchronously resolve the destination
 * address (waiting on ch->done, completed by the CM handler).  On success
 * the new id is swapped into ch->rdma_cm.cm_id and the old one destroyed;
 * on failure the new id is destroyed instead.
 */
339 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
341 struct srp_target_port *target = ch->target;
342 struct rdma_cm_id *new_cm_id;
345 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
346 RDMA_PS_TCP, IB_QPT_RC);
347 if (IS_ERR(new_cm_id)) {
348 ret = PTR_ERR(new_cm_id);
353 init_completion(&ch->done);
/* src is optional; only pass it when the user specified a source address. */
354 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
355 (struct sockaddr *)&target->rdma_cm.src : NULL,
356 (struct sockaddr *)&target->rdma_cm.dst,
357 SRP_PATH_REC_TIMEOUT_MS);
359 pr_err("No route available from %pIS to %pIS (%d)\n",
360 &target->rdma_cm.src, &target->rdma_cm.dst, ret);
363 ret = wait_for_completion_interruptible(&ch->done);
369 pr_err("Resolving address %pIS failed (%d)\n",
370 &target->rdma_cm.dst, ret);
/* swap() leaves the old id in new_cm_id so the cleanup below frees it. */
374 swap(ch->rdma_cm.cm_id, new_cm_id);
378 rdma_destroy_id(new_cm_id);
/* Dispatch to the RDMA/CM or IB/CM id constructor depending on transport. */
383 static int srp_new_cm_id(struct srp_rdma_ch *ch)
385 struct srp_target_port *target = ch->target;
387 return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
388 srp_new_ib_cm_id(ch);
/*
 * Allocate an FMR pool sized by target->mr_pool_size, with a dirty
 * watermark of one quarter of the pool, local-write plus remote
 * read/write access and the device's per-MR page limits.
 */
391 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
393 struct srp_device *dev = target->srp_host->srp_dev;
394 struct ib_fmr_pool_param fmr_param;
396 memset(&fmr_param, 0, sizeof(fmr_param));
397 fmr_param.pool_size = target->mr_pool_size;
398 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
400 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
401 fmr_param.page_shift = ilog2(dev->mr_page_size);
402 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
403 IB_ACCESS_REMOTE_WRITE |
404 IB_ACCESS_REMOTE_READ);
406 return ib_create_fmr_pool(dev->pd, &fmr_param);
410 * srp_destroy_fr_pool() - free the resources owned by a pool
411 * @pool: Fast registration pool to be destroyed.
413 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
416 struct srp_fr_desc *d;
/* Walk every descriptor; per-descriptor MR teardown is elided here. */
421 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
429 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
430 * @device: IB device to allocate fast registration descriptors for.
431 * @pd: Protection domain associated with the FR descriptors.
432 * @pool_size: Number of descriptors to allocate.
433 * @max_page_list_len: Maximum fast registration work request page list length.
435 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
436 struct ib_pd *pd, int pool_size,
437 int max_page_list_len)
439 struct srp_fr_pool *pool;
440 struct srp_fr_desc *d;
442 int i, ret = -EINVAL;
443 enum ib_mr_type mr_type;
/* struct_size() accounts for the flexible desc[] array at the tail. */
448 pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL);
451 pool->size = pool_size;
452 pool->max_page_list_len = max_page_list_len;
453 spin_lock_init(&pool->lock);
454 INIT_LIST_HEAD(&pool->free_list);
/* SG_GAPS MRs relax the contiguity requirement when the HCA supports it. */
456 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
457 mr_type = IB_MR_TYPE_SG_GAPS;
459 mr_type = IB_MR_TYPE_MEM_REG;
461 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
462 mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
466 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
467 dev_name(&device->dev));
471 list_add_tail(&d->entry, &pool->free_list);
/* Error path: tear down whatever was built so far. */
478 srp_destroy_fr_pool(pool);
486 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
487 * @pool: Pool to obtain descriptor from.
 *
 * Returns the first free descriptor, or NULL when the pool is exhausted.
 * The free list is protected by pool->lock (IRQ-safe).
489 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
491 struct srp_fr_desc *d = NULL;
494 spin_lock_irqsave(&pool->lock, flags);
495 if (!list_empty(&pool->free_list)) {
496 d = list_first_entry(&pool->free_list, typeof(*d), entry);
499 spin_unlock_irqrestore(&pool->lock, flags);
505 * srp_fr_pool_put() - put an FR descriptor back in the free list
506 * @pool: Pool the descriptor was allocated from.
507 * @desc: Pointer to an array of fast registration descriptor pointers.
508 * @n: Number of descriptors to put back.
510 * Note: The caller must already have queued an invalidation request for
511 * desc->mr->rkey before calling this function.
513 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
519 spin_lock_irqsave(&pool->lock, flags);
520 for (i = 0; i < n; i++)
521 list_add(&desc[i]->entry, &pool->free_list);
522 spin_unlock_irqrestore(&pool->lock, flags);
/* Convenience wrapper: build an FR pool using the target's device limits. */
525 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
527 struct srp_device *dev = target->srp_host->srp_dev;
529 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
530 dev->max_pages_per_mr);
534 * srp_destroy_qp() - destroy an RDMA queue pair
535 * @ch: SRP RDMA channel.
537 * Drain the qp before destroying it. This avoids that the receive
538 * completion handler can access the queue pair while it is
 * being destroyed (tail of the original comment elided in this excerpt).
541 static void srp_destroy_qp(struct srp_rdma_ch *ch)
/* Flush pending send completions under ch->lock before tearing down. */
543 spin_lock_irq(&ch->lock);
544 ib_process_cq_direct(ch->send_cq, -1);
545 spin_unlock_irq(&ch->lock);
548 ib_destroy_qp(ch->qp);
/*
 * Create the IB resources for one RDMA channel: receive and send CQs, the
 * QP (via RDMA/CM or plain verbs depending on the transport), and a memory
 * registration pool (FR or FMR).  On success the new objects replace the
 * channel's previous ones, whose teardown/error unwinding is partially
 * elided from this excerpt.
 */
551 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
553 struct srp_target_port *target = ch->target;
554 struct srp_device *dev = target->srp_host->srp_dev;
555 struct ib_qp_init_attr *init_attr;
556 struct ib_cq *recv_cq, *send_cq;
558 struct ib_fmr_pool *fmr_pool = NULL;
559 struct srp_fr_pool *fr_pool = NULL;
/* m = send WRs per command: 1 send + (reg + inv) per MR when using FR. */
560 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
563 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
567 /* queue_size + 1 for ib_drain_rq() */
568 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
569 ch->comp_vector, IB_POLL_SOFTIRQ);
570 if (IS_ERR(recv_cq)) {
571 ret = PTR_ERR(recv_cq);
575 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
576 ch->comp_vector, IB_POLL_DIRECT);
577 if (IS_ERR(send_cq)) {
578 ret = PTR_ERR(send_cq);
582 init_attr->event_handler = srp_qp_event;
583 init_attr->cap.max_send_wr = m * target->queue_size;
584 init_attr->cap.max_recv_wr = target->queue_size + 1;
585 init_attr->cap.max_recv_sge = 1;
586 init_attr->cap.max_send_sge = SRP_MAX_SGE;
587 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
588 init_attr->qp_type = IB_QPT_RC;
589 init_attr->send_cq = send_cq;
590 init_attr->recv_cq = recv_cq;
/* RDMA/CM owns QP creation; with IB/CM we create and init the QP ourselves. */
592 if (target->using_rdma_cm) {
593 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
594 qp = ch->rdma_cm.cm_id->qp;
596 qp = ib_create_qp(dev->pd, init_attr);
598 ret = srp_init_ib_qp(target, qp);
606 pr_err("QP creation failed for dev %s: %d\n",
607 dev_name(&dev->dev->dev), ret);
611 if (dev->use_fast_reg) {
612 fr_pool = srp_alloc_fr_pool(target);
613 if (IS_ERR(fr_pool)) {
614 ret = PTR_ERR(fr_pool);
615 shost_printk(KERN_WARNING, target->scsi_host, PFX
616 "FR pool allocation failed (%d)\n", ret);
619 } else if (dev->use_fmr) {
620 fmr_pool = srp_alloc_fmr_pool(target);
621 if (IS_ERR(fmr_pool)) {
622 ret = PTR_ERR(fmr_pool);
623 shost_printk(KERN_WARNING, target->scsi_host, PFX
624 "FMR pool allocation failed (%d)\n", ret);
/* Free the channel's previous CQs before installing the new ones. */
632 ib_free_cq(ch->recv_cq);
634 ib_free_cq(ch->send_cq);
637 ch->recv_cq = recv_cq;
638 ch->send_cq = send_cq;
640 if (dev->use_fast_reg) {
642 srp_destroy_fr_pool(ch->fr_pool);
643 ch->fr_pool = fr_pool;
644 } else if (dev->use_fmr) {
646 ib_destroy_fmr_pool(ch->fmr_pool);
647 ch->fmr_pool = fmr_pool;
/* Error-unwind tail (labels elided in this excerpt). */
654 if (target->using_rdma_cm)
655 rdma_destroy_qp(ch->rdma_cm.cm_id);
671 * Note: this function may be called without srp_alloc_iu_bufs() having been
672 * invoked. Hence the ch->[rt]x_ring checks.
674 static void srp_free_ch_ib(struct srp_target_port *target,
675 struct srp_rdma_ch *ch)
677 struct srp_device *dev = target->srp_host->srp_dev;
/* Destroy the CM id first so no more CM events can touch the channel. */
683 if (target->using_rdma_cm) {
684 if (ch->rdma_cm.cm_id) {
685 rdma_destroy_id(ch->rdma_cm.cm_id);
686 ch->rdma_cm.cm_id = NULL;
689 if (ch->ib_cm.cm_id) {
690 ib_destroy_cm_id(ch->ib_cm.cm_id);
691 ch->ib_cm.cm_id = NULL;
695 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
699 if (dev->use_fast_reg) {
701 srp_destroy_fr_pool(ch->fr_pool);
702 } else if (dev->use_fmr) {
704 ib_destroy_fmr_pool(ch->fmr_pool);
708 ib_free_cq(ch->send_cq);
709 ib_free_cq(ch->recv_cq);
712 * Avoid that the SCSI error handler tries to use this channel after
713 * it has been freed. The SCSI error handler can namely continue
714 * trying to perform recovery actions after scsi_remove_host()
 * (remainder of this comment elided in this excerpt).
720 ch->send_cq = ch->recv_cq = NULL;
723 for (i = 0; i < target->queue_size; ++i)
724 srp_free_iu(target->srp_host, ch->rx_ring[i]);
729 for (i = 0; i < target->queue_size; ++i)
730 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * SA path-record query completion callback: log failures, store the
 * returned path record in the channel.  The completion of ch->done is
 * elided from this excerpt.
 */
736 static void srp_path_rec_completion(int status,
737 struct sa_path_rec *pathrec,
740 struct srp_rdma_ch *ch = ch_ptr;
741 struct srp_target_port *target = ch->target;
745 shost_printk(KERN_ERR, target->scsi_host,
746 PFX "Got failed path rec status %d\n", status);
748 ch->ib_cm.path = *pathrec;
/*
 * Issue a synchronous SA path-record query for the IB/CM transport,
 * waiting interruptibly on ch->done (completed by
 * srp_path_rec_completion()).  Logs query details on failure.
 */
752 static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
754 struct srp_target_port *target = ch->target;
757 ch->ib_cm.path.numb_path = 1;
759 init_completion(&ch->done);
761 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
762 target->srp_host->srp_dev->dev,
763 target->srp_host->port,
765 IB_SA_PATH_REC_SERVICE_ID |
766 IB_SA_PATH_REC_DGID |
767 IB_SA_PATH_REC_SGID |
768 IB_SA_PATH_REC_NUMB_PATH |
770 SRP_PATH_REC_TIMEOUT_MS,
772 srp_path_rec_completion,
773 ch, &ch->ib_cm.path_query);
774 if (ch->ib_cm.path_query_id < 0)
775 return ch->ib_cm.path_query_id;
777 ret = wait_for_completion_interruptible(&ch->done);
782 shost_printk(KERN_WARNING, target->scsi_host,
783 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
784 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
785 be16_to_cpu(target->ib_cm.pkey),
786 be64_to_cpu(target->ib_cm.service_id));
/*
 * Resolve the RDMA route for the RDMA/CM transport, waiting on ch->done
 * for the CM handler to signal completion.
 */
791 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
793 struct srp_target_port *target = ch->target;
796 init_completion(&ch->done);
798 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
802 wait_for_completion_interruptible(&ch->done);
805 shost_printk(KERN_WARNING, target->scsi_host,
806 PFX "Path resolution failed\n");
/* Dispatch the path lookup to the RDMA/CM or IB/CM implementation. */
811 static int srp_lookup_path(struct srp_rdma_ch *ch)
813 struct srp_target_port *target = ch->target;
815 return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
816 srp_ib_lookup_path(ch);
/*
 * Query the port's subnet timeout, falling back to 18 when the query
 * fails, and warn when the value is small enough to risk CM timeouts
 * during login.
 */
819 static u8 srp_get_subnet_timeout(struct srp_host *host)
821 struct ib_port_attr attr;
823 u8 subnet_timeout = 18;
825 ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
827 subnet_timeout = attr.subnet_timeout;
829 if (unlikely(subnet_timeout < 15))
830 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
831 dev_name(&host->srp_dev->dev->dev), subnet_timeout);
833 return subnet_timeout;
/*
 * Build and send the SRP_LOGIN_REQ for @ch.  Fills the IB/CM request and,
 * for the RDMA/CM transport, mirrors the relevant fields into the RDMA
 * connect parameters.  Also applies the port-identifier byte ordering
 * required by old (rev 10) targets and the Topspin/Cisco workaround.
 */
836 static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
839 struct srp_target_port *target = ch->target;
841 struct rdma_conn_param rdma_param;
842 struct srp_login_req_rdma rdma_req;
843 struct ib_cm_req_param ib_param;
844 struct srp_login_req ib_req;
849 req = kzalloc(sizeof *req, GFP_KERNEL);
853 req->ib_param.flow_control = 1;
854 req->ib_param.retry_count = target->tl_retry_count;
857 * Pick some arbitrary defaults here; we could make these
858 * module parameters if anyone cared about setting them.
860 req->ib_param.responder_resources = 4;
861 req->ib_param.rnr_retry_count = 7;
862 req->ib_param.max_cm_retries = 15;
864 req->ib_req.opcode = SRP_LOGIN_REQ;
866 req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
867 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
868 SRP_BUF_FORMAT_INDIRECT);
869 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
870 SRP_MULTICHAN_SINGLE);
/* Ask the target for immediate-data support if enabled by module param. */
871 if (srp_use_imm_data) {
872 req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
873 req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
/* RDMA/CM: copy the IB parameters into the rdma_conn_param equivalent. */
876 if (target->using_rdma_cm) {
877 req->rdma_param.flow_control = req->ib_param.flow_control;
878 req->rdma_param.responder_resources =
879 req->ib_param.responder_resources;
880 req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
881 req->rdma_param.retry_count = req->ib_param.retry_count;
882 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
883 req->rdma_param.private_data = &req->rdma_req;
884 req->rdma_param.private_data_len = sizeof(req->rdma_req);
886 req->rdma_req.opcode = req->ib_req.opcode;
887 req->rdma_req.tag = req->ib_req.tag;
888 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
889 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
890 req->rdma_req.req_flags = req->ib_req.req_flags;
891 req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
893 ipi = req->rdma_req.initiator_port_id;
894 tpi = req->rdma_req.target_port_id;
/* IB/CM: CM response timeouts are derived from the subnet timeout. */
898 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
900 req->ib_param.primary_path = &ch->ib_cm.path;
901 req->ib_param.alternate_path = NULL;
902 req->ib_param.service_id = target->ib_cm.service_id;
903 get_random_bytes(&req->ib_param.starting_psn, 4);
904 req->ib_param.starting_psn &= 0xffffff;
905 req->ib_param.qp_num = ch->qp->qp_num;
906 req->ib_param.qp_type = ch->qp->qp_type;
907 req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
908 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
909 req->ib_param.private_data = &req->ib_req;
910 req->ib_param.private_data_len = sizeof(req->ib_req);
912 ipi = req->ib_req.initiator_port_id;
913 tpi = req->ib_req.target_port_id;
917 * In the published SRP specification (draft rev. 16a), the
918 * port identifier format is 8 bytes of ID extension followed
919 * by 8 bytes of GUID. Older drafts put the two halves in the
920 * opposite order, so that the GUID comes first.
922 * Targets conforming to these obsolete drafts can be
923 * recognized by the I/O Class they report.
925 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
926 memcpy(ipi, &target->sgid.global.interface_id, 8);
927 memcpy(ipi + 8, &target->initiator_ext, 8);
928 memcpy(tpi, &target->ioc_guid, 8);
929 memcpy(tpi + 8, &target->id_ext, 8);
931 memcpy(ipi, &target->initiator_ext, 8);
932 memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
933 memcpy(tpi, &target->id_ext, 8);
934 memcpy(tpi + 8, &target->ioc_guid, 8);
938 * Topspin/Cisco SRP targets will reject our login unless we
939 * zero out the first 8 bytes of our initiator port ID and set
940 * the second 8 bytes to the local node GUID.
942 if (srp_target_is_topspin(target)) {
943 shost_printk(KERN_DEBUG, target->scsi_host,
944 PFX "Topspin/Cisco initiator port ID workaround "
945 "activated for target GUID %016llx\n",
946 be64_to_cpu(target->ioc_guid));
948 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
951 if (target->using_rdma_cm)
952 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
954 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
/*
 * Atomically move the target into SRP_TARGET_REMOVED and, if this call
 * performed the transition, queue the asynchronous removal work.
 * Returns whether the state actually changed (idempotent on repeat calls).
 */
961 static bool srp_queue_remove_work(struct srp_target_port *target)
963 bool changed = false;
965 spin_lock_irq(&target->lock);
966 if (target->state != SRP_TARGET_REMOVED) {
967 target->state = SRP_TARGET_REMOVED;
970 spin_unlock_irq(&target->lock);
973 queue_work(srp_remove_wq, &target->remove_work);
/*
 * Mark every channel disconnected and send a disconnect (RDMA/CM) or
 * CM DREQ (IB/CM) on each one.  Failures to send the DREQ are only logged.
 */
978 static void srp_disconnect_target(struct srp_target_port *target)
980 struct srp_rdma_ch *ch;
983 /* XXX should send SRP_I_LOGOUT request */
985 for (i = 0; i < target->ch_count; i++) {
987 ch->connected = false;
989 if (target->using_rdma_cm) {
990 if (ch->rdma_cm.cm_id)
991 rdma_disconnect(ch->rdma_cm.cm_id);
994 ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
998 shost_printk(KERN_DEBUG, target->scsi_host,
999 PFX "Sending CM DREQ failed\n");
/*
 * Free the per-request resources of a channel: FR or FMR bookkeeping
 * lists, the DMA-mapped indirect descriptor buffer, and finally the
 * request ring itself.
 */
1004 static void srp_free_req_data(struct srp_target_port *target,
1005 struct srp_rdma_ch *ch)
1007 struct srp_device *dev = target->srp_host->srp_dev;
1008 struct ib_device *ibdev = dev->dev;
1009 struct srp_request *req;
1015 for (i = 0; i < target->req_ring_size; ++i) {
1016 req = &ch->req_ring[i];
1017 if (dev->use_fast_reg) {
1018 kfree(req->fr_list);
1020 kfree(req->fmr_list);
1021 kfree(req->map_page);
1023 if (req->indirect_dma_addr) {
1024 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
1025 target->indirect_size,
1028 kfree(req->indirect_desc);
1031 kfree(ch->req_ring);
/* Clear the pointer so a repeated call is a no-op. */
1032 ch->req_ring = NULL;
/*
 * Allocate the channel's request ring and, per request, the MR list
 * (shared between the FR and FMR paths), the FMR map-page array, and the
 * DMA-mapped indirect descriptor buffer.  Error-unwind labels are elided
 * from this excerpt; ret starts at -ENOMEM for the failure paths.
 */
1035 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
1037 struct srp_target_port *target = ch->target;
1038 struct srp_device *srp_dev = target->srp_host->srp_dev;
1039 struct ib_device *ibdev = srp_dev->dev;
1040 struct srp_request *req;
1042 dma_addr_t dma_addr;
1043 int i, ret = -ENOMEM;
1045 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
1050 for (i = 0; i < target->req_ring_size; ++i) {
1051 req = &ch->req_ring[i];
1052 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
1056 if (srp_dev->use_fast_reg) {
1057 req->fr_list = mr_list;
1059 req->fmr_list = mr_list;
1060 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
1066 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1067 if (!req->indirect_desc)
1070 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1071 target->indirect_size,
1073 if (ib_dma_mapping_error(ibdev, dma_addr))
1076 req->indirect_dma_addr = dma_addr;
1085 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1086 * @shost: SCSI host whose attributes to remove from sysfs.
1088 * Note: Any attributes defined in the host template and that did not exist
1089 * before invocation of this function will be ignored.
1091 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1093 struct device_attribute **attr;
1095 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1096 device_remove_file(&shost->shost_dev, *attr);
/*
 * Full teardown of a target that has already been marked removed: remove
 * sysfs attributes and the SCSI/SRP host, disconnect, free per-channel IB
 * resources and request data, unlink the target from its host's list and
 * drop the final scsi_host reference.  Ordering matters: the host is
 * removed before the channels are freed so no new I/O can arrive.
 */
1099 static void srp_remove_target(struct srp_target_port *target)
1101 struct srp_rdma_ch *ch;
1104 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1106 srp_del_scsi_host_attr(target->scsi_host);
/* Hold an rport reference across teardown; released after tl_err_work. */
1107 srp_rport_get(target->rport);
1108 srp_remove_host(target->scsi_host);
1109 scsi_remove_host(target->scsi_host);
1110 srp_stop_rport_timers(target->rport);
1111 srp_disconnect_target(target);
1112 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1113 for (i = 0; i < target->ch_count; i++) {
1114 ch = &target->ch[i];
1115 srp_free_ch_ib(target, ch);
1117 cancel_work_sync(&target->tl_err_work);
1118 srp_rport_put(target->rport);
1119 for (i = 0; i < target->ch_count; i++) {
1120 ch = &target->ch[i];
1121 srp_free_req_data(target, ch);
1126 spin_lock(&target->srp_host->target_lock);
1127 list_del(&target->list);
1128 spin_unlock(&target->srp_host->target_lock);
1130 scsi_host_put(target->scsi_host);
/* Workqueue entry point: perform the deferred target removal. */
1133 static void srp_remove_work(struct work_struct *work)
1135 struct srp_target_port *target =
1136 container_of(work, struct srp_target_port, remove_work);
1138 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1140 srp_remove_target(target);
/* srp_transport callback: schedule removal of the rport's target. */
1143 static void srp_rport_delete(struct srp_rport *rport)
1145 struct srp_target_port *target = rport->lld_data;
1147 srp_queue_remove_work(target);
1151 * srp_connected_ch() - number of connected channels
1152 * @target: SRP target port.
 *
 * Sums the boolean ch[i].connected flags over all channels.
1154 static int srp_connected_ch(struct srp_target_port *target)
1158 for (i = 0; i < target->ch_count; i++)
1159 c += target->ch[i].connected;
/*
 * Connect one RDMA channel: resolve the path, send the login request and
 * wait for the CM handler to complete ch->done, retrying on port/LID
 * redirect REJs.  The retry loop and some status handling are elided from
 * this excerpt.
 */
1164 static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
1167 struct srp_target_port *target = ch->target;
1170 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1172 ret = srp_lookup_path(ch);
1177 init_completion(&ch->done);
1178 ret = srp_send_req(ch, max_iu_len, multich);
1181 ret = wait_for_completion_interruptible(&ch->done);
1186 * The CM event handling code will set status to
1187 * SRP_PORT_REDIRECT if we get a port redirect REJ
1188 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1189 * redirect REJ back.
1194 ch->connected = true;
1197 case SRP_PORT_REDIRECT:
1198 ret = srp_lookup_path(ch);
1203 case SRP_DLID_REDIRECT:
1206 case SRP_STALE_CONN:
1207 shost_printk(KERN_ERR, target->scsi_host, PFX
1208 "giving up on stale connection\n");
/* Normalize: a positive status becomes -ENODEV for the caller. */
1218 return ret <= 0 ? ret : -ENODEV;
/* Completion callback for a failed local-invalidate work request. */
1221 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1223 srp_handle_qp_err(cq, wc, "INV RKEY");
/*
 * Post an IB_WR_LOCAL_INV work request to invalidate @rkey on the
 * channel's QP; errors are reported via srp_inv_rkey_err_done().
 */
1226 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1229 struct ib_send_wr wr = {
1230 .opcode = IB_WR_LOCAL_INV,
1234 .ex.invalidate_rkey = rkey,
1237 wr.wr_cqe = &req->reg_cqe;
1238 req->reg_cqe.done = srp_inv_rkey_err_done;
1239 return ib_post_send(ch->qp, &wr, NULL);
/*
 * Undo the data mapping of a request: invalidate and return FR
 * descriptors (or unmap FMRs), then DMA-unmap the command's scatterlist.
 * Bails out early for commands without a data phase.
 */
1242 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1243 struct srp_rdma_ch *ch,
1244 struct srp_request *req)
1246 struct srp_target_port *target = ch->target;
1247 struct srp_device *dev = target->srp_host->srp_dev;
1248 struct ib_device *ibdev = dev->dev;
1251 if (!scsi_sglist(scmnd) ||
1252 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1253 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1256 if (dev->use_fast_reg) {
1257 struct srp_fr_desc **pfr;
1259 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1260 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1262 shost_printk(KERN_ERR, target->scsi_host, PFX
1263 "Queueing INV WR for rkey %#x failed (%d)\n",
1264 (*pfr)->mr->rkey, res);
/* A failed invalidate is a transport-layer error; escalate it. */
1265 queue_work(system_long_wq,
1266 &target->tl_err_work);
1270 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1272 } else if (dev->use_fmr) {
1273 struct ib_pool_fmr **pfmr;
1275 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1276 ib_fmr_pool_unmap(*pfmr);
1279 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1280 scmnd->sc_data_direction);
1284 * srp_claim_req - Take ownership of the scmnd associated with a request.
1285 * @ch: SRP RDMA channel.
1286 * @req: SRP request.
1287 * @sdev: If not NULL, only take ownership for this SCSI device.
1288 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1289 * ownership of @req->scmnd if it equals @scmnd.
1292 * Either NULL or a pointer to the SCSI command the caller became owner of.
1294 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1295 struct srp_request *req,
1296 struct scsi_device *sdev,
1297 struct scsi_cmnd *scmnd)
1299 unsigned long flags;
/* Claiming is done under ch->lock so only one context can win. */
1301 spin_lock_irqsave(&ch->lock, flags);
1303 (!sdev || req->scmnd->device == sdev) &&
1304 (!scmnd || req->scmnd == scmnd)) {
1310 spin_unlock_irqrestore(&ch->lock, flags);
1316 * srp_free_req() - Unmap data and adjust ch->req_lim.
1317 * @ch: SRP RDMA channel.
1318 * @req: Request to be freed.
1319 * @scmnd: SCSI command associated with @req.
1320 * @req_lim_delta: Amount to be added to @target->req_lim.
1322 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1323 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1325 unsigned long flags;
1327 srp_unmap_data(scmnd, ch, req);
1329 spin_lock_irqsave(&ch->lock, flags);
1330 ch->req_lim += req_lim_delta;
1331 spin_unlock_irqrestore(&ch->lock, flags);
1334 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1335 struct scsi_device *sdev, int result)
1337 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1340 srp_free_req(ch, req, scmnd, 0);
1341 scmnd->result = result;
1342 scmnd->scsi_done(scmnd);
1346 static void srp_terminate_io(struct srp_rport *rport)
1348 struct srp_target_port *target = rport->lld_data;
1349 struct srp_rdma_ch *ch;
1352 for (i = 0; i < target->ch_count; i++) {
1353 ch = &target->ch[i];
1355 for (j = 0; j < target->req_ring_size; ++j) {
1356 struct srp_request *req = &ch->req_ring[j];
1358 srp_finish_req(ch, req, NULL,
1359 DID_TRANSPORT_FAILFAST << 16);
1364 /* Calculate maximum initiator to target information unit length. */
1365 static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
1367 uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
1368 sizeof(struct srp_indirect_buf) +
1369 cmd_sg_cnt * sizeof(struct srp_direct_buf);
1372 max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
1379 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1380 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1381 * srp_reset_device() or srp_reset_host() calls will occur while this function
1382 * is in progress. One way to realize that is not to call this function
1383 * directly but to call srp_reconnect_rport() instead since that last function
1384 * serializes calls of this function via rport->mutex and also blocks
1385 * srp_queuecommand() calls before invoking this function.
1387 static int srp_rport_reconnect(struct srp_rport *rport)
1389 struct srp_target_port *target = rport->lld_data;
1390 struct srp_rdma_ch *ch;
1391 uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
1394 bool multich = false;
1396 srp_disconnect_target(target);
1398 if (target->state == SRP_TARGET_SCANNING)
1402 * Now get a new local CM ID so that we avoid confusing the target in
1403 * case things are really fouled up. Doing so also ensures that all CM
1404 * callbacks will have finished before a new QP is allocated.
1406 for (i = 0; i < target->ch_count; i++) {
1407 ch = &target->ch[i];
1408 ret += srp_new_cm_id(ch);
1410 for (i = 0; i < target->ch_count; i++) {
1411 ch = &target->ch[i];
1412 for (j = 0; j < target->req_ring_size; ++j) {
1413 struct srp_request *req = &ch->req_ring[j];
1415 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1418 for (i = 0; i < target->ch_count; i++) {
1419 ch = &target->ch[i];
1421 * Whether or not creating a new CM ID succeeded, create a new
1422 * QP. This guarantees that all completion callback function
1423 * invocations have finished before request resetting starts.
1425 ret += srp_create_ch_ib(ch);
1427 INIT_LIST_HEAD(&ch->free_tx);
1428 for (j = 0; j < target->queue_size; ++j)
1429 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1432 target->qp_in_error = false;
1434 for (i = 0; i < target->ch_count; i++) {
1435 ch = &target->ch[i];
1438 ret = srp_connect_ch(ch, max_iu_len, multich);
1443 shost_printk(KERN_INFO, target->scsi_host,
1444 PFX "reconnect succeeded\n");
1449 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1450 unsigned int dma_len, u32 rkey)
1452 struct srp_direct_buf *desc = state->desc;
1454 WARN_ON_ONCE(!dma_len);
1456 desc->va = cpu_to_be64(dma_addr);
1457 desc->key = cpu_to_be32(rkey);
1458 desc->len = cpu_to_be32(dma_len);
1460 state->total_len += dma_len;
1465 static int srp_map_finish_fmr(struct srp_map_state *state,
1466 struct srp_rdma_ch *ch)
1468 struct srp_target_port *target = ch->target;
1469 struct srp_device *dev = target->srp_host->srp_dev;
1470 struct ib_pool_fmr *fmr;
1473 if (state->fmr.next >= state->fmr.end) {
1474 shost_printk(KERN_ERR, ch->target->scsi_host,
1475 PFX "Out of MRs (mr_per_cmd = %d)\n",
1476 ch->target->mr_per_cmd);
1480 WARN_ON_ONCE(!dev->use_fmr);
1482 if (state->npages == 0)
1485 if (state->npages == 1 && target->global_rkey) {
1486 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1487 target->global_rkey);
1491 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1492 state->npages, io_addr);
1494 return PTR_ERR(fmr);
1496 *state->fmr.next++ = fmr;
1499 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1500 state->dma_len, fmr->fmr->rkey);
/* Completion handler for a failed fast registration work request. */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
1515 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1516 * where to start in the first element. If sg_offset_p != NULL then
1517 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1518 * byte that has not yet been mapped.
1520 static int srp_map_finish_fr(struct srp_map_state *state,
1521 struct srp_request *req,
1522 struct srp_rdma_ch *ch, int sg_nents,
1523 unsigned int *sg_offset_p)
1525 struct srp_target_port *target = ch->target;
1526 struct srp_device *dev = target->srp_host->srp_dev;
1527 struct ib_reg_wr wr;
1528 struct srp_fr_desc *desc;
1532 if (state->fr.next >= state->fr.end) {
1533 shost_printk(KERN_ERR, ch->target->scsi_host,
1534 PFX "Out of MRs (mr_per_cmd = %d)\n",
1535 ch->target->mr_per_cmd);
1539 WARN_ON_ONCE(!dev->use_fast_reg);
1541 if (sg_nents == 1 && target->global_rkey) {
1542 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1544 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1545 sg_dma_len(state->sg) - sg_offset,
1546 target->global_rkey);
1552 desc = srp_fr_pool_get(ch->fr_pool);
1556 rkey = ib_inc_rkey(desc->mr->rkey);
1557 ib_update_fast_reg_key(desc->mr, rkey);
1559 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1561 if (unlikely(n < 0)) {
1562 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1563 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1564 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1565 sg_offset_p ? *sg_offset_p : -1, n);
1569 WARN_ON_ONCE(desc->mr->length == 0);
1571 req->reg_cqe.done = srp_reg_mr_err_done;
1574 wr.wr.opcode = IB_WR_REG_MR;
1575 wr.wr.wr_cqe = &req->reg_cqe;
1577 wr.wr.send_flags = 0;
1579 wr.key = desc->mr->rkey;
1580 wr.access = (IB_ACCESS_LOCAL_WRITE |
1581 IB_ACCESS_REMOTE_READ |
1582 IB_ACCESS_REMOTE_WRITE);
1584 *state->fr.next++ = desc;
1587 srp_map_desc(state, desc->mr->iova,
1588 desc->mr->length, desc->mr->rkey);
1590 err = ib_post_send(ch->qp, &wr.wr, NULL);
1591 if (unlikely(err)) {
1592 WARN_ON_ONCE(err == -ENOMEM);
1599 static int srp_map_sg_entry(struct srp_map_state *state,
1600 struct srp_rdma_ch *ch,
1601 struct scatterlist *sg)
1603 struct srp_target_port *target = ch->target;
1604 struct srp_device *dev = target->srp_host->srp_dev;
1605 dma_addr_t dma_addr = sg_dma_address(sg);
1606 unsigned int dma_len = sg_dma_len(sg);
1607 unsigned int len = 0;
1610 WARN_ON_ONCE(!dma_len);
1613 unsigned offset = dma_addr & ~dev->mr_page_mask;
1615 if (state->npages == dev->max_pages_per_mr ||
1616 (state->npages > 0 && offset != 0)) {
1617 ret = srp_map_finish_fmr(state, ch);
1622 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1625 state->base_dma_addr = dma_addr;
1626 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1627 state->dma_len += len;
1633 * If the end of the MR is not on a page boundary then we need to
1634 * close it out and start a new one -- we can only merge at page
1638 if ((dma_addr & ~dev->mr_page_mask) != 0)
1639 ret = srp_map_finish_fmr(state, ch);
1643 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1644 struct srp_request *req, struct scatterlist *scat,
1647 struct scatterlist *sg;
1650 state->pages = req->map_page;
1651 state->fmr.next = req->fmr_list;
1652 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1654 for_each_sg(scat, sg, count, i) {
1655 ret = srp_map_sg_entry(state, ch, sg);
1660 ret = srp_map_finish_fmr(state, ch);
1667 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1668 struct srp_request *req, struct scatterlist *scat,
1671 unsigned int sg_offset = 0;
1673 state->fr.next = req->fr_list;
1674 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1683 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1684 if (unlikely(n < 0))
1688 for (i = 0; i < n; i++)
1689 state->sg = sg_next(state->sg);
1695 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1696 struct srp_request *req, struct scatterlist *scat,
1699 struct srp_target_port *target = ch->target;
1700 struct scatterlist *sg;
1703 for_each_sg(scat, sg, count, i) {
1704 srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
1705 target->global_rkey);
1712 * Register the indirect data buffer descriptor with the HCA.
1714 * Note: since the indirect data buffer descriptor has been allocated with
1715 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1718 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1719 void **next_mr, void **end_mr, u32 idb_len,
1722 struct srp_target_port *target = ch->target;
1723 struct srp_device *dev = target->srp_host->srp_dev;
1724 struct srp_map_state state;
1725 struct srp_direct_buf idb_desc;
1727 struct scatterlist idb_sg[1];
1730 memset(&state, 0, sizeof(state));
1731 memset(&idb_desc, 0, sizeof(idb_desc));
1732 state.gen.next = next_mr;
1733 state.gen.end = end_mr;
1734 state.desc = &idb_desc;
1735 state.base_dma_addr = req->indirect_dma_addr;
1736 state.dma_len = idb_len;
1738 if (dev->use_fast_reg) {
1740 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1741 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1742 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1743 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1745 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1748 WARN_ON_ONCE(ret < 1);
1749 } else if (dev->use_fmr) {
1750 state.pages = idb_pages;
1751 state.pages[0] = (req->indirect_dma_addr &
1754 ret = srp_map_finish_fmr(&state, ch);
1761 *idb_rkey = idb_desc.key;
1766 static void srp_check_mapping(struct srp_map_state *state,
1767 struct srp_rdma_ch *ch, struct srp_request *req,
1768 struct scatterlist *scat, int count)
1770 struct srp_device *dev = ch->target->srp_host->srp_dev;
1771 struct srp_fr_desc **pfr;
1772 u64 desc_len = 0, mr_len = 0;
1775 for (i = 0; i < state->ndesc; i++)
1776 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1777 if (dev->use_fast_reg)
1778 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1779 mr_len += (*pfr)->mr->length;
1780 else if (dev->use_fmr)
1781 for (i = 0; i < state->nmdesc; i++)
1782 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1783 if (desc_len != scsi_bufflen(req->scmnd) ||
1784 mr_len > scsi_bufflen(req->scmnd))
1785 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1786 scsi_bufflen(req->scmnd), desc_len, mr_len,
1787 state->ndesc, state->nmdesc);
1791 * srp_map_data() - map SCSI data buffer onto an SRP request
1792 * @scmnd: SCSI command to map
1793 * @ch: SRP RDMA channel
1796 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1797 * mapping failed. The size of any immediate data is not included in the
1800 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1801 struct srp_request *req)
1803 struct srp_target_port *target = ch->target;
1804 struct scatterlist *scat, *sg;
1805 struct srp_cmd *cmd = req->cmd->buf;
1806 int i, len, nents, count, ret;
1807 struct srp_device *dev;
1808 struct ib_device *ibdev;
1809 struct srp_map_state state;
1810 struct srp_indirect_buf *indirect_hdr;
1812 u32 idb_len, table_len;
1816 req->cmd->num_sge = 1;
1818 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1819 return sizeof(struct srp_cmd) + cmd->add_cdb_len;
1821 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1822 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1823 shost_printk(KERN_WARNING, target->scsi_host,
1824 PFX "Unhandled data direction %d\n",
1825 scmnd->sc_data_direction);
1829 nents = scsi_sg_count(scmnd);
1830 scat = scsi_sglist(scmnd);
1831 data_len = scsi_bufflen(scmnd);
1833 dev = target->srp_host->srp_dev;
1836 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1837 if (unlikely(count == 0))
1840 if (ch->use_imm_data &&
1841 count <= SRP_MAX_IMM_SGE &&
1842 SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
1843 scmnd->sc_data_direction == DMA_TO_DEVICE) {
1844 struct srp_imm_buf *buf;
1845 struct ib_sge *sge = &req->cmd->sge[1];
1847 fmt = SRP_DATA_DESC_IMM;
1848 len = SRP_IMM_DATA_OFFSET;
1850 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1851 buf->len = cpu_to_be32(data_len);
1852 WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
1853 for_each_sg(scat, sg, count, i) {
1854 sge[i].addr = sg_dma_address(sg);
1855 sge[i].length = sg_dma_len(sg);
1856 sge[i].lkey = target->lkey;
1858 req->cmd->num_sge += count;
1862 fmt = SRP_DATA_DESC_DIRECT;
1863 len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1864 sizeof(struct srp_direct_buf);
1866 if (count == 1 && target->global_rkey) {
1868 * The midlayer only generated a single gather/scatter
1869 * entry, or DMA mapping coalesced everything to a
1870 * single entry. So a direct descriptor along with
1871 * the DMA MR suffices.
1873 struct srp_direct_buf *buf;
1875 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1876 buf->va = cpu_to_be64(sg_dma_address(scat));
1877 buf->key = cpu_to_be32(target->global_rkey);
1878 buf->len = cpu_to_be32(sg_dma_len(scat));
1885 * We have more than one scatter/gather entry, so build our indirect
1886 * descriptor table, trying to merge as many entries as we can.
1888 indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
1890 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1891 target->indirect_size, DMA_TO_DEVICE);
1893 memset(&state, 0, sizeof(state));
1894 state.desc = req->indirect_desc;
1895 if (dev->use_fast_reg)
1896 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1897 else if (dev->use_fmr)
1898 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1900 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1901 req->nmdesc = state.nmdesc;
1906 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1907 "Memory mapping consistency check");
1908 if (DYNAMIC_DEBUG_BRANCH(ddm))
1909 srp_check_mapping(&state, ch, req, scat, count);
1912 /* We've mapped the request, now pull as much of the indirect
1913 * descriptor table as we can into the command buffer. If this
1914 * target is not using an external indirect table, we are
1915 * guaranteed to fit into the command, as the SCSI layer won't
1916 * give us more S/G entries than we allow.
1918 if (state.ndesc == 1) {
1920 * Memory registration collapsed the sg-list into one entry,
1921 * so use a direct descriptor.
1923 struct srp_direct_buf *buf;
1925 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1926 *buf = req->indirect_desc[0];
1930 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1931 !target->allow_ext_sg)) {
1932 shost_printk(KERN_ERR, target->scsi_host,
1933 "Could not fit S/G list into SRP_CMD\n");
1938 count = min(state.ndesc, target->cmd_sg_cnt);
1939 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1940 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1942 fmt = SRP_DATA_DESC_INDIRECT;
1943 len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1944 sizeof(struct srp_indirect_buf);
1945 len += count * sizeof (struct srp_direct_buf);
1947 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1948 count * sizeof (struct srp_direct_buf));
1950 if (!target->global_rkey) {
1951 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1952 idb_len, &idb_rkey);
1957 idb_rkey = cpu_to_be32(target->global_rkey);
1960 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1961 indirect_hdr->table_desc.key = idb_rkey;
1962 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1963 indirect_hdr->len = cpu_to_be32(state.total_len);
1965 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1966 cmd->data_out_desc_cnt = count;
1968 cmd->data_in_desc_cnt = count;
1970 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1974 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1975 cmd->buf_fmt = fmt << 4;
1982 srp_unmap_data(scmnd, ch, req);
1983 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1989 * Return an IU and possible credit to the free pool
1991 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1992 enum srp_iu_type iu_type)
1994 unsigned long flags;
1996 spin_lock_irqsave(&ch->lock, flags);
1997 list_add(&iu->list, &ch->free_tx);
1998 if (iu_type != SRP_IU_RSP)
2000 spin_unlock_irqrestore(&ch->lock, flags);
2004 * Must be called with ch->lock held to protect req_lim and free_tx.
2005 * If IU is not sent, it must be returned using srp_put_tx_iu().
2008 * An upper limit for the number of allocated information units for each
2010 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
2011 * more than Scsi_Host.can_queue requests.
2012 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
2013 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
2014 * one unanswered SRP request to an initiator.
2016 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
2017 enum srp_iu_type iu_type)
2019 struct srp_target_port *target = ch->target;
2020 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
2023 lockdep_assert_held(&ch->lock);
2025 ib_process_cq_direct(ch->send_cq, -1);
2027 if (list_empty(&ch->free_tx))
2030 /* Initiator responses to target requests do not consume credits */
2031 if (iu_type != SRP_IU_RSP) {
2032 if (ch->req_lim <= rsv) {
2033 ++target->zero_req_lim;
2040 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
2041 list_del(&iu->list);
2046 * Note: if this function is called from inside ib_drain_sq() then it will
2047 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2048 * with status IB_WC_SUCCESS then that's a bug.
2050 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
2052 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2053 struct srp_rdma_ch *ch = cq->cq_context;
2055 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2056 srp_handle_qp_err(cq, wc, "SEND");
2060 lockdep_assert_held(&ch->lock);
2062 list_add(&iu->list, &ch->free_tx);
2066 * srp_post_send() - send an SRP information unit
2067 * @ch: RDMA channel over which to send the information unit.
2068 * @iu: Information unit to send.
2069 * @len: Length of the information unit excluding immediate data.
2071 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2073 struct srp_target_port *target = ch->target;
2074 struct ib_send_wr wr;
2076 if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
2079 iu->sge[0].addr = iu->dma;
2080 iu->sge[0].length = len;
2081 iu->sge[0].lkey = target->lkey;
2083 iu->cqe.done = srp_send_done;
2086 wr.wr_cqe = &iu->cqe;
2087 wr.sg_list = &iu->sge[0];
2088 wr.num_sge = iu->num_sge;
2089 wr.opcode = IB_WR_SEND;
2090 wr.send_flags = IB_SEND_SIGNALED;
2092 return ib_post_send(ch->qp, &wr, NULL);
2095 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2097 struct srp_target_port *target = ch->target;
2098 struct ib_recv_wr wr;
2101 list.addr = iu->dma;
2102 list.length = iu->size;
2103 list.lkey = target->lkey;
2105 iu->cqe.done = srp_recv_done;
2108 wr.wr_cqe = &iu->cqe;
2112 return ib_post_recv(ch->qp, &wr, NULL);
2115 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2117 struct srp_target_port *target = ch->target;
2118 struct srp_request *req;
2119 struct scsi_cmnd *scmnd;
2120 unsigned long flags;
2122 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2123 spin_lock_irqsave(&ch->lock, flags);
2124 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2125 if (rsp->tag == ch->tsk_mgmt_tag) {
2126 ch->tsk_mgmt_status = -1;
2127 if (be32_to_cpu(rsp->resp_data_len) >= 4)
2128 ch->tsk_mgmt_status = rsp->data[3];
2129 complete(&ch->tsk_mgmt_done);
2131 shost_printk(KERN_ERR, target->scsi_host,
2132 "Received tsk mgmt response too late for tag %#llx\n",
2135 spin_unlock_irqrestore(&ch->lock, flags);
2137 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2138 if (scmnd && scmnd->host_scribble) {
2139 req = (void *)scmnd->host_scribble;
2140 scmnd = srp_claim_req(ch, req, NULL, scmnd);
2145 shost_printk(KERN_ERR, target->scsi_host,
2146 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2147 rsp->tag, ch - target->ch, ch->qp->qp_num);
2149 spin_lock_irqsave(&ch->lock, flags);
2150 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2151 spin_unlock_irqrestore(&ch->lock, flags);
2155 scmnd->result = rsp->status;
2157 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2158 memcpy(scmnd->sense_buffer, rsp->data +
2159 be32_to_cpu(rsp->resp_data_len),
2160 min_t(int, be32_to_cpu(rsp->sense_data_len),
2161 SCSI_SENSE_BUFFERSIZE));
2164 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2165 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2166 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2167 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2168 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2169 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2170 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2171 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2173 srp_free_req(ch, req, scmnd,
2174 be32_to_cpu(rsp->req_lim_delta));
2176 scmnd->host_scribble = NULL;
2177 scmnd->scsi_done(scmnd);
2181 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2184 struct srp_target_port *target = ch->target;
2185 struct ib_device *dev = target->srp_host->srp_dev->dev;
2186 unsigned long flags;
2190 spin_lock_irqsave(&ch->lock, flags);
2191 ch->req_lim += req_delta;
2192 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2193 spin_unlock_irqrestore(&ch->lock, flags);
2196 shost_printk(KERN_ERR, target->scsi_host, PFX
2197 "no IU available to send response\n");
2202 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2203 memcpy(iu->buf, rsp, len);
2204 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2206 err = srp_post_send(ch, iu, len);
2208 shost_printk(KERN_ERR, target->scsi_host, PFX
2209 "unable to post response: %d\n", err);
2210 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2216 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2217 struct srp_cred_req *req)
2219 struct srp_cred_rsp rsp = {
2220 .opcode = SRP_CRED_RSP,
2223 s32 delta = be32_to_cpu(req->req_lim_delta);
2225 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2226 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2227 "problems processing SRP_CRED_REQ\n");
2230 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2231 struct srp_aer_req *req)
2233 struct srp_target_port *target = ch->target;
2234 struct srp_aer_rsp rsp = {
2235 .opcode = SRP_AER_RSP,
2238 s32 delta = be32_to_cpu(req->req_lim_delta);
2240 shost_printk(KERN_ERR, target->scsi_host, PFX
2241 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2243 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2244 shost_printk(KERN_ERR, target->scsi_host, PFX
2245 "problems processing SRP_AER_REQ\n");
2248 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2250 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2251 struct srp_rdma_ch *ch = cq->cq_context;
2252 struct srp_target_port *target = ch->target;
2253 struct ib_device *dev = target->srp_host->srp_dev->dev;
2257 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2258 srp_handle_qp_err(cq, wc, "RECV");
2262 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2265 opcode = *(u8 *) iu->buf;
2268 shost_printk(KERN_ERR, target->scsi_host,
2269 PFX "recv completion, opcode 0x%02x\n", opcode);
2270 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2271 iu->buf, wc->byte_len, true);
2276 srp_process_rsp(ch, iu->buf);
2280 srp_process_cred_req(ch, iu->buf);
2284 srp_process_aer_req(ch, iu->buf);
2288 /* XXX Handle target logout */
2289 shost_printk(KERN_WARNING, target->scsi_host,
2290 PFX "Got target logout request\n");
2294 shost_printk(KERN_WARNING, target->scsi_host,
2295 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2299 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2302 res = srp_post_recv(ch, iu);
2304 shost_printk(KERN_ERR, target->scsi_host,
2305 PFX "Recv failed with error code %d\n", res);
2309 * srp_tl_err_work() - handle a transport layer error
2310 * @work: Work structure embedded in an SRP target port.
2312 * Note: This function may get invoked before the rport has been created,
2313 * hence the target->rport test.
2315 static void srp_tl_err_work(struct work_struct *work)
2317 struct srp_target_port *target;
2319 target = container_of(work, struct srp_target_port, tl_err_work);
2321 srp_start_tl_fail_timers(target->rport);
2324 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2327 struct srp_rdma_ch *ch = cq->cq_context;
2328 struct srp_target_port *target = ch->target;
2330 if (ch->connected && !target->qp_in_error) {
2331 shost_printk(KERN_ERR, target->scsi_host,
2332 PFX "failed %s status %s (%d) for CQE %p\n",
2333 opname, ib_wc_status_msg(wc->status), wc->status,
2335 queue_work(system_long_wq, &target->tl_err_work);
2337 target->qp_in_error = true;
2340 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2342 struct srp_target_port *target = host_to_target(shost);
2343 struct srp_rdma_ch *ch;
2344 struct srp_request *req;
2346 struct srp_cmd *cmd;
2347 struct ib_device *dev;
2348 unsigned long flags;
2353 scmnd->result = srp_chkready(target->rport);
2354 if (unlikely(scmnd->result))
2357 WARN_ON_ONCE(scmnd->request->tag < 0);
2358 tag = blk_mq_unique_tag(scmnd->request);
2359 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2360 idx = blk_mq_unique_tag_to_tag(tag);
2361 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2362 dev_name(&shost->shost_gendev), tag, idx,
2363 target->req_ring_size);
2365 spin_lock_irqsave(&ch->lock, flags);
2366 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2367 spin_unlock_irqrestore(&ch->lock, flags);
2372 req = &ch->req_ring[idx];
2373 dev = target->srp_host->srp_dev->dev;
2374 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
2377 scmnd->host_scribble = (void *) req;
2380 memset(cmd, 0, sizeof *cmd);
2382 cmd->opcode = SRP_CMD;
2383 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2385 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2386 if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
2387 cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
2389 if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
2396 len = srp_map_data(scmnd, ch, req);
2398 shost_printk(KERN_ERR, target->scsi_host,
2399 PFX "Failed to map data (%d)\n", len);
2401 * If we ran out of memory descriptors (-ENOMEM) because an
2402 * application is queuing many requests with more than
2403 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2404 * to reduce queue depth temporarily.
2406 scmnd->result = len == -ENOMEM ?
2407 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2411 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
2414 if (srp_post_send(ch, iu, len)) {
2415 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2416 scmnd->result = DID_ERROR << 16;
2423 srp_unmap_data(scmnd, ch, req);
2426 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2429 * Avoid that the loops that iterate over the request ring can
2430 * encounter a dangling SCSI command pointer.
2435 if (scmnd->result) {
2436 scmnd->scsi_done(scmnd);
2439 ret = SCSI_MLQUEUE_HOST_BUSY;
2446 * Note: the resources allocated in this function are freed in
2449 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2451 struct srp_target_port *target = ch->target;
2454 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2458 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2463 for (i = 0; i < target->queue_size; ++i) {
2464 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2466 GFP_KERNEL, DMA_FROM_DEVICE);
2467 if (!ch->rx_ring[i])
2471 for (i = 0; i < target->queue_size; ++i) {
2472 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2474 GFP_KERNEL, DMA_TO_DEVICE);
2475 if (!ch->tx_ring[i])
2478 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2484 for (i = 0; i < target->queue_size; ++i) {
2485 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2486 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2499 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2501 uint64_t T_tr_ns, max_compl_time_ms;
2502 uint32_t rq_tmo_jiffies;
2505 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2506 * table 91), both the QP timeout and the retry count have to be set
2507 * for RC QP's during the RTR to RTS transition.
2509 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2510 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2513 * Set target->rq_tmo_jiffies to one second more than the largest time
2514 * it can take before an error completion is generated. See also
2515 * C9-140..142 in the IBTA spec for more information about how to
2516 * convert the QP Local ACK Timeout value to nanoseconds.
2518 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2519 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2520 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2521 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2523 return rq_tmo_jiffies;
2526 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2527 const struct srp_login_rsp *lrsp,
2528 struct srp_rdma_ch *ch)
2530 struct srp_target_port *target = ch->target;
2531 struct ib_qp_attr *qp_attr = NULL;
2536 if (lrsp->opcode == SRP_LOGIN_RSP) {
2537 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2538 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2539 ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
2540 ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
2542 WARN_ON_ONCE(ch->max_it_iu_len >
2543 be32_to_cpu(lrsp->max_it_iu_len));
2545 if (ch->use_imm_data)
2546 shost_printk(KERN_DEBUG, target->scsi_host,
2547 PFX "using immediate data\n");
2550 * Reserve credits for task management so we don't
2551 * bounce requests back to the SCSI mid-layer.
2553 target->scsi_host->can_queue
2554 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2555 target->scsi_host->can_queue);
2556 target->scsi_host->cmd_per_lun
2557 = min_t(int, target->scsi_host->can_queue,
2558 target->scsi_host->cmd_per_lun);
2560 shost_printk(KERN_WARNING, target->scsi_host,
2561 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2567 ret = srp_alloc_iu_bufs(ch);
2572 for (i = 0; i < target->queue_size; i++) {
2573 struct srp_iu *iu = ch->rx_ring[i];
2575 ret = srp_post_recv(ch, iu);
2580 if (!target->using_rdma_cm) {
2582 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2586 qp_attr->qp_state = IB_QPS_RTR;
2587 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2591 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2595 qp_attr->qp_state = IB_QPS_RTS;
2596 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2600 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2602 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2606 ret = ib_send_cm_rtu(cm_id, NULL, 0);
/*
 * srp_ib_cm_rej_handler() - translate an IB CM REJ event into a ch->status
 * value that the connect path can act on: a redirect (SRP_DLID_REDIRECT /
 * SRP_PORT_REDIRECT), a stale connection (SRP_STALE_CONN), or a fatal
 * -ECONNRESET.
 * NOTE(review): this extracted view is missing some original lines
 * (e.g. 'break;' statements and closing braces); only comments were added.
 */
2616 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2617 const struct ib_cm_event *event,
2618 struct srp_rdma_ch *ch)
2620 struct srp_target_port *target = ch->target;
2621 struct Scsi_Host *shost = target->scsi_host;
2622 struct ib_class_port_info *cpi;
2626 switch (event->param.rej_rcvd.reason) {
/* CM-level redirect: adopt the new DLID/P_Key/dgid so the caller can retry. */
2627 case IB_CM_REJ_PORT_CM_REDIRECT:
2628 cpi = event->param.rej_rcvd.ari;
2629 dlid = be16_to_cpu(cpi->redirect_lid);
2630 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2631 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2632 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2633 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
/* A zero DLID means only the port (GID) was redirected. */
2635 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2638 case IB_CM_REJ_PORT_REDIRECT:
2639 if (srp_target_is_topspin(target)) {
2640 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2643 * Topspin/Cisco SRP gateways incorrectly send
2644 * reject reason code 25 when they mean 24
2647 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2649 shost_printk(KERN_DEBUG, shost,
2650 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2651 be64_to_cpu(dgid->global.subnet_prefix),
2652 be64_to_cpu(dgid->global.interface_id));
2654 ch->status = SRP_PORT_REDIRECT;
2656 shost_printk(KERN_WARNING, shost,
2657 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2658 ch->status = -ECONNRESET;
2662 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2663 shost_printk(KERN_WARNING, shost,
2664 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2665 ch->status = -ECONNRESET;
/* Consumer-defined REJ: decode an SRP-level login rejection, if present. */
2668 case IB_CM_REJ_CONSUMER_DEFINED:
2669 opcode = *(u8 *) event->private_data;
2670 if (opcode == SRP_LOGIN_REJ) {
2671 struct srp_login_rej *rej = event->private_data;
2672 u32 reason = be32_to_cpu(rej->reason);
2674 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2675 shost_printk(KERN_WARNING, shost,
2676 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2678 shost_printk(KERN_WARNING, shost, PFX
2679 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2681 target->ib_cm.orig_dgid.raw,
2684 shost_printk(KERN_WARNING, shost,
2685 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2686 " opcode 0x%02x\n", opcode);
2687 ch->status = -ECONNRESET;
/* Stale connection: caller tears the old connection down and retries. */
2690 case IB_CM_REJ_STALE_CONN:
2691 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2692 ch->status = SRP_STALE_CONN;
2696 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2697 event->param.rej_rcvd.reason);
2698 ch->status = -ECONNRESET;
/*
 * srp_ib_cm_handler() - IB CM event callback for one SRP RDMA channel.
 *
 * Records the outcome of connection-related CM events in ch->status and
 * wakes up the task waiting in the connect path via complete(&ch->done).
 * NOTE(review): this extracted view is missing some original lines
 * (breaks, braces, the 'comp' bookkeeping); only comments were added.
 */
2702 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2703 const struct ib_cm_event *event)
2705 struct srp_rdma_ch *ch = cm_id->context;
2706 struct srp_target_port *target = ch->target;
2709 switch (event->event) {
2710 case IB_CM_REQ_ERROR:
2711 shost_printk(KERN_DEBUG, target->scsi_host,
2712 PFX "Sending CM REQ failed\n");
2714 ch->status = -ECONNRESET;
2717 case IB_CM_REP_RECEIVED:
2719 srp_cm_rep_handler(cm_id, event->private_data, ch);
2722 case IB_CM_REJ_RECEIVED:
2723 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2726 srp_ib_cm_rej_handler(cm_id, event, ch);
/* Target-initiated disconnect: ack with a DREP and schedule the
 * transport-layer error work to tear down the rport. */
2729 case IB_CM_DREQ_RECEIVED:
2730 shost_printk(KERN_WARNING, target->scsi_host,
2731 PFX "DREQ received - connection closed\n");
2732 ch->connected = false;
2733 if (ib_send_cm_drep(cm_id, NULL, 0))
2734 shost_printk(KERN_ERR, target->scsi_host,
2735 PFX "Sending CM DREP failed\n");
2736 queue_work(system_long_wq, &target->tl_err_work);
2739 case IB_CM_TIMEWAIT_EXIT:
2740 shost_printk(KERN_ERR, target->scsi_host,
2741 PFX "connection closed\n");
/* Events that require no action. */
2747 case IB_CM_MRA_RECEIVED:
2748 case IB_CM_DREQ_ERROR:
2749 case IB_CM_DREP_RECEIVED:
2753 shost_printk(KERN_WARNING, target->scsi_host,
2754 PFX "Unhandled CM event %d\n", event->event);
/* Wake up the connect path once the event has settled ch->status. */
2759 complete(&ch->done);
/*
 * srp_rdma_cm_rej_handler() - RDMA/CM flavor of the REJ decoder.
 *
 * Like srp_ib_cm_rej_handler() but for connections established through the
 * RDMA CM: the REJ reason arrives in event->status and the private data in
 * event->param.conn. Sets ch->status accordingly.
 * NOTE(review): extracted view is missing some original lines (breaks,
 * braces); only comments were added.
 */
2764 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2765 struct rdma_cm_event *event)
2767 struct srp_target_port *target = ch->target;
2768 struct Scsi_Host *shost = target->scsi_host;
2771 switch (event->status) {
2772 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2773 shost_printk(KERN_WARNING, shost,
2774 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2775 ch->status = -ECONNRESET;
/* Consumer-defined REJ: decode an SRP-level login rejection, if present. */
2778 case IB_CM_REJ_CONSUMER_DEFINED:
2779 opcode = *(u8 *) event->param.conn.private_data;
2780 if (opcode == SRP_LOGIN_REJ) {
2781 struct srp_login_rej *rej =
2782 (struct srp_login_rej *)
2783 event->param.conn.private_data;
2784 u32 reason = be32_to_cpu(rej->reason);
2786 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2787 shost_printk(KERN_WARNING, shost,
2788 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2790 shost_printk(KERN_WARNING, shost,
2791 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2793 shost_printk(KERN_WARNING, shost,
2794 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2797 ch->status = -ECONNRESET;
2800 case IB_CM_REJ_STALE_CONN:
2801 shost_printk(KERN_WARNING, shost,
2802 " REJ reason: stale connection\n");
2803 ch->status = SRP_STALE_CONN;
2807 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2809 ch->status = -ECONNRESET;
/*
 * srp_rdma_cm_handler() - RDMA CM event callback for one SRP RDMA channel.
 *
 * Mirrors srp_ib_cm_handler() for rdma_cm-managed connections: maps each
 * event to a ch->status value and wakes up the connect path via
 * complete(&ch->done).
 * NOTE(review): extracted view is missing some original lines (breaks,
 * braces, the 'comp' bookkeeping); only comments were added.
 */
2814 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2815 struct rdma_cm_event *event)
2817 struct srp_rdma_ch *ch = cm_id->context;
2818 struct srp_target_port *target = ch->target;
2821 switch (event->event) {
2822 case RDMA_CM_EVENT_ADDR_RESOLVED:
2827 case RDMA_CM_EVENT_ADDR_ERROR:
2828 ch->status = -ENXIO;
2832 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2837 case RDMA_CM_EVENT_ROUTE_ERROR:
2838 case RDMA_CM_EVENT_UNREACHABLE:
2839 ch->status = -EHOSTUNREACH;
2843 case RDMA_CM_EVENT_CONNECT_ERROR:
2844 shost_printk(KERN_DEBUG, target->scsi_host,
2845 PFX "Sending CM REQ failed\n");
2847 ch->status = -ECONNRESET;
/* Connection established: process the SRP login response. */
2850 case RDMA_CM_EVENT_ESTABLISHED:
2852 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2855 case RDMA_CM_EVENT_REJECTED:
2856 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2859 srp_rdma_cm_rej_handler(ch, event);
/* Remote disconnect: echo the disconnect and schedule TL error work. */
2862 case RDMA_CM_EVENT_DISCONNECTED:
2863 if (ch->connected) {
2864 shost_printk(KERN_WARNING, target->scsi_host,
2865 PFX "received DREQ\n");
2866 rdma_disconnect(ch->rdma_cm.cm_id);
2869 queue_work(system_long_wq, &target->tl_err_work);
2873 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2874 shost_printk(KERN_ERR, target->scsi_host,
2875 PFX "connection closed\n");
2882 shost_printk(KERN_WARNING, target->scsi_host,
2883 PFX "Unhandled CM event %d\n", event->event);
/* Wake up the connect path once the event has settled ch->status. */
2888 complete(&ch->done);
2894 * srp_change_queue_depth - setting device queue depth
2895 * @sdev: scsi device struct
2896 * @qdepth: requested queue depth
2898 * Returns queue depth.
2901 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2903 if (!sdev->tagged_supported)
2905 return scsi_change_queue_depth(sdev, qdepth);
/*
 * srp_send_tsk_mgmt() - send an SRP task management function and wait for
 * the response.
 * @ch:      RDMA channel to send the TMF on.
 * @req_tag: tag of the command the TMF applies to.
 * @lun:     logical unit number.
 * @func:    SRP_TSK_* task management function code.
 * @status:  out: TMF response status; only written when a response arrived.
 *
 * Returns 0 when a response was received within SRP_ABORT_TIMEOUT_MS,
 * -1 otherwise.
 * NOTE(review): extracted view is missing some original lines (early
 * returns, braces); only comments were added.
 */
2908 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2909 u8 func, u8 *status)
2911 struct srp_target_port *target = ch->target;
2912 struct srp_rport *rport = target->rport;
2913 struct ib_device *dev = target->srp_host->srp_dev->dev;
2915 struct srp_tsk_mgmt *tsk_mgmt;
/* Refuse to send a TMF on a dead or broken connection. */
2918 if (!ch->connected || target->qp_in_error)
2922 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2923 * invoked while a task management function is being sent.
2925 mutex_lock(&rport->mutex);
2926 spin_lock_irq(&ch->lock);
2927 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2928 spin_unlock_irq(&ch->lock);
2931 mutex_unlock(&rport->mutex);
2938 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2941 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2943 tsk_mgmt->opcode = SRP_TSK_MGMT;
2944 int_to_scsilun(lun, &tsk_mgmt->lun);
2945 tsk_mgmt->tsk_mgmt_func = func;
2946 tsk_mgmt->task_tag = req_tag;
/* Allocate a tag in the dedicated task-management tag space. */
2948 spin_lock_irq(&ch->lock);
2949 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2950 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2951 spin_unlock_irq(&ch->lock);
2953 init_completion(&ch->tsk_mgmt_done);
2955 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2957 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2958 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2959 mutex_unlock(&rport->mutex);
/* Wait for the TMF response; the RSP handler fills in tsk_mgmt_status. */
2963 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2964 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2965 if (res > 0 && status)
2966 *status = ch->tsk_mgmt_status;
2967 mutex_unlock(&rport->mutex);
2969 WARN_ON_ONCE(res < 0);
2971 return res > 0 ? 0 : -1;
/*
 * srp_abort() - SCSI error-handler abort callback.
 *
 * Claims the request so that normal completion can no longer free it,
 * sends an SRP ABORT TASK TMF on the channel that owns the block-layer
 * tag, and on success completes the command with DID_ABORT.
 * NOTE(review): extracted view is missing some original lines (returns,
 * braces); only comments were added.
 */
2974 static int srp_abort(struct scsi_cmnd *scmnd)
2976 struct srp_target_port *target = host_to_target(scmnd->device->host);
2977 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2980 struct srp_rdma_ch *ch;
2983 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
/* The hardware-queue index encoded in the unique tag selects the channel. */
2987 tag = blk_mq_unique_tag(scmnd->request);
2988 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2989 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2991 ch = &target->ch[ch_idx];
2992 if (!srp_claim_req(ch, req, NULL, scmnd))
2994 shost_printk(KERN_ERR, target->scsi_host,
2995 "Sending SRP abort for tag %#x\n", tag);
2996 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2997 SRP_TSK_ABORT_TASK, NULL) == 0)
2999 else if (target->rport->state == SRP_RPORT_LOST)
/* On a successful abort, release the request and complete the command. */
3003 if (ret == SUCCESS) {
3004 srp_free_req(ch, req, scmnd, 0);
3005 scmnd->result = DID_ABORT << 16;
3006 scmnd->scsi_done(scmnd);
3012 static int srp_reset_device(struct scsi_cmnd *scmnd)
3014 struct srp_target_port *target = host_to_target(scmnd->device->host);
3015 struct srp_rdma_ch *ch;
3018 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
3020 ch = &target->ch[0];
3021 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
3022 SRP_TSK_LUN_RESET, &status))
3030 static int srp_reset_host(struct scsi_cmnd *scmnd)
3032 struct srp_target_port *target = host_to_target(scmnd->device->host);
3034 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
3036 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
3039 static int srp_target_alloc(struct scsi_target *starget)
3041 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
3042 struct srp_target_port *target = host_to_target(shost);
3044 if (target->target_can_queue)
3045 starget->can_queue = target->target_can_queue;
3049 static int srp_slave_alloc(struct scsi_device *sdev)
3051 struct Scsi_Host *shost = sdev->host;
3052 struct srp_target_port *target = host_to_target(shost);
3053 struct srp_device *srp_dev = target->srp_host->srp_dev;
3054 struct ib_device *ibdev = srp_dev->dev;
3056 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
3057 blk_queue_virt_boundary(sdev->request_queue,
3058 ~srp_dev->mr_page_mask);
3063 static int srp_slave_configure(struct scsi_device *sdev)
3065 struct Scsi_Host *shost = sdev->host;
3066 struct srp_target_port *target = host_to_target(shost);
3067 struct request_queue *q = sdev->request_queue;
3068 unsigned long timeout;
3070 if (sdev->type == TYPE_DISK) {
3071 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3072 blk_queue_rq_timeout(q, timeout);
3078 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3081 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3083 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3086 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3089 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3091 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3094 static ssize_t show_service_id(struct device *dev,
3095 struct device_attribute *attr, char *buf)
3097 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3099 if (target->using_rdma_cm)
3101 return sprintf(buf, "0x%016llx\n",
3102 be64_to_cpu(target->ib_cm.service_id));
3105 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3108 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3110 if (target->using_rdma_cm)
3112 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3115 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3118 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3120 return sprintf(buf, "%pI6\n", target->sgid.raw);
3123 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3126 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3127 struct srp_rdma_ch *ch = &target->ch[0];
3129 if (target->using_rdma_cm)
3131 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3134 static ssize_t show_orig_dgid(struct device *dev,
3135 struct device_attribute *attr, char *buf)
3137 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3139 if (target->using_rdma_cm)
3141 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3144 static ssize_t show_req_lim(struct device *dev,
3145 struct device_attribute *attr, char *buf)
3147 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3148 struct srp_rdma_ch *ch;
3149 int i, req_lim = INT_MAX;
3151 for (i = 0; i < target->ch_count; i++) {
3152 ch = &target->ch[i];
3153 req_lim = min(req_lim, ch->req_lim);
3155 return sprintf(buf, "%d\n", req_lim);
3158 static ssize_t show_zero_req_lim(struct device *dev,
3159 struct device_attribute *attr, char *buf)
3161 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3163 return sprintf(buf, "%d\n", target->zero_req_lim);
3166 static ssize_t show_local_ib_port(struct device *dev,
3167 struct device_attribute *attr, char *buf)
3169 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3171 return sprintf(buf, "%d\n", target->srp_host->port);
3174 static ssize_t show_local_ib_device(struct device *dev,
3175 struct device_attribute *attr, char *buf)
3177 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3179 return sprintf(buf, "%s\n",
3180 dev_name(&target->srp_host->srp_dev->dev->dev));
3183 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3186 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3188 return sprintf(buf, "%d\n", target->ch_count);
3191 static ssize_t show_comp_vector(struct device *dev,
3192 struct device_attribute *attr, char *buf)
3194 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3196 return sprintf(buf, "%d\n", target->comp_vector);
3199 static ssize_t show_tl_retry_count(struct device *dev,
3200 struct device_attribute *attr, char *buf)
3202 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3204 return sprintf(buf, "%d\n", target->tl_retry_count);
3207 static ssize_t show_cmd_sg_entries(struct device *dev,
3208 struct device_attribute *attr, char *buf)
3210 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3212 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3215 static ssize_t show_allow_ext_sg(struct device *dev,
3216 struct device_attribute *attr, char *buf)
3218 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3220 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
/* Read-only sysfs attributes exported for each SRP SCSI host. */
3223 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
3224 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
3225 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
3226 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
3227 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
3228 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
3229 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
3230 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
3231 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
3232 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
3233 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3234 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
3235 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
3236 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
3237 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
3238 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
3240 static struct device_attribute *srp_host_attrs[] = {
3243 &dev_attr_service_id,
3247 &dev_attr_orig_dgid,
3249 &dev_attr_zero_req_lim,
3250 &dev_attr_local_ib_port,
3251 &dev_attr_local_ib_device,
3253 &dev_attr_comp_vector,
3254 &dev_attr_tl_retry_count,
3255 &dev_attr_cmd_sg_entries,
3256 &dev_attr_allow_ext_sg,
3260 static struct scsi_host_template srp_template = {
3261 .module = THIS_MODULE,
3262 .name = "InfiniBand SRP initiator",
3263 .proc_name = DRV_NAME,
3264 .target_alloc = srp_target_alloc,
3265 .slave_alloc = srp_slave_alloc,
3266 .slave_configure = srp_slave_configure,
3267 .info = srp_target_info,
3268 .queuecommand = srp_queuecommand,
3269 .change_queue_depth = srp_change_queue_depth,
3270 .eh_timed_out = srp_timed_out,
3271 .eh_abort_handler = srp_abort,
3272 .eh_device_reset_handler = srp_reset_device,
3273 .eh_host_reset_handler = srp_reset_host,
3274 .skip_settle_delay = true,
3275 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
3276 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
3278 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
3279 .shost_attrs = srp_host_attrs,
3280 .track_queue_depth = 1,
3283 static int srp_sdev_count(struct Scsi_Host *host)
3285 struct scsi_device *sdev;
3288 shost_for_each_device(sdev, host)
3296 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3297 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3298 * removal has been scheduled.
3299 * 0 and target->state != SRP_TARGET_REMOVED upon success.
/*
 * srp_add_target() - register a new SRP target port with the SCSI midlayer
 * and the SRP transport class, then scan it for LUNs.
 * NOTE(review): extracted view is missing some original lines (error
 * returns, braces, final return); only comments were added.
 */
3301 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3303 struct srp_rport_identifiers ids;
3304 struct srp_rport *rport;
/* SCANNING blocks concurrent removal until the initial scan finished. */
3306 target->state = SRP_TARGET_SCANNING;
3307 sprintf(target->target_name, "SRP.T10:%016llX",
3308 be64_to_cpu(target->id_ext));
3310 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
/* The transport-class rport identity is id_ext followed by ioc_guid. */
3313 memcpy(ids.port_id, &target->id_ext, 8);
3314 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3315 ids.roles = SRP_RPORT_ROLE_TARGET;
3316 rport = srp_rport_add(target->scsi_host, &ids);
3317 if (IS_ERR(rport)) {
3318 scsi_remove_host(target->scsi_host);
3319 return PTR_ERR(rport);
3322 rport->lld_data = target;
3323 target->rport = rport;
3325 spin_lock(&host->target_lock);
3326 list_add_tail(&target->list, &host->target_list);
3327 spin_unlock(&host->target_lock);
3329 scsi_scan_target(&target->scsi_host->shost_gendev,
3330 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
/* If the connection broke while scanning, schedule asynchronous removal. */
3332 if (srp_connected_ch(target) < target->ch_count ||
3333 target->qp_in_error) {
3334 shost_printk(KERN_INFO, target->scsi_host,
3335 PFX "SCSI scan failed - removing SCSI host\n");
3336 srp_queue_remove_work(target);
3340 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3341 dev_name(&target->scsi_host->shost_gendev),
3342 srp_sdev_count(target->scsi_host));
/* Only go LIVE if removal has not changed the state in the meantime. */
3344 spin_lock_irq(&target->lock);
3345 if (target->state == SRP_TARGET_SCANNING)
3346 target->state = SRP_TARGET_LIVE;
3347 spin_unlock_irq(&target->lock);
3353 static void srp_release_dev(struct device *dev)
3355 struct srp_host *host =
3356 container_of(dev, struct srp_host, dev);
3358 complete(&host->released);
3361 static struct class srp_class = {
3362 .name = "infiniband_srp",
3363 .dev_release = srp_release_dev
3367 * srp_conn_unique() - check whether the connection to a target is unique
3369 * @target: SRP target port.
3371 static bool srp_conn_unique(struct srp_host *host,
3372 struct srp_target_port *target)
3374 struct srp_target_port *t;
3377 if (target->state == SRP_TARGET_REMOVED)
3382 spin_lock(&host->target_lock);
3383 list_for_each_entry(t, &host->target_list, list) {
3385 target->id_ext == t->id_ext &&
3386 target->ioc_guid == t->ioc_guid &&
3387 target->initiator_ext == t->initiator_ext) {
3392 spin_unlock(&host->target_lock);
3399 * Target ports are added by writing
3401 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3402 * pkey=<P_Key>,service_id=<service ID>
3404 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3405 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3407 * to the add_target sysfs attribute.
/*
 * Bit flags identifying which add_target login parameters were supplied.
 * SRP_OPT_ERR (restored) is the sentinel returned by match_token() for an
 * unrecognized option.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 << 0,
	SRP_OPT_IOC_GUID	= 1 << 1,
	SRP_OPT_DGID		= 1 << 2,
	SRP_OPT_PKEY		= 1 << 3,
	SRP_OPT_SERVICE_ID	= 1 << 4,
	SRP_OPT_MAX_SECT	= 1 << 5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
	SRP_OPT_IO_CLASS	= 1 << 7,
	SRP_OPT_INITIATOR_EXT	= 1 << 8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_IP_SRC		= 1 << 15,
	SRP_OPT_IP_DEST		= 1 << 16,
	SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
};
3431 static unsigned int srp_opt_mandatory[] = {
3442 static const match_table_t srp_opt_tokens = {
3443 { SRP_OPT_ID_EXT, "id_ext=%s" },
3444 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3445 { SRP_OPT_DGID, "dgid=%s" },
3446 { SRP_OPT_PKEY, "pkey=%x" },
3447 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3448 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3449 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3450 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3451 { SRP_OPT_IO_CLASS, "io_class=%x" },
3452 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3453 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3454 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3455 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3456 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3457 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3458 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3459 { SRP_OPT_IP_SRC, "src=%s" },
3460 { SRP_OPT_IP_DEST, "dest=%s" },
3461 { SRP_OPT_ERR, NULL }
3465 * srp_parse_in - parse an IP address and port number combination
3466 * @net: [in] Network namespace.
3467 * @sa: [out] Address family, IP address and port number.
3468 * @addr_port_str: [in] IP address and port number.
3469 * @has_port: [out] Whether or not @addr_port_str includes a port number.
3471 * Parse the following address formats:
3472 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3473 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3475 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3476 const char *addr_port_str, bool *has_port)
3478 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3484 port_str = strrchr(addr, ':');
3485 if (port_str && strchr(port_str, ']'))
3490 *has_port = port_str != NULL;
3491 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3492 if (ret && addr[0]) {
3493 addr_end = addr + strlen(addr) - 1;
3494 if (addr[0] == '[' && *addr_end == ']') {
3496 ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3501 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
/*
 * srp_parse_options() - parse the comma-separated add_target parameter
 * string @buf into @target.
 *
 * Each option is matched against srp_opt_tokens; at the end the function
 * verifies that one of the mandatory option combinations in
 * srp_opt_mandatory was supplied. Returns 0 on success or a negative errno.
 * NOTE(review): extracted view is missing some original lines (gotos,
 * breaks, frees, braces); only comments were added.
 */
3505 static int srp_parse_options(struct net *net, const char *buf,
3506 struct srp_target_port *target)
3508 char *options, *sep_opt;
3510 substring_t args[MAX_OPT_ARGS];
3511 unsigned long long ull;
/* Work on a modifiable copy of @buf; strsep() below writes into it. */
3518 options = kstrdup(buf, GFP_KERNEL);
3523 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3527 token = match_token(p, srp_opt_tokens, args);
3531 case SRP_OPT_ID_EXT:
3532 p = match_strdup(args);
3537 ret = kstrtoull(p, 16, &ull);
3539 pr_warn("invalid id_ext parameter '%s'\n", p);
3543 target->id_ext = cpu_to_be64(ull);
3547 case SRP_OPT_IOC_GUID:
3548 p = match_strdup(args);
3553 ret = kstrtoull(p, 16, &ull);
3555 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3559 target->ioc_guid = cpu_to_be64(ull);
/* dgid= is a 32-character hex string (128-bit GID). */
3564 p = match_strdup(args);
3569 if (strlen(p) != 32) {
3570 pr_warn("bad dest GID parameter '%s'\n", p);
3575 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3582 if (match_hex(args, &token)) {
3583 pr_warn("bad P_Key parameter '%s'\n", p);
3586 target->ib_cm.pkey = cpu_to_be16(token);
3589 case SRP_OPT_SERVICE_ID:
3590 p = match_strdup(args);
3595 ret = kstrtoull(p, 16, &ull);
3597 pr_warn("bad service_id parameter '%s'\n", p);
3601 target->ib_cm.service_id = cpu_to_be64(ull);
/* src=/dest= select the RDMA/CM (IP-based) connection mode. */
3605 case SRP_OPT_IP_SRC:
3606 p = match_strdup(args);
3611 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
3614 pr_warn("bad source parameter '%s'\n", p);
3618 target->rdma_cm.src_specified = true;
3622 case SRP_OPT_IP_DEST:
3623 p = match_strdup(args);
3628 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
3633 pr_warn("bad dest parameter '%s'\n", p);
3637 target->using_rdma_cm = true;
3641 case SRP_OPT_MAX_SECT:
3642 if (match_int(args, &token)) {
3643 pr_warn("bad max sect parameter '%s'\n", p);
3646 target->scsi_host->max_sectors = token;
/* queue_size also sizes the channel rings (plus RSP/TMF headroom). */
3649 case SRP_OPT_QUEUE_SIZE:
3650 if (match_int(args, &token) || token < 1) {
3651 pr_warn("bad queue_size parameter '%s'\n", p);
3654 target->scsi_host->can_queue = token;
3655 target->queue_size = token + SRP_RSP_SQ_SIZE +
3656 SRP_TSK_MGMT_SQ_SIZE;
3657 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3658 target->scsi_host->cmd_per_lun = token;
3661 case SRP_OPT_MAX_CMD_PER_LUN:
3662 if (match_int(args, &token) || token < 1) {
3663 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3667 target->scsi_host->cmd_per_lun = token;
3670 case SRP_OPT_TARGET_CAN_QUEUE:
3671 if (match_int(args, &token) || token < 1) {
3672 pr_warn("bad max target_can_queue parameter '%s'\n",
3676 target->target_can_queue = token;
3679 case SRP_OPT_IO_CLASS:
3680 if (match_hex(args, &token)) {
3681 pr_warn("bad IO class parameter '%s'\n", p);
3684 if (token != SRP_REV10_IB_IO_CLASS &&
3685 token != SRP_REV16A_IB_IO_CLASS) {
3686 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3687 token, SRP_REV10_IB_IO_CLASS,
3688 SRP_REV16A_IB_IO_CLASS);
3691 target->io_class = token;
3694 case SRP_OPT_INITIATOR_EXT:
3695 p = match_strdup(args);
3700 ret = kstrtoull(p, 16, &ull);
3702 pr_warn("bad initiator_ext value '%s'\n", p);
3706 target->initiator_ext = cpu_to_be64(ull);
3710 case SRP_OPT_CMD_SG_ENTRIES:
3711 if (match_int(args, &token) || token < 1 || token > 255) {
3712 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3716 target->cmd_sg_cnt = token;
3719 case SRP_OPT_ALLOW_EXT_SG:
3720 if (match_int(args, &token)) {
3721 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3724 target->allow_ext_sg = !!token;
3727 case SRP_OPT_SG_TABLESIZE:
3728 if (match_int(args, &token) || token < 1 ||
3729 token > SG_MAX_SEGMENTS) {
3730 pr_warn("bad max sg_tablesize parameter '%s'\n",
3734 target->sg_tablesize = token;
3737 case SRP_OPT_COMP_VECTOR:
3738 if (match_int(args, &token) || token < 0) {
3739 pr_warn("bad comp_vector parameter '%s'\n", p);
3742 target->comp_vector = token;
3745 case SRP_OPT_TL_RETRY_COUNT:
3746 if (match_int(args, &token) || token < 2 || token > 7) {
3747 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3751 target->tl_retry_count = token;
3755 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
/* Accept the option set if it covers any one mandatory combination. */
3761 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3762 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3768 pr_warn("target creation request is missing one or more parameters\n");
3770 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3771 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3772 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3773 target->scsi_host->cmd_per_lun,
3774 target->scsi_host->can_queue);
3781 static ssize_t srp_create_target(struct device *dev,
3782 struct device_attribute *attr,
3783 const char *buf, size_t count)
3785 struct srp_host *host =
3786 container_of(dev, struct srp_host, dev);
3787 struct Scsi_Host *target_host;
3788 struct srp_target_port *target;
3789 struct srp_rdma_ch *ch;
3790 struct srp_device *srp_dev = host->srp_dev;
3791 struct ib_device *ibdev = srp_dev->dev;
3792 int ret, node_idx, node, cpu, i;
3793 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3794 bool multich = false;
3795 uint32_t max_iu_len;
3797 target_host = scsi_host_alloc(&srp_template,
3798 sizeof (struct srp_target_port));
3802 target_host->transportt = ib_srp_transport_template;
3803 target_host->max_channel = 0;
3804 target_host->max_id = 1;
3805 target_host->max_lun = -1LL;
3806 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3807 target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
3809 target = host_to_target(target_host);
3811 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3812 target->io_class = SRP_REV16A_IB_IO_CLASS;
3813 target->scsi_host = target_host;
3814 target->srp_host = host;
3815 target->lkey = host->srp_dev->pd->local_dma_lkey;
3816 target->global_rkey = host->srp_dev->global_rkey;
3817 target->cmd_sg_cnt = cmd_sg_entries;
3818 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3819 target->allow_ext_sg = allow_ext_sg;
3820 target->tl_retry_count = 7;
3821 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3824 * Avoid that the SCSI host can be removed by srp_remove_target()
3825 * before this function returns.
3827 scsi_host_get(target->scsi_host);
3829 ret = mutex_lock_interruptible(&host->add_target_mutex);
3833 ret = srp_parse_options(target->net, buf, target);
3837 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3839 if (!srp_conn_unique(target->srp_host, target)) {
3840 if (target->using_rdma_cm) {
3841 shost_printk(KERN_INFO, target->scsi_host,
3842 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3843 be64_to_cpu(target->id_ext),
3844 be64_to_cpu(target->ioc_guid),
3845 &target->rdma_cm.dst);
3847 shost_printk(KERN_INFO, target->scsi_host,
3848 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3849 be64_to_cpu(target->id_ext),
3850 be64_to_cpu(target->ioc_guid),
3851 be64_to_cpu(target->initiator_ext));
3857 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3858 target->cmd_sg_cnt < target->sg_tablesize) {
3859 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3860 target->sg_tablesize = target->cmd_sg_cnt;
3863 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3864 bool gaps_reg = (ibdev->attrs.device_cap_flags &
3865 IB_DEVICE_SG_GAPS_REG);
3867 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3868 (ilog2(srp_dev->mr_page_size) - 9);
3871 * FR and FMR can only map one HCA page per entry. If
3872 * the start address is not aligned on a HCA page
3873 * boundary two entries will be used for the head and
3874 * the tail although these two entries combined
3875 * contain at most one HCA page of data. Hence the "+
3876 * 1" in the calculation below.
3878 * The indirect data buffer descriptor is contiguous
3879 * so the memory for that buffer will only be
3880 * registered if register_always is true. Hence add
3881 * one to mr_per_cmd if register_always has been set.
3883 mr_per_cmd = register_always +
3884 (target->scsi_host->max_sectors + 1 +
3885 max_sectors_per_mr - 1) / max_sectors_per_mr;
3887 mr_per_cmd = register_always +
3888 (target->sg_tablesize +
3889 srp_dev->max_pages_per_mr - 1) /
3890 srp_dev->max_pages_per_mr;
3892 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3893 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3894 max_sectors_per_mr, mr_per_cmd);
3897 target_host->sg_tablesize = target->sg_tablesize;
3898 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3899 target->mr_per_cmd = mr_per_cmd;
3900 target->indirect_size = target->sg_tablesize *
3901 sizeof (struct srp_direct_buf);
3902 max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
3904 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3905 INIT_WORK(&target->remove_work, srp_remove_work);
3906 spin_lock_init(&target->lock);
3907 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3912 target->ch_count = max_t(unsigned, num_online_nodes(),
3914 min(4 * num_online_nodes(),
3915 ibdev->num_comp_vectors),
3916 num_online_cpus()));
3917 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3923 for_each_online_node(node) {
3924 const int ch_start = (node_idx * target->ch_count /
3925 num_online_nodes());
3926 const int ch_end = ((node_idx + 1) * target->ch_count /
3927 num_online_nodes());
3928 const int cv_start = node_idx * ibdev->num_comp_vectors /
3930 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3934 for_each_online_cpu(cpu) {
3935 if (cpu_to_node(cpu) != node)
3937 if (ch_start + cpu_idx >= ch_end)
3939 ch = &target->ch[ch_start + cpu_idx];
3940 ch->target = target;
3941 ch->comp_vector = cv_start == cv_end ? cv_start :
3942 cv_start + cpu_idx % (cv_end - cv_start);
3943 spin_lock_init(&ch->lock);
3944 INIT_LIST_HEAD(&ch->free_tx);
3945 ret = srp_new_cm_id(ch);
3947 goto err_disconnect;
3949 ret = srp_create_ch_ib(ch);
3951 goto err_disconnect;
3953 ret = srp_alloc_req_data(ch);
3955 goto err_disconnect;
3957 ret = srp_connect_ch(ch, max_iu_len, multich);
3961 if (target->using_rdma_cm)
3962 snprintf(dst, sizeof(dst), "%pIS",
3963 &target->rdma_cm.dst);
3965 snprintf(dst, sizeof(dst), "%pI6",
3966 target->ib_cm.orig_dgid.raw);
3967 shost_printk(KERN_ERR, target->scsi_host,
3968 PFX "Connection %d/%d to %s failed\n",
3970 target->ch_count, dst);
3971 if (node_idx == 0 && cpu_idx == 0) {
3974 srp_free_ch_ib(target, ch);
3975 srp_free_req_data(target, ch);
3976 target->ch_count = ch - target->ch;
3988 target->scsi_host->nr_hw_queues = target->ch_count;
3990 ret = srp_add_target(host, target);
3992 goto err_disconnect;
3994 if (target->state != SRP_TARGET_REMOVED) {
3995 if (target->using_rdma_cm) {
3996 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3997 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3998 be64_to_cpu(target->id_ext),
3999 be64_to_cpu(target->ioc_guid),
4000 target->sgid.raw, &target->rdma_cm.dst);
4002 shost_printk(KERN_DEBUG, target->scsi_host, PFX
4003 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
4004 be64_to_cpu(target->id_ext),
4005 be64_to_cpu(target->ioc_guid),
4006 be16_to_cpu(target->ib_cm.pkey),
4007 be64_to_cpu(target->ib_cm.service_id),
4009 target->ib_cm.orig_dgid.raw);
4016 mutex_unlock(&host->add_target_mutex);
4019 scsi_host_put(target->scsi_host);
4022 * If a call to srp_remove_target() has not been scheduled,
4023 * drop the network namespace reference now that was obtained
4024 * earlier in this function.
4026 if (target->state != SRP_TARGET_REMOVED)
4027 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
4028 scsi_host_put(target->scsi_host);
4034 srp_disconnect_target(target);
4037 for (i = 0; i < target->ch_count; i++) {
4038 ch = &target->ch[i];
4039 srp_free_ch_ib(target, ch);
4040 srp_free_req_data(target, ch);
/* Write-only sysfs attribute: writing a target spec string creates a target. */
static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
4049 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
4052 struct srp_host *host = container_of(dev, struct srp_host, dev);
4054 return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
4057 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
4059 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
4062 struct srp_host *host = container_of(dev, struct srp_host, dev);
4064 return sprintf(buf, "%d\n", host->port);
4067 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4069 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4071 struct srp_host *host;
4073 host = kzalloc(sizeof *host, GFP_KERNEL);
4077 INIT_LIST_HEAD(&host->target_list);
4078 spin_lock_init(&host->target_lock);
4079 init_completion(&host->released);
4080 mutex_init(&host->add_target_mutex);
4081 host->srp_dev = device;
4084 host->dev.class = &srp_class;
4085 host->dev.parent = device->dev->dev.parent;
4086 dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
4089 if (device_register(&host->dev))
4091 if (device_create_file(&host->dev, &dev_attr_add_target))
4093 if (device_create_file(&host->dev, &dev_attr_ibdev))
4095 if (device_create_file(&host->dev, &dev_attr_port))
4101 device_unregister(&host->dev);
/*
 * srp_rename_dev() - IB client rename callback.
 *
 * Keeps the sysfs name of every SRP host ("srp-<ibdev>-<port>") in sync
 * after the underlying RDMA device has been renamed.
 */
static void srp_rename_dev(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev = client_data;
	struct srp_host *host, *tmp_host;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		/* "srp-" + device name + "-" + port number + '\0' */
		char name[IB_DEVICE_NAME_MAX + 8];

		snprintf(name, sizeof(name), "srp-%s-%d",
			 dev_name(&device->dev), host->port);
		device_rename(&host->dev, name);
	}
}
4123 static void srp_add_one(struct ib_device *device)
4125 struct srp_device *srp_dev;
4126 struct ib_device_attr *attr = &device->attrs;
4127 struct srp_host *host;
4130 u64 max_pages_per_mr;
4131 unsigned int flags = 0;
4133 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4138 * Use the smallest page size supported by the HCA, down to a
4139 * minimum of 4096 bytes. We're unlikely to build large sglists
4140 * out of smaller entries.
4142 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
4143 srp_dev->mr_page_size = 1 << mr_page_shift;
4144 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
4145 max_pages_per_mr = attr->max_mr_size;
4146 do_div(max_pages_per_mr, srp_dev->mr_page_size);
4147 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4148 attr->max_mr_size, srp_dev->mr_page_size,
4149 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4150 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4153 srp_dev->has_fmr = (device->ops.alloc_fmr &&
4154 device->ops.dealloc_fmr &&
4155 device->ops.map_phys_fmr &&
4156 device->ops.unmap_fmr);
4157 srp_dev->has_fr = (attr->device_cap_flags &
4158 IB_DEVICE_MEM_MGT_EXTENSIONS);
4159 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4160 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4161 } else if (!never_register &&
4162 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4163 srp_dev->use_fast_reg = (srp_dev->has_fr &&
4164 (!srp_dev->has_fmr || prefer_fr));
4165 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
4168 if (never_register || !register_always ||
4169 (!srp_dev->has_fmr && !srp_dev->has_fr))
4170 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
4172 if (srp_dev->use_fast_reg) {
4173 srp_dev->max_pages_per_mr =
4174 min_t(u32, srp_dev->max_pages_per_mr,
4175 attr->max_fast_reg_page_list_len);
4177 srp_dev->mr_max_size = srp_dev->mr_page_size *
4178 srp_dev->max_pages_per_mr;
4179 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4180 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
4181 attr->max_fast_reg_page_list_len,
4182 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4184 INIT_LIST_HEAD(&srp_dev->dev_list);
4186 srp_dev->dev = device;
4187 srp_dev->pd = ib_alloc_pd(device, flags);
4188 if (IS_ERR(srp_dev->pd))
4191 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4192 srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4193 WARN_ON_ONCE(srp_dev->global_rkey == 0);
4196 rdma_for_each_port (device, p) {
4197 host = srp_add_port(srp_dev, p);
4199 list_add_tail(&host->list, &srp_dev->dev_list);
4202 ib_set_client_data(device, &srp_client, srp_dev);
4209 static void srp_remove_one(struct ib_device *device, void *client_data)
4211 struct srp_device *srp_dev;
4212 struct srp_host *host, *tmp_host;
4213 struct srp_target_port *target;
4215 srp_dev = client_data;
4219 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4220 device_unregister(&host->dev);
4222 * Wait for the sysfs entry to go away, so that no new
4223 * target ports can be created.
4225 wait_for_completion(&host->released);
4228 * Remove all target ports.
4230 spin_lock(&host->target_lock);
4231 list_for_each_entry(target, &host->target_list, list)
4232 srp_queue_remove_work(target);
4233 spin_unlock(&host->target_lock);
4236 * Wait for tl_err and target port removal tasks.
4238 flush_workqueue(system_long_wq);
4239 flush_workqueue(srp_remove_wq);
4244 ib_dealloc_pd(srp_dev->pd);
/*
 * Callbacks hooking this initiator into the SCSI SRP transport layer
 * (rport state management, reconnect and I/O-failure handling).
 */
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state = true,
	.reset_timer_if_blocked = true,
	.reconnect_delay = &srp_reconnect_delay,
	.fast_io_fail_tmo = &srp_fast_io_fail_tmo,
	.dev_loss_tmo = &srp_dev_loss_tmo,
	.reconnect = srp_rport_reconnect,
	.rport_delete = srp_rport_delete,
	.terminate_rport_io = srp_terminate_io,
};
4260 static int __init srp_init_module(void)
4264 BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
4265 BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
4266 BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
4267 BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
4269 if (srp_sg_tablesize) {
4270 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4271 if (!cmd_sg_entries)
4272 cmd_sg_entries = srp_sg_tablesize;
4275 if (!cmd_sg_entries)
4276 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4278 if (cmd_sg_entries > 255) {
4279 pr_warn("Clamping cmd_sg_entries to 255\n");
4280 cmd_sg_entries = 255;
4283 if (!indirect_sg_entries)
4284 indirect_sg_entries = cmd_sg_entries;
4285 else if (indirect_sg_entries < cmd_sg_entries) {
4286 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4288 indirect_sg_entries = cmd_sg_entries;
4291 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4292 pr_warn("Clamping indirect_sg_entries to %u\n",
4294 indirect_sg_entries = SG_MAX_SEGMENTS;
4297 srp_remove_wq = create_workqueue("srp_remove");
4298 if (!srp_remove_wq) {
4304 ib_srp_transport_template =
4305 srp_attach_transport(&ib_srp_transport_functions);
4306 if (!ib_srp_transport_template)
4309 ret = class_register(&srp_class);
4311 pr_err("couldn't register class infiniband_srp\n");
4315 ib_sa_register_client(&srp_sa_client);
4317 ret = ib_register_client(&srp_client);
4319 pr_err("couldn't register IB client\n");
4327 ib_sa_unregister_client(&srp_sa_client);
4328 class_unregister(&srp_class);
4331 srp_release_transport(ib_srp_transport_template);
4334 destroy_workqueue(srp_remove_wq);
/*
 * srp_cleanup_module() - module unload: tear everything down in the
 * reverse order of srp_init_module().
 */
static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);