Merge branches 'core', 'cxgb4', 'iser', 'mlx5' and 'ocrdma' into for-next
author Roland Dreier <roland@purestorage.com>
Tue, 14 Oct 2014 21:09:12 +0000 (14:09 -0700)
committer Roland Dreier <roland@purestorage.com>
Tue, 14 Oct 2014 21:09:12 +0000 (14:09 -0700)
19 files changed:
MAINTAINERS
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
include/linux/mlx5/qp.h
include/rdma/ib_verbs.h

index 37054306dc9f31462d9defed2696bd795fcbc151..1f3c82833ea2d7c57ed1f028254baeeca21ae50f 100644 (file)
@@ -5020,6 +5020,7 @@ F:        include/scsi/*iscsi*
 
 ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
 M:     Or Gerlitz <ogerlitz@mellanox.com>
+M:     Sagi Grimberg <sagig@mellanox.com>
 M:     Roi Dayan <roid@mellanox.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
index 0600c50e62151246e163751bd41a081b328bba38..5ba2a86aab6a8ac4a5b6f5693b6627d0832a5f6c 100644 (file)
@@ -2518,6 +2518,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
        attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
        attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+       attr.vlan_id           = 0;
+       memset(&attr.dmac, 0, sizeof(attr.dmac));
        memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
        ah = ib_create_ah(pd, &attr);
index c73b22a257fe3c92c9398e5318acbc4ffa569b90..71ab83fde47292e95315c2170b00fec6277b7861 100644 (file)
@@ -477,6 +477,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 
        entry->desc.async.element    = element;
        entry->desc.async.event_type = event;
+       entry->desc.async.reserved   = 0;
        entry->counter               = counter;
 
        list_add_tail(&entry->list, &file->async_file->event_list);
@@ -502,6 +503,10 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
 {
        struct ib_uevent_object *uobj;
 
+       /* for XRC target qp's, check that qp is live */
+       if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+               return;
+
        uobj = container_of(event->element.qp->uobject,
                            struct ib_uevent_object, uobject);
 
index d8907b20522abcb8eb46b0266a8495523eb241f2..a24431746377092528eee81d84ad9b660b585bb1 100644 (file)
@@ -650,13 +650,13 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
                        return -EINVAL;
 
                idx = get_index(vma->vm_pgoff);
+               if (idx >= uuari->num_uars)
+                       return -EINVAL;
+
                pfn = uar_index2pfn(dev, uuari->uars[idx].index);
                mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
                            (unsigned long long)pfn);
 
-               if (idx >= uuari->num_uars)
-                       return -EINVAL;
-
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
                if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
@@ -1414,8 +1414,8 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
        struct mlx5_ib_dev *dev = context;
-       destroy_umrc_res(dev);
        ib_unregister_device(&dev->ib_dev);
+       destroy_umrc_res(dev);
        destroy_dev_resources(&dev->devr);
        free_comp_eqs(dev);
        ib_dealloc_device(&dev->ib_dev);
index a3e81444c82519b6e3b7305ac6cb5911a69b20ce..dae07eae95073b8d08b42de972e62600560d17e3 100644 (file)
@@ -55,16 +55,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
        u64 pfn;
        struct scatterlist *sg;
        int entry;
+       unsigned long page_shift = ilog2(umem->page_size);
 
-       addr = addr >> PAGE_SHIFT;
+       addr = addr >> page_shift;
        tmp = (unsigned long)addr;
        m = find_first_bit(&tmp, sizeof(tmp));
        skip = 1 << m;
        mask = skip - 1;
        i = 0;
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-               len = sg_dma_len(sg) >> PAGE_SHIFT;
-               pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+               len = sg_dma_len(sg) >> page_shift;
+               pfn = sg_dma_address(sg) >> page_shift;
                for (k = 0; k < len; k++) {
                        if (!(i & mask)) {
                                tmp = (unsigned long)pfn;
@@ -103,14 +104,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 
                *ncont = 0;
        }
-       *shift = PAGE_SHIFT + m;
+       *shift = page_shift + m;
        *count = i;
 }
 
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                          int page_shift, __be64 *pas, int umr)
 {
-       int shift = page_shift - PAGE_SHIFT;
+       unsigned long umem_page_shift = ilog2(umem->page_size);
+       int shift = page_shift - umem_page_shift;
        int mask = (1 << shift) - 1;
        int i, k;
        u64 cur = 0;
@@ -121,11 +123,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 
        i = 0;
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-               len = sg_dma_len(sg) >> PAGE_SHIFT;
+               len = sg_dma_len(sg) >> umem_page_shift;
                base = sg_dma_address(sg);
                for (k = 0; k < len; k++) {
                        if (!(i & mask)) {
-                               cur = base + (k << PAGE_SHIFT);
+                               cur = base + (k << umem_page_shift);
                                if (umr)
                                        cur |= 3;
 
@@ -134,7 +136,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                                            i >> shift, be64_to_cpu(pas[i >> shift]));
                        }  else
                                mlx5_ib_dbg(dev, "=====> 0x%llx\n",
-                                           base + (k << PAGE_SHIFT));
+                                           base + (k << umem_page_shift));
                        i++;
                }
        }
index 80b3c63eab5d96b75a84180f56b9df9ec39a7b73..8ee7cb46e0590668bd79277bc672a9d41f90f02d 100644 (file)
@@ -881,12 +881,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        int order;
        int err;
 
-       mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
-                   start, virt_addr, length);
+       mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
+                   start, virt_addr, length, access_flags);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
-               mlx5_ib_dbg(dev, "umem get failed\n");
+               mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
                return (void *)umem;
        }
 
index 8c574b63d77b900768a71ffe334d5cf046b7d83d..f1b49e024664a117bfbafecc7683cd08463c7afc 100644 (file)
@@ -1302,6 +1302,11 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
        path->rlid      = cpu_to_be16(ah->dlid);
 
        if (ah->ah_flags & IB_AH_GRH) {
+               if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) {
+                       pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+                              ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len);
+                       return -EINVAL;
+               }
                path->grh_mlid |= 1 << 7;
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
@@ -1317,22 +1322,6 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
        path->static_rate = err;
        path->port = port;
 
-       if (ah->ah_flags & IB_AH_GRH) {
-               if (ah->grh.sgid_index >= dev->mdev->caps.port[port - 1].gid_table_len) {
-                       pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n",
-                              ah->grh.sgid_index, dev->mdev->caps.port[port - 1].gid_table_len);
-                       return -EINVAL;
-               }
-
-               path->grh_mlid |= 1 << 7;
-               path->mgid_index = ah->grh.sgid_index;
-               path->hop_limit  = ah->grh.hop_limit;
-               path->tclass_flowlabel =
-                       cpu_to_be32((ah->grh.traffic_class << 20) |
-                                   (ah->grh.flow_label));
-               memcpy(path->rgid, ah->grh.dgid.raw, 16);
-       }
-
        if (attr_mask & IB_QP_TIMEOUT)
                path->ackto_lt = attr->timeout << 3;
 
@@ -2020,56 +2009,31 @@ static u8 bs_selector(int block_size)
        }
 }
 
-static int format_selector(struct ib_sig_attrs *attr,
-                          struct ib_sig_domain *domain,
-                          int *selector)
+static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+                             struct mlx5_bsf_inl *inl)
 {
+       /* Valid inline section and allow BSF refresh */
+       inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+                                      MLX5_BSF_REFRESH_DIF);
+       inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+       inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
+       /* repeating block */
+       inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
+       inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+                       MLX5_DIF_CRC : MLX5_DIF_IPCS;
 
-#define FORMAT_DIF_NONE                0
-#define FORMAT_DIF_CRC_INC     8
-#define FORMAT_DIF_CRC_NO_INC  12
-#define FORMAT_DIF_CSUM_INC    13
-#define FORMAT_DIF_CSUM_NO_INC 14
+       if (domain->sig.dif.ref_remap)
+               inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
 
-       switch (domain->sig.dif.type) {
-       case IB_T10DIF_NONE:
-               /* No DIF */
-               *selector = FORMAT_DIF_NONE;
-               break;
-       case IB_T10DIF_TYPE1: /* Fall through */
-       case IB_T10DIF_TYPE2:
-               switch (domain->sig.dif.bg_type) {
-               case IB_T10DIF_CRC:
-                       *selector = FORMAT_DIF_CRC_INC;
-                       break;
-               case IB_T10DIF_CSUM:
-                       *selector = FORMAT_DIF_CSUM_INC;
-                       break;
-               default:
-                       return 1;
-               }
-               break;
-       case IB_T10DIF_TYPE3:
-               switch (domain->sig.dif.bg_type) {
-               case IB_T10DIF_CRC:
-                       *selector = domain->sig.dif.type3_inc_reftag ?
-                                          FORMAT_DIF_CRC_INC :
-                                          FORMAT_DIF_CRC_NO_INC;
-                       break;
-               case IB_T10DIF_CSUM:
-                       *selector = domain->sig.dif.type3_inc_reftag ?
-                                          FORMAT_DIF_CSUM_INC :
-                                          FORMAT_DIF_CSUM_NO_INC;
-                       break;
-               default:
-                       return 1;
-               }
-               break;
-       default:
-               return 1;
+       if (domain->sig.dif.app_escape) {
+               if (domain->sig.dif.ref_escape)
+                       inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+               else
+                       inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
        }
 
-       return 0;
+       inl->dif_app_bitmask_check =
+               cpu_to_be16(domain->sig.dif.apptag_check_mask);
 }
 
 static int mlx5_set_bsf(struct ib_mr *sig_mr,
@@ -2080,45 +2044,49 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
        struct mlx5_bsf_basic *basic = &bsf->basic;
        struct ib_sig_domain *mem = &sig_attrs->mem;
        struct ib_sig_domain *wire = &sig_attrs->wire;
-       int ret, selector;
 
        memset(bsf, 0, sizeof(*bsf));
+
+       /* Basic + Extended + Inline */
+       basic->bsf_size_sbs = 1 << 7;
+       /* Input domain check byte mask */
+       basic->check_byte_mask = sig_attrs->check_mask;
+       basic->raw_data_size = cpu_to_be32(data_size);
+
+       /* Memory domain */
        switch (sig_attrs->mem.sig_type) {
+       case IB_SIG_TYPE_NONE:
+               break;
        case IB_SIG_TYPE_T10_DIF:
-               if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
-                       return -EINVAL;
+               basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+               basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+               mlx5_fill_inl_bsf(mem, &bsf->m_inl);
+               break;
+       default:
+               return -EINVAL;
+       }
 
-               /* Input domain check byte mask */
-               basic->check_byte_mask = sig_attrs->check_mask;
+       /* Wire domain */
+       switch (sig_attrs->wire.sig_type) {
+       case IB_SIG_TYPE_NONE:
+               break;
+       case IB_SIG_TYPE_T10_DIF:
                if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
-                   mem->sig.dif.type == wire->sig.dif.type) {
+                   mem->sig_type == wire->sig_type) {
                        /* Same block structure */
-                       basic->bsf_size_sbs = 1 << 4;
+                       basic->bsf_size_sbs |= 1 << 4;
                        if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
-                               basic->wire.copy_byte_mask |= 0xc0;
+                               basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
                        if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
-                               basic->wire.copy_byte_mask |= 0x30;
+                               basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
                        if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
-                               basic->wire.copy_byte_mask |= 0x0f;
+                               basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
                } else
                        basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
 
-               basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
-               basic->raw_data_size = cpu_to_be32(data_size);
-
-               ret = format_selector(sig_attrs, mem, &selector);
-               if (ret)
-                       return -EINVAL;
-               basic->m_bfs_psv = cpu_to_be32(selector << 24 |
-                                              msig->psv_memory.psv_idx);
-
-               ret = format_selector(sig_attrs, wire, &selector);
-               if (ret)
-                       return -EINVAL;
-               basic->w_bfs_psv = cpu_to_be32(selector << 24 |
-                                              msig->psv_wire.psv_idx);
+               basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
+               mlx5_fill_inl_bsf(wire, &bsf->w_inl);
                break;
-
        default:
                return -EINVAL;
        }
@@ -2317,20 +2285,21 @@ static int set_psv_wr(struct ib_sig_domain *domain,
        memset(psv_seg, 0, sizeof(*psv_seg));
        psv_seg->psv_num = cpu_to_be32(psv_idx);
        switch (domain->sig_type) {
+       case IB_SIG_TYPE_NONE:
+               break;
        case IB_SIG_TYPE_T10_DIF:
                psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
                                                     domain->sig.dif.app_tag);
                psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
-
-               *seg += sizeof(*psv_seg);
-               *size += sizeof(*psv_seg) / 16;
                break;
-
        default:
                pr_err("Bad signature type given.\n");
                return 1;
        }
 
+       *seg += sizeof(*psv_seg);
+       *size += sizeof(*psv_seg) / 16;
+
        return 0;
 }
 
index dd35ae558ae1ce1fe25e46679e7932d383493881..638bff1ffc6c73b95a41a1556ff42a06680d6bca 100644 (file)
@@ -348,11 +348,6 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
        return mqe;
 }
 
-static void *ocrdma_alloc_mqe(void)
-{
-       return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
-}
-
 static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
 {
        dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -566,8 +561,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
        cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
        cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
 
-       cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
-       cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+       cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
+       cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
 
        cmd->async_cqid_ringsize = cq->id;
        cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -1189,10 +1184,10 @@ int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
 {
        struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
        struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
-       struct ocrdma_rdma_stats_resp *old_stats = NULL;
+       struct ocrdma_rdma_stats_resp *old_stats;
        int status;
 
-       old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+       old_stats = kmalloc(sizeof(*old_stats), GFP_KERNEL);
        if (old_stats == NULL)
                return -ENOMEM;
 
@@ -1235,10 +1230,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
        struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
        struct mgmt_hba_attribs *hba_attribs;
 
-       mqe = ocrdma_alloc_mqe();
+       mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
        if (!mqe)
                return status;
-       memset(mqe, 0, sizeof(*mqe));
 
        dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
        dma.va   = dma_alloc_coherent(&dev->nic_info.pdev->dev,
@@ -2279,7 +2273,8 @@ mbx_err:
 
 static int ocrdma_set_av_params(struct ocrdma_qp *qp,
                                struct ocrdma_modify_qp *cmd,
-                               struct ib_qp_attr *attrs)
+                               struct ib_qp_attr *attrs,
+                               int attr_mask)
 {
        int status;
        struct ib_ah_attr *ah_attr = &attrs->ah_attr;
@@ -2319,8 +2314,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
        ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
        cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
-       vlan_id = ah_attr->vlan_id;
-       if (vlan_id && (vlan_id < 0x1000)) {
+       if (attr_mask & IB_QP_VID) {
+               vlan_id = attrs->vlan_id;
                cmd->params.vlan_dmac_b4_to_b5 |=
                    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
                cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
@@ -2347,7 +2342,7 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
                cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
        }
        if (attr_mask & IB_QP_AV) {
-               status = ocrdma_set_av_params(qp, cmd, attrs);
+               status = ocrdma_set_av_params(qp, cmd, attrs, attr_mask);
                if (status)
                        return status;
        } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
index 256a06bc0b68478fee187302a0467330fadf3cc9..b0b2257b8e0430738cc7b5f5f36145868c7d4dc3 100644 (file)
@@ -388,6 +388,15 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
                device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
 }
 
+static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
+{
+       /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
+       union ib_gid *sgid = &dev->sgid_tbl[0];
+
+       sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+       ocrdma_get_guid(dev, &sgid->raw[8]);
+}
+
 static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
                                  struct net_device *net)
 {
@@ -434,6 +443,7 @@ static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
                                rdma_vlan_dev_real_dev(net_dev) : net_dev;
 
                if (real_dev == dev->nic_info.netdev) {
+                       ocrdma_add_default_sgid(dev);
                        ocrdma_init_ipv4_gids(dev, net_dev);
                        ocrdma_init_ipv6_gids(dev, net_dev);
                }
@@ -646,8 +656,10 @@ static int __init ocrdma_init_module(void)
        return 0;
 
 err_be_reg:
+#if IS_ENABLED(CONFIG_IPV6)
        ocrdma_unregister_inet6addr_notifier();
 err_notifier6:
+#endif
        ocrdma_unregister_inetaddr_notifier();
        return status;
 }
index 904989ec5eaa67b796abcf221292c5f70246a642..4e036480c1a8fa7d9f8d1be9e2267ea3147b2b5a 100644 (file)
@@ -28,8 +28,6 @@
 #ifndef __OCRDMA_SLI_H__
 #define __OCRDMA_SLI_H__
 
-#define Bit(_b) (1 << (_b))
-
 enum {
        OCRDMA_ASIC_GEN_SKH_R = 0x04,
        OCRDMA_ASIC_GEN_LANCER = 0x0B
@@ -103,7 +101,7 @@ enum {
        QTYPE_MCCQ      = 3
 };
 
-#define OCRDMA_MAX_SGID (8)
+#define OCRDMA_MAX_SGID                8
 
 #define OCRDMA_MAX_QP    2048
 #define OCRDMA_MAX_CQ    2048
@@ -128,33 +126,33 @@ enum {
 #define OCRDMA_DB_CQ_RING_ID_EXT_MASK  0x0C00  /* bits 10-11 of qid at 12-11 */
 /* qid #2 msbits at 12-11 */
 #define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT  0x1
-#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT       (16)       /* bits 16 - 28 */
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT  16      /* bits 16 - 28 */
 /* Rearm bit */
-#define OCRDMA_DB_CQ_REARM_SHIFT        (29)   /* bit 29 */
+#define OCRDMA_DB_CQ_REARM_SHIFT       29      /* bit 29 */
 /* solicited bit */
-#define OCRDMA_DB_CQ_SOLICIT_SHIFT   (31)      /* bit 31 */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT     31      /* bit 31 */
 
 #define OCRDMA_EQ_ID_MASK              0x1FF   /* bits 0 - 8 */
 #define OCRDMA_EQ_ID_EXT_MASK          0x3e00  /* bits 9-13 */
-#define OCRDMA_EQ_ID_EXT_MASK_SHIFT    (2)     /* qid bits 9-13 at 11-15 */
+#define OCRDMA_EQ_ID_EXT_MASK_SHIFT    2       /* qid bits 9-13 at 11-15 */
 
 /* Clear the interrupt for this eq */
-#define OCRDMA_EQ_CLR_SHIFT                    (9)     /* bit 9 */
+#define OCRDMA_EQ_CLR_SHIFT            9       /* bit 9 */
 /* Must be 1 */
-#define OCRDMA_EQ_TYPE_SHIFT           (10)    /* bit 10 */
+#define OCRDMA_EQ_TYPE_SHIFT           10      /* bit 10 */
 /* Number of event entries processed */
-#define OCRDMA_NUM_EQE_SHIFT           (16)    /* bits 16 - 28 */
+#define OCRDMA_NUM_EQE_SHIFT           16      /* bits 16 - 28 */
 /* Rearm bit */
-#define OCRDMA_REARM_SHIFT             (29)    /* bit 29 */
+#define OCRDMA_REARM_SHIFT             29      /* bit 29 */
 
 #define OCRDMA_MQ_ID_MASK              0x7FF   /* bits 0 - 10 */
 /* Number of entries posted */
-#define OCRDMA_MQ_NUM_MQE_SHIFT        (16)    /* bits 16 - 29 */
+#define OCRDMA_MQ_NUM_MQE_SHIFT        16      /* bits 16 - 29 */
 
-#define OCRDMA_MIN_HPAGE_SIZE (4096)
+#define OCRDMA_MIN_HPAGE_SIZE  4096
 
-#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
-#define OCRDMA_MAX_Q_PAGES     (8)
+#define OCRDMA_MIN_Q_PAGE_SIZE 4096
+#define OCRDMA_MAX_Q_PAGES     8
 
 #define OCRDMA_SLI_ASIC_ID_OFFSET      0x9C
 #define OCRDMA_SLI_ASIC_REV_MASK       0x000000FF
@@ -170,14 +168,14 @@ enum {
 # 6: 256K Bytes
 # 7: 512K Bytes
 */
-#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT     8
 #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
 
-#define MAX_OCRDMA_QP_PAGES      (8)
+#define MAX_OCRDMA_QP_PAGES            8
 #define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE)
 
-#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
-#define OCRDMA_DPP_CQE_SIZE (4)
+#define OCRDMA_CREATE_CQ_MAX_PAGES     4
+#define OCRDMA_DPP_CQE_SIZE            4
 
 #define OCRDMA_GEN2_MAX_CQE 1024
 #define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
@@ -238,7 +236,7 @@ struct ocrdma_mqe_sge {
 
 enum {
        OCRDMA_MQE_HDR_EMB_SHIFT        = 0,
-       OCRDMA_MQE_HDR_EMB_MASK         = Bit(0),
+       OCRDMA_MQE_HDR_EMB_MASK         = BIT(0),
        OCRDMA_MQE_HDR_SGE_CNT_SHIFT    = 3,
        OCRDMA_MQE_HDR_SGE_CNT_MASK     = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
        OCRDMA_MQE_HDR_SPECIAL_SHIFT    = 24,
@@ -292,7 +290,7 @@ struct ocrdma_pa {
        u32 hi;
 };
 
-#define MAX_OCRDMA_EQ_PAGES (8)
+#define MAX_OCRDMA_EQ_PAGES    8
 struct ocrdma_create_eq_req {
        struct ocrdma_mbx_hdr req;
        u32 num_pages;
@@ -304,7 +302,7 @@ struct ocrdma_create_eq_req {
 };
 
 enum {
-       OCRDMA_CREATE_EQ_VALID  = Bit(29),
+       OCRDMA_CREATE_EQ_VALID  = BIT(29),
        OCRDMA_CREATE_EQ_CNT_SHIFT      = 26,
        OCRDMA_CREATE_CQ_DELAY_SHIFT    = 13,
 };
@@ -314,7 +312,7 @@ struct ocrdma_create_eq_rsp {
        u32 vector_eqid;
 };
 
-#define OCRDMA_EQ_MINOR_OTHER (0x1)
+#define OCRDMA_EQ_MINOR_OTHER  0x1
 
 enum {
        OCRDMA_MCQE_STATUS_SHIFT        = 0,
@@ -322,13 +320,13 @@ enum {
        OCRDMA_MCQE_ESTATUS_SHIFT       = 16,
        OCRDMA_MCQE_ESTATUS_MASK        = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
        OCRDMA_MCQE_CONS_SHIFT          = 27,
-       OCRDMA_MCQE_CONS_MASK           = Bit(27),
+       OCRDMA_MCQE_CONS_MASK           = BIT(27),
        OCRDMA_MCQE_CMPL_SHIFT          = 28,
-       OCRDMA_MCQE_CMPL_MASK           = Bit(28),
+       OCRDMA_MCQE_CMPL_MASK           = BIT(28),
        OCRDMA_MCQE_AE_SHIFT            = 30,
-       OCRDMA_MCQE_AE_MASK             = Bit(30),
+       OCRDMA_MCQE_AE_MASK             = BIT(30),
        OCRDMA_MCQE_VALID_SHIFT         = 31,
-       OCRDMA_MCQE_VALID_MASK          = Bit(31)
+       OCRDMA_MCQE_VALID_MASK          = BIT(31)
 };
 
 struct ocrdma_mcqe {
@@ -339,13 +337,13 @@ struct ocrdma_mcqe {
 };
 
 enum {
-       OCRDMA_AE_MCQE_QPVALID          = Bit(31),
+       OCRDMA_AE_MCQE_QPVALID          = BIT(31),
        OCRDMA_AE_MCQE_QPID_MASK        = 0xFFFF,
 
-       OCRDMA_AE_MCQE_CQVALID          = Bit(31),
+       OCRDMA_AE_MCQE_CQVALID          = BIT(31),
        OCRDMA_AE_MCQE_CQID_MASK        = 0xFFFF,
-       OCRDMA_AE_MCQE_VALID            = Bit(31),
-       OCRDMA_AE_MCQE_AE               = Bit(30),
+       OCRDMA_AE_MCQE_VALID            = BIT(31),
+       OCRDMA_AE_MCQE_AE               = BIT(30),
        OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16,
        OCRDMA_AE_MCQE_EVENT_TYPE_MASK  =
                                        0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
@@ -386,9 +384,9 @@ enum {
        OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK      = 0xFF <<
                                        OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
        OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT       = 30,
-       OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK        = Bit(30),
+       OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK        = BIT(30),
        OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT    = 31,
-       OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK     = Bit(31)
+       OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK     = BIT(31)
 };
 
 struct ocrdma_ae_mpa_mcqe {
@@ -412,9 +410,9 @@ enum {
        OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK       = 0xFF <<
                                OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
        OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT        = 30,
-       OCRDMA_AE_QP_MCQE_EVENT_AE_MASK         = Bit(30),
+       OCRDMA_AE_QP_MCQE_EVENT_AE_MASK         = BIT(30),
        OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT     = 31,
-       OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK      = Bit(31)
+       OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK      = BIT(31)
 };
 
 struct ocrdma_ae_qp_mcqe {
@@ -449,9 +447,9 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
 /* mailbox command request and responses */
 enum {
        OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT          = 2,
-       OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK           = Bit(2),
+       OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK           = BIT(2),
        OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT        = 3,
-       OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK         = Bit(3),
+       OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK         = BIT(3),
        OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT               = 8,
        OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK                = 0xFFFFFF <<
                                OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
@@ -672,9 +670,9 @@ enum {
        OCRDMA_CREATE_CQ_PAGE_SIZE_MASK         = 0xFF,
 
        OCRDMA_CREATE_CQ_COALESCWM_SHIFT        = 12,
-       OCRDMA_CREATE_CQ_COALESCWM_MASK         = Bit(13) | Bit(12),
-       OCRDMA_CREATE_CQ_FLAGS_NODELAY          = Bit(14),
-       OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID       = Bit(15),
+       OCRDMA_CREATE_CQ_COALESCWM_MASK         = BIT(13) | BIT(12),
+       OCRDMA_CREATE_CQ_FLAGS_NODELAY          = BIT(14),
+       OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID       = BIT(15),
 
        OCRDMA_CREATE_CQ_EQ_ID_MASK             = 0xFFFF,
        OCRDMA_CREATE_CQ_CQE_COUNT_MASK         = 0xFFFF
@@ -687,8 +685,8 @@ enum {
        OCRDMA_CREATE_CQ_EQID_SHIFT             = 22,
 
        OCRDMA_CREATE_CQ_CNT_SHIFT              = 27,
-       OCRDMA_CREATE_CQ_FLAGS_VALID            = Bit(29),
-       OCRDMA_CREATE_CQ_FLAGS_EVENTABLE        = Bit(31),
+       OCRDMA_CREATE_CQ_FLAGS_VALID            = BIT(29),
+       OCRDMA_CREATE_CQ_FLAGS_EVENTABLE        = BIT(31),
        OCRDMA_CREATE_CQ_DEF_FLAGS              = OCRDMA_CREATE_CQ_FLAGS_VALID |
                                        OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
                                        OCRDMA_CREATE_CQ_FLAGS_NODELAY
@@ -731,8 +729,8 @@ enum {
        OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT         = 22,
        OCRDMA_CREATE_MQ_CQ_ID_SHIFT            = 16,
        OCRDMA_CREATE_MQ_RING_SIZE_SHIFT        = 16,
-       OCRDMA_CREATE_MQ_VALID                  = Bit(31),
-       OCRDMA_CREATE_MQ_ASYNC_CQ_VALID         = Bit(0)
+       OCRDMA_CREATE_MQ_VALID                  = BIT(31),
+       OCRDMA_CREATE_MQ_ASYNC_CQ_VALID         = BIT(0)
 };
 
 struct ocrdma_create_mq_req {
@@ -783,7 +781,7 @@ enum {
        OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16,
        OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19,
        OCRDMA_CREATE_QP_REQ_QPT_SHIFT          = 29,
-       OCRDMA_CREATE_QP_REQ_QPT_MASK           = Bit(31) | Bit(30) | Bit(29),
+       OCRDMA_CREATE_QP_REQ_QPT_MASK           = BIT(31) | BIT(30) | BIT(29),
 
        OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT      = 0,
        OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK       = 0xFFFF,
@@ -798,23 +796,23 @@ enum {
                                        OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
 
        OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT               = 0,
-       OCRDMA_CREATE_QP_REQ_FMR_EN_MASK                = Bit(0),
+       OCRDMA_CREATE_QP_REQ_FMR_EN_MASK                = BIT(0),
        OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT          = 1,
-       OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK           = Bit(1),
+       OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK           = BIT(1),
        OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT          = 2,
-       OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK           = Bit(2),
+       OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK           = BIT(2),
        OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT             = 3,
-       OCRDMA_CREATE_QP_REQ_INB_WREN_MASK              = Bit(3),
+       OCRDMA_CREATE_QP_REQ_INB_WREN_MASK              = BIT(3),
        OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT             = 4,
-       OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK              = Bit(4),
+       OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK              = BIT(4),
        OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT              = 5,
-       OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK               = Bit(5),
+       OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK               = BIT(5),
        OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT          = 6,
-       OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK           = Bit(6),
+       OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK           = BIT(6),
        OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT           = 7,
-       OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK            = Bit(7),
+       OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK            = BIT(7),
        OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT        = 8,
-       OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK         = Bit(8),
+       OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK         = BIT(8),
        OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT         = 16,
        OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK          = 0xFFFF <<
                                OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
@@ -927,7 +925,7 @@ enum {
        OCRDMA_CREATE_QP_RSP_SQ_ID_MASK                 = 0xFFFF <<
                                OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,
 
-       OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK           = Bit(0),
+       OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK           = BIT(0),
        OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT      = 1,
        OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK       = 0x7FFF <<
                                OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
@@ -964,38 +962,38 @@ enum {
        OCRDMA_MODIFY_QP_ID_SHIFT       = 0,
        OCRDMA_MODIFY_QP_ID_MASK        = 0xFFFF,
 
-       OCRDMA_QP_PARA_QPS_VALID        = Bit(0),
-       OCRDMA_QP_PARA_SQD_ASYNC_VALID  = Bit(1),
-       OCRDMA_QP_PARA_PKEY_VALID       = Bit(2),
-       OCRDMA_QP_PARA_QKEY_VALID       = Bit(3),
-       OCRDMA_QP_PARA_PMTU_VALID       = Bit(4),
-       OCRDMA_QP_PARA_ACK_TO_VALID     = Bit(5),
-       OCRDMA_QP_PARA_RETRY_CNT_VALID  = Bit(6),
-       OCRDMA_QP_PARA_RRC_VALID        = Bit(7),
-       OCRDMA_QP_PARA_RQPSN_VALID      = Bit(8),
-       OCRDMA_QP_PARA_MAX_IRD_VALID    = Bit(9),
-       OCRDMA_QP_PARA_MAX_ORD_VALID    = Bit(10),
-       OCRDMA_QP_PARA_RNT_VALID        = Bit(11),
-       OCRDMA_QP_PARA_SQPSN_VALID      = Bit(12),
-       OCRDMA_QP_PARA_DST_QPN_VALID    = Bit(13),
-       OCRDMA_QP_PARA_MAX_WQE_VALID    = Bit(14),
-       OCRDMA_QP_PARA_MAX_RQE_VALID    = Bit(15),
-       OCRDMA_QP_PARA_SGE_SEND_VALID   = Bit(16),
-       OCRDMA_QP_PARA_SGE_RECV_VALID   = Bit(17),
-       OCRDMA_QP_PARA_SGE_WR_VALID     = Bit(18),
-       OCRDMA_QP_PARA_INB_RDEN_VALID   = Bit(19),
-       OCRDMA_QP_PARA_INB_WREN_VALID   = Bit(20),
-       OCRDMA_QP_PARA_FLOW_LBL_VALID   = Bit(21),
-       OCRDMA_QP_PARA_BIND_EN_VALID    = Bit(22),
-       OCRDMA_QP_PARA_ZLKEY_EN_VALID   = Bit(23),
-       OCRDMA_QP_PARA_FMR_EN_VALID     = Bit(24),
-       OCRDMA_QP_PARA_INBAT_EN_VALID   = Bit(25),
-       OCRDMA_QP_PARA_VLAN_EN_VALID    = Bit(26),
-
-       OCRDMA_MODIFY_QP_FLAGS_RD       = Bit(0),
-       OCRDMA_MODIFY_QP_FLAGS_WR       = Bit(1),
-       OCRDMA_MODIFY_QP_FLAGS_SEND     = Bit(2),
-       OCRDMA_MODIFY_QP_FLAGS_ATOMIC   = Bit(3)
+       OCRDMA_QP_PARA_QPS_VALID        = BIT(0),
+       OCRDMA_QP_PARA_SQD_ASYNC_VALID  = BIT(1),
+       OCRDMA_QP_PARA_PKEY_VALID       = BIT(2),
+       OCRDMA_QP_PARA_QKEY_VALID       = BIT(3),
+       OCRDMA_QP_PARA_PMTU_VALID       = BIT(4),
+       OCRDMA_QP_PARA_ACK_TO_VALID     = BIT(5),
+       OCRDMA_QP_PARA_RETRY_CNT_VALID  = BIT(6),
+       OCRDMA_QP_PARA_RRC_VALID        = BIT(7),
+       OCRDMA_QP_PARA_RQPSN_VALID      = BIT(8),
+       OCRDMA_QP_PARA_MAX_IRD_VALID    = BIT(9),
+       OCRDMA_QP_PARA_MAX_ORD_VALID    = BIT(10),
+       OCRDMA_QP_PARA_RNT_VALID        = BIT(11),
+       OCRDMA_QP_PARA_SQPSN_VALID      = BIT(12),
+       OCRDMA_QP_PARA_DST_QPN_VALID    = BIT(13),
+       OCRDMA_QP_PARA_MAX_WQE_VALID    = BIT(14),
+       OCRDMA_QP_PARA_MAX_RQE_VALID    = BIT(15),
+       OCRDMA_QP_PARA_SGE_SEND_VALID   = BIT(16),
+       OCRDMA_QP_PARA_SGE_RECV_VALID   = BIT(17),
+       OCRDMA_QP_PARA_SGE_WR_VALID     = BIT(18),
+       OCRDMA_QP_PARA_INB_RDEN_VALID   = BIT(19),
+       OCRDMA_QP_PARA_INB_WREN_VALID   = BIT(20),
+       OCRDMA_QP_PARA_FLOW_LBL_VALID   = BIT(21),
+       OCRDMA_QP_PARA_BIND_EN_VALID    = BIT(22),
+       OCRDMA_QP_PARA_ZLKEY_EN_VALID   = BIT(23),
+       OCRDMA_QP_PARA_FMR_EN_VALID     = BIT(24),
+       OCRDMA_QP_PARA_INBAT_EN_VALID   = BIT(25),
+       OCRDMA_QP_PARA_VLAN_EN_VALID    = BIT(26),
+
+       OCRDMA_MODIFY_QP_FLAGS_RD       = BIT(0),
+       OCRDMA_MODIFY_QP_FLAGS_WR       = BIT(1),
+       OCRDMA_MODIFY_QP_FLAGS_SEND     = BIT(2),
+       OCRDMA_MODIFY_QP_FLAGS_ATOMIC   = BIT(3)
 };
 
 enum {
@@ -1014,15 +1012,15 @@ enum {
        OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK      = 0xFFFF <<
                                        OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
 
-       OCRDMA_QP_PARAMS_FLAGS_FMR_EN           = Bit(0),
-       OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN        = Bit(1),
-       OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN       = Bit(2),
-       OCRDMA_QP_PARAMS_FLAGS_INBWR_EN         = Bit(3),
-       OCRDMA_QP_PARAMS_FLAGS_INBRD_EN         = Bit(4),
+       OCRDMA_QP_PARAMS_FLAGS_FMR_EN           = BIT(0),
+       OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN        = BIT(1),
+       OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN       = BIT(2),
+       OCRDMA_QP_PARAMS_FLAGS_INBWR_EN         = BIT(3),
+       OCRDMA_QP_PARAMS_FLAGS_INBRD_EN         = BIT(4),
        OCRDMA_QP_PARAMS_STATE_SHIFT            = 5,
-       OCRDMA_QP_PARAMS_STATE_MASK             = Bit(5) | Bit(6) | Bit(7),
-       OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC        = Bit(8),
-       OCRDMA_QP_PARAMS_FLAGS_INB_ATEN         = Bit(9),
+       OCRDMA_QP_PARAMS_STATE_MASK             = BIT(5) | BIT(6) | BIT(7),
+       OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC        = BIT(8),
+       OCRDMA_QP_PARAMS_FLAGS_INB_ATEN         = BIT(9),
        OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT     = 16,
        OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK      = 0xFFFF <<
                                        OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1277,7 +1275,7 @@ struct ocrdma_alloc_pd {
 };
 
 enum {
-       OCRDMA_ALLOC_PD_RSP_DPP                 = Bit(16),
+       OCRDMA_ALLOC_PD_RSP_DPP                 = BIT(16),
        OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT      = 20,
        OCRDMA_ALLOC_PD_RSP_PDID_MASK           = 0xFFFF,
 };
@@ -1309,18 +1307,18 @@ enum {
        OCRDMA_ALLOC_LKEY_PD_ID_MASK            = 0xFFFF,
 
        OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT      = 0,
-       OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK       = Bit(0),
+       OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK       = BIT(0),
        OCRDMA_ALLOC_LKEY_FMR_SHIFT             = 1,
-       OCRDMA_ALLOC_LKEY_FMR_MASK              = Bit(1),
+       OCRDMA_ALLOC_LKEY_FMR_MASK              = BIT(1),
        OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT      = 2,
-       OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK       = Bit(2),
+       OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK       = BIT(2),
        OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT       = 3,
-       OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK        = Bit(3),
+       OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK        = BIT(3),
        OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT       = 4,
-       OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK        = Bit(4),
+       OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK        = BIT(4),
        OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT        = 5,
-       OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK         = Bit(5),
-       OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK    = Bit(6),
+       OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK         = BIT(5),
+       OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK    = BIT(6),
        OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT   = 6,
        OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT        = 16,
        OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK         = 0xFFFF <<
@@ -1379,21 +1377,21 @@ enum {
        OCRDMA_REG_NSMR_HPAGE_SIZE_MASK         = 0xFF <<
                                        OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
        OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT       = 24,
-       OCRDMA_REG_NSMR_BIND_MEMWIN_MASK        = Bit(24),
+       OCRDMA_REG_NSMR_BIND_MEMWIN_MASK        = BIT(24),
        OCRDMA_REG_NSMR_ZB_SHIFT                = 25,
-       OCRDMA_REG_NSMR_ZB_SHIFT_MASK           = Bit(25),
+       OCRDMA_REG_NSMR_ZB_SHIFT_MASK           = BIT(25),
        OCRDMA_REG_NSMR_REMOTE_INV_SHIFT        = 26,
-       OCRDMA_REG_NSMR_REMOTE_INV_MASK         = Bit(26),
+       OCRDMA_REG_NSMR_REMOTE_INV_MASK         = BIT(26),
        OCRDMA_REG_NSMR_REMOTE_WR_SHIFT         = 27,
-       OCRDMA_REG_NSMR_REMOTE_WR_MASK          = Bit(27),
+       OCRDMA_REG_NSMR_REMOTE_WR_MASK          = BIT(27),
        OCRDMA_REG_NSMR_REMOTE_RD_SHIFT         = 28,
-       OCRDMA_REG_NSMR_REMOTE_RD_MASK          = Bit(28),
+       OCRDMA_REG_NSMR_REMOTE_RD_MASK          = BIT(28),
        OCRDMA_REG_NSMR_LOCAL_WR_SHIFT          = 29,
-       OCRDMA_REG_NSMR_LOCAL_WR_MASK           = Bit(29),
+       OCRDMA_REG_NSMR_LOCAL_WR_MASK           = BIT(29),
        OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT     = 30,
-       OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK      = Bit(30),
+       OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK      = BIT(30),
        OCRDMA_REG_NSMR_LAST_SHIFT              = 31,
-       OCRDMA_REG_NSMR_LAST_MASK               = Bit(31)
+       OCRDMA_REG_NSMR_LAST_MASK               = BIT(31)
 };
 
 struct ocrdma_reg_nsmr {
@@ -1420,7 +1418,7 @@ enum {
                                        OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,
 
        OCRDMA_REG_NSMR_CONT_LAST_SHIFT         = 31,
-       OCRDMA_REG_NSMR_CONT_LAST_MASK          = Bit(31)
+       OCRDMA_REG_NSMR_CONT_LAST_MASK          = BIT(31)
 };
 
 struct ocrdma_reg_nsmr_cont {
@@ -1566,7 +1564,7 @@ struct ocrdma_delete_ah_tbl_rsp {
 
 enum {
        OCRDMA_EQE_VALID_SHIFT          = 0,
-       OCRDMA_EQE_VALID_MASK           = Bit(0),
+       OCRDMA_EQE_VALID_MASK           = BIT(0),
        OCRDMA_EQE_FOR_CQE_MASK         = 0xFFFE,
        OCRDMA_EQE_RESOURCE_ID_SHIFT    = 16,
        OCRDMA_EQE_RESOURCE_ID_MASK     = 0xFFFF <<
@@ -1624,11 +1622,11 @@ enum {
        OCRDMA_CQE_UD_STATUS_MASK       = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
        OCRDMA_CQE_STATUS_SHIFT         = 16,
        OCRDMA_CQE_STATUS_MASK          = 0xFF << OCRDMA_CQE_STATUS_SHIFT,
-       OCRDMA_CQE_VALID                = Bit(31),
-       OCRDMA_CQE_INVALIDATE           = Bit(30),
-       OCRDMA_CQE_QTYPE                = Bit(29),
-       OCRDMA_CQE_IMM                  = Bit(28),
-       OCRDMA_CQE_WRITE_IMM            = Bit(27),
+       OCRDMA_CQE_VALID                = BIT(31),
+       OCRDMA_CQE_INVALIDATE           = BIT(30),
+       OCRDMA_CQE_QTYPE                = BIT(29),
+       OCRDMA_CQE_IMM                  = BIT(28),
+       OCRDMA_CQE_WRITE_IMM            = BIT(27),
        OCRDMA_CQE_QTYPE_SQ             = 0,
        OCRDMA_CQE_QTYPE_RQ             = 1,
        OCRDMA_CQE_SRCQP_MASK           = 0xFFFFFF
@@ -1772,8 +1770,8 @@ struct ocrdma_grh {
        u16     rsvd;
 } __packed;
 
-#define OCRDMA_AV_VALID                Bit(7)
-#define OCRDMA_AV_VLAN_VALID   Bit(1)
+#define OCRDMA_AV_VALID                BIT(7)
+#define OCRDMA_AV_VLAN_VALID   BIT(1)
 
 struct ocrdma_av {
        struct ocrdma_eth_vlan eth_hdr;
index 8f5f2577f28855eae258b278484a2bdda2eaf396..ef6c78c2f6b9db7c2cbff255719b4ef097cf53ca 100644 (file)
@@ -388,7 +388,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
 
        memset(&resp, 0, sizeof(resp));
        resp.ah_tbl_len = ctx->ah_tbl.len;
-       resp.ah_tbl_page = ctx->ah_tbl.pa;
+       resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
 
        status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
        if (status)
@@ -870,7 +870,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
        uresp.page_size = PAGE_ALIGN(cq->len);
        uresp.num_pages = 1;
        uresp.max_hw_cqe = cq->max_hw_cqe;
-       uresp.page_addr[0] = cq->pa;
+       uresp.page_addr[0] = virt_to_phys(cq->va);
        uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
        uresp.db_page_size = dev->nic_info.db_page_size;
        uresp.phase_change = cq->phase_change ? 1 : 0;
@@ -1123,13 +1123,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
        uresp.sq_dbid = qp->sq.dbid;
        uresp.num_sq_pages = 1;
        uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
-       uresp.sq_page_addr[0] = qp->sq.pa;
+       uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
        uresp.num_wqe_allocated = qp->sq.max_cnt;
        if (!srq) {
                uresp.rq_dbid = qp->rq.dbid;
                uresp.num_rq_pages = 1;
                uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
-               uresp.rq_page_addr[0] = qp->rq.pa;
+               uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
                uresp.num_rqe_allocated = qp->rq.max_cnt;
        }
        uresp.db_page_addr = usr_db;
@@ -1680,7 +1680,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
        memset(&uresp, 0, sizeof(uresp));
        uresp.rq_dbid = srq->rq.dbid;
        uresp.num_rq_pages = 1;
-       uresp.rq_page_addr[0] = srq->rq.pa;
+       uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
        uresp.rq_page_size = srq->rq.len;
        uresp.db_page_addr = dev->nic_info.unmapped_db +
            (srq->pd->id * dev->nic_info.db_page_size);
index 93ce62fe1594345cd66bb19cd116cebdc2329490..f42ab14105ac61f9b29e75ae2178b7aa199df6d1 100644 (file)
@@ -83,7 +83,7 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
 
 int iser_debug_level = 0;
 bool iser_pi_enable = false;
-int iser_pi_guard = 0;
+int iser_pi_guard = 1;
 
 MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -97,14 +97,24 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
 
 module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
 
 static struct workqueue_struct *release_wq;
 struct iser_global ig;
 
+/*
+ * iscsi_iser_recv() - Process a successful recv completion
+ * @conn:         iscsi connection
+ * @hdr:          iscsi header
+ * @rx_data:      buffer containing receive data payload
+ * @rx_data_len:  length of rx_data
+ *
+ * Notes: In case of data length errors or iscsi PDU completion failures
+ *        this routine will signal iscsi layer of connection failure.
+ */
 void
-iscsi_iser_recv(struct iscsi_conn *conn,
-               struct iscsi_hdr *hdr, char *rx_data, int rx_data_len)
+iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+               char *rx_data, int rx_data_len)
 {
        int rc = 0;
        int datalen;
@@ -135,20 +145,30 @@ error:
        iscsi_conn_failure(conn, rc);
 }
 
-static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
+/**
+ * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
+ * @task:     iscsi task
+ * @opcode:   iscsi command opcode
+ *
+ * Notes: This routine can't fail, just assign iscsi task
+ *        hdr and max hdr size.
+ */
+static int
+iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
 
        task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
        task->hdr_max = sizeof(iser_task->desc.iscsi_header);
+
        return 0;
 }
 
 int iser_initialize_task_headers(struct iscsi_task *task,
                                                struct iser_tx_desc *tx_desc)
 {
-       struct iser_conn       *ib_conn   = task->conn->dd_data;
-       struct iser_device     *device    = ib_conn->device;
+       struct iser_conn       *iser_conn   = task->conn->dd_data;
+       struct iser_device *device = iser_conn->ib_conn.device;
        struct iscsi_iser_task *iser_task = task->dd_data;
        u64 dma_addr;
 
@@ -162,14 +182,18 @@ int iser_initialize_task_headers(struct iscsi_task *task,
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey   = device->mr->lkey;
 
-       iser_task->ib_conn = ib_conn;
+       iser_task->iser_conn = iser_conn;
        return 0;
 }
+
 /**
- * iscsi_iser_task_init - Initialize task
+ * iscsi_iser_task_init() - Initialize iscsi-iser task
  * @task: iscsi task
  *
  * Initialize the task for the scsi command or mgmt command.
+ *
+ * Return: Returns zero on success or -ENOMEM when failing
+ *         to init task headers (dma mapping error).
  */
 static int
 iscsi_iser_task_init(struct iscsi_task *task)
@@ -191,7 +215,7 @@ iscsi_iser_task_init(struct iscsi_task *task)
 }
 
 /**
- * iscsi_iser_mtask_xmit - xmit management(immediate) task
+ * iscsi_iser_mtask_xmit() - xmit management (immediate) task
  * @conn: iscsi connection
  * @task: task management task
  *
@@ -249,6 +273,12 @@ iscsi_iser_task_xmit_unsol_data_exit:
        return error;
 }
 
+/**
+ * iscsi_iser_task_xmit() - xmit iscsi-iser task
+ * @task: iscsi task
+ *
+ * Return: zero on success or escalates $error on failure.
+ */
 static int
 iscsi_iser_task_xmit(struct iscsi_task *task)
 {
@@ -286,12 +316,24 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
        return error;
 }
 
+/**
+ * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
+ * @task: iscsi task
+ *
+ * Notes: In case the RDMA device is already NULL (might have
+ *        been removed in DEVICE_REMOVAL CM event) it will bail-out
+ *        without doing dma unmapping.
+ */
 static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc    *tx_desc   = &iser_task->desc;
-       struct iser_conn       *ib_conn   = task->conn->dd_data;
-       struct iser_device     *device    = ib_conn->device;
+       struct iser_conn       *iser_conn         = task->conn->dd_data;
+       struct iser_device *device = iser_conn->ib_conn.device;
+
+       /* DEVICE_REMOVAL event might have already released the device */
+       if (!device)
+               return;
 
        ib_dma_unmap_single(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -306,7 +348,20 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
        }
 }
 
-static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+/**
+ * iscsi_iser_check_protection() - check protection information status of task.
+ * @task:     iscsi task
+ * @sector:   error sector if exists (output)
+ *
+ * Return: zero if no data-integrity errors have occurred
+ *         0x1: data-integrity error occurred in the guard-block
+ *         0x2: data-integrity error occurred in the reference tag
+ *         0x3: data-integrity error occurred in the application tag
+ *
+ *         In addition the error sector is marked.
+ */
+static u8
+iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
 
@@ -318,8 +373,17 @@ static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
                                                 sector);
 }
 
+/**
+ * iscsi_iser_conn_create() - create a new iscsi-iser connection
+ * @cls_session: iscsi class session
+ * @conn_idx:    connection index within the session (for MCS)
+ *
+ * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
+ *         otherwise.
+ */
 static struct iscsi_cls_conn *
-iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
+iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
+                      uint32_t conn_idx)
 {
        struct iscsi_conn *conn;
        struct iscsi_cls_conn *cls_conn;
@@ -338,13 +402,25 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
        return cls_conn;
 }
 
+/**
+ * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
+ * @cls_session:     iscsi class session
+ * @cls_conn:        iscsi class connection
+ * @transport_eph:   transport end-point handle
+ * @is_leading:      indicate if this is the session leading connection (MCS)
+ *
+ * Return: zero on success, $error if iscsi_conn_bind fails and
+ *         -EINVAL in case end-point doesn't exist anymore or iser connection
+ *         state is not UP (teardown already started).
+ */
 static int
 iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
-                    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+                    struct iscsi_cls_conn *cls_conn,
+                    uint64_t transport_eph,
                     int is_leading)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
        struct iscsi_endpoint *ep;
        int error;
 
@@ -360,66 +436,100 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
                         (unsigned long long)transport_eph);
                return -EINVAL;
        }
-       ib_conn = ep->dd_data;
+       iser_conn = ep->dd_data;
 
-       mutex_lock(&ib_conn->state_mutex);
-       if (ib_conn->state != ISER_CONN_UP) {
+       mutex_lock(&iser_conn->state_mutex);
+       if (iser_conn->state != ISER_CONN_UP) {
                error = -EINVAL;
                iser_err("iser_conn %p state is %d, teardown started\n",
-                        ib_conn, ib_conn->state);
+                        iser_conn, iser_conn->state);
                goto out;
        }
 
-       error = iser_alloc_rx_descriptors(ib_conn, conn->session);
+       error = iser_alloc_rx_descriptors(iser_conn, conn->session);
        if (error)
                goto out;
 
        /* binds the iSER connection retrieved from the previously
         * connected ep_handle to the iSCSI layer connection. exchanges
         * connection pointers */
-       iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+       iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
 
-       conn->dd_data = ib_conn;
-       ib_conn->iscsi_conn = conn;
+       conn->dd_data = iser_conn;
+       iser_conn->iscsi_conn = conn;
 
 out:
-       mutex_unlock(&ib_conn->state_mutex);
+       mutex_unlock(&iser_conn->state_mutex);
        return error;
 }
 
+/**
+ * iscsi_iser_conn_start() - start iscsi-iser connection
+ * @cls_conn: iscsi class connection
+ *
+ * Notes: Here iser initializes (or re-initializes) stop_completion as
+ *        from this point iscsi must call conn_stop in session/connection
+ *        teardown so iser transport must wait for it.
+ */
 static int
 iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
 {
        struct iscsi_conn *iscsi_conn;
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
 
        iscsi_conn = cls_conn->dd_data;
-       ib_conn = iscsi_conn->dd_data;
-       reinit_completion(&ib_conn->stop_completion);
+       iser_conn = iscsi_conn->dd_data;
+       reinit_completion(&iser_conn->stop_completion);
 
        return iscsi_conn_start(cls_conn);
 }
 
+/**
+ * iscsi_iser_conn_stop() - stop iscsi-iser connection
+ * @cls_conn:  iscsi class connection
+ * @flag:      indicate if recover or terminate (passed as is)
+ *
+ * Notes: Calling iscsi_conn_stop might theoretically race with
+ *        DEVICE_REMOVAL event and dereference a previously freed RDMA device
+ *        handle, so we call it under the iser state lock to protect against
+ *        this kind of race.
+ */
 static void
 iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iser_conn *ib_conn = conn->dd_data;
+       struct iser_conn *iser_conn = conn->dd_data;
 
-       iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
-       iscsi_conn_stop(cls_conn, flag);
+       iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
 
        /*
         * Userspace may have goofed up and not bound the connection or
         * might have only partially setup the connection.
         */
-       if (ib_conn) {
+       if (iser_conn) {
+               mutex_lock(&iser_conn->state_mutex);
+               iscsi_conn_stop(cls_conn, flag);
+               iser_conn_terminate(iser_conn);
+
+               /* unbind */
+               iser_conn->iscsi_conn = NULL;
                conn->dd_data = NULL;
-               complete(&ib_conn->stop_completion);
+
+               complete(&iser_conn->stop_completion);
+               mutex_unlock(&iser_conn->state_mutex);
+       } else {
+               iscsi_conn_stop(cls_conn, flag);
        }
 }
 
-static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
+/**
+ * iscsi_iser_session_destroy() - destroy iscsi-iser session
+ * @cls_session: iscsi class session
+ *
+ * Removes and frees iscsi host.
+ */
+static void
+iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
 {
        struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
 
@@ -439,6 +549,16 @@ iser_dif_prot_caps(int prot_caps)
                                                      SHOST_DIX_TYPE3_PROTECTION : 0);
 }
 
+/**
+ * iscsi_iser_session_create() - create an iscsi-iser session
+ * @ep:             iscsi end-point handle
+ * @cmds_max:       maximum commands in this session
+ * @qdepth:         session command queue depth
+ * @initial_cmdsn:  initiator command sequence number
+ *
+ * Allocates and adds a scsi host, exposes DIF support if
+ * exists, and sets up an iscsi session.
+ */
 static struct iscsi_cls_session *
 iscsi_iser_session_create(struct iscsi_endpoint *ep,
                          uint16_t cmds_max, uint16_t qdepth,
@@ -447,7 +567,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
        struct iscsi_cls_session *cls_session;
        struct iscsi_session *session;
        struct Scsi_Host *shost;
-       struct iser_conn *ib_conn = NULL;
+       struct iser_conn *iser_conn = NULL;
+       struct ib_conn *ib_conn;
 
        shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
        if (!shost)
@@ -464,7 +585,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
         * the leading conn's ep so this will be NULL;
         */
        if (ep) {
-               ib_conn = ep->dd_data;
+               iser_conn = ep->dd_data;
+               ib_conn = &iser_conn->ib_conn;
                if (ib_conn->pi_support) {
                        u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
 
@@ -476,8 +598,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
                }
        }
 
-       if (iscsi_host_add(shost,
-                          ep ? ib_conn->device->ib_device->dma_device : NULL))
+       if (iscsi_host_add(shost, ep ?
+                          ib_conn->device->ib_device->dma_device : NULL))
                goto free_host;
 
        if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
@@ -549,6 +671,13 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
        return 0;
 }
 
+/**
+ * iscsi_iser_conn_get_stats() - get iscsi connection statistics
+ * @cls_conn:    iscsi class connection
+ * @stats:       iscsi stats to output
+ *
+ * Output connection statistics.
+ */
 static void
 iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
 {
@@ -577,18 +706,18 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
 static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
                                   enum iscsi_param param, char *buf)
 {
-       struct iser_conn *ib_conn = ep->dd_data;
+       struct iser_conn *iser_conn = ep->dd_data;
        int len;
 
        switch (param) {
        case ISCSI_PARAM_CONN_PORT:
        case ISCSI_PARAM_CONN_ADDRESS:
-               if (!ib_conn || !ib_conn->cma_id)
+               if (!iser_conn || !iser_conn->ib_conn.cma_id)
                        return -ENOTCONN;
 
                return iscsi_conn_get_addr_param((struct sockaddr_storage *)
-                                       &ib_conn->cma_id->route.addr.dst_addr,
-                                       param, buf);
+                               &iser_conn->ib_conn.cma_id->route.addr.dst_addr,
+                               param, buf);
                break;
        default:
                return -ENOSYS;
@@ -597,29 +726,44 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
        return len;
 }
 
+/**
+ * iscsi_iser_ep_connect() - Initiate iSER connection establishment
+ * @shost:          scsi_host
+ * @dst_addr:       destination address
+ * @non_blocking:   indicate if routine can block
+ *
+ * Allocate an iscsi endpoint, an iser_conn structure and bind them.
+ * After that start RDMA connection establishment via rdma_cm. We
+ * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
+ * the endpoint will be destroyed at ep_disconnect while iser_conn will
+ * cleanup its resources asynchronously.
+ *
+ * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
+ *         if fails.
+ */
 static struct iscsi_endpoint *
 iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
                      int non_blocking)
 {
        int err;
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
        struct iscsi_endpoint *ep;
 
        ep = iscsi_create_endpoint(0);
        if (!ep)
                return ERR_PTR(-ENOMEM);
 
-       ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
-       if (!ib_conn) {
+       iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
+       if (!iser_conn) {
                err = -ENOMEM;
                goto failure;
        }
 
-       ep->dd_data = ib_conn;
-       ib_conn->ep = ep;
-       iser_conn_init(ib_conn);
+       ep->dd_data = iser_conn;
+       iser_conn->ep = ep;
+       iser_conn_init(iser_conn);
 
-       err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
+       err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
        if (err)
                goto failure;
 
@@ -629,25 +773,38 @@ failure:
        return ERR_PTR(err);
 }
 
+/**
+ * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
+ * @ep:            iscsi endpoint (created at ep_connect)
+ * @timeout_ms:    polling timeout allowed in ms.
+ *
+ * This routine boils down to waiting for up_completion signaling
+ * that cma_id got CONNECTED event.
+ *
+ * Return: 1 if succeeded in connection establishment, 0 if timeout expired
+ *         (libiscsi retry will kick in) or -1 if interrupted by signal
+ *         or more likely iser connection state transitioned to TERMINATING or
+ *         DOWN during the wait period.
+ */
 static int
 iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
 {
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
        int rc;
 
-       ib_conn = ep->dd_data;
-       rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
+       iser_conn = ep->dd_data;
+       rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
                                                       msecs_to_jiffies(timeout_ms));
        /* if conn establishment failed, return error code to iscsi */
        if (rc == 0) {
-               mutex_lock(&ib_conn->state_mutex);
-               if (ib_conn->state == ISER_CONN_TERMINATING ||
-                   ib_conn->state == ISER_CONN_DOWN)
+               mutex_lock(&iser_conn->state_mutex);
+               if (iser_conn->state == ISER_CONN_TERMINATING ||
+                   iser_conn->state == ISER_CONN_DOWN)
                        rc = -1;
-               mutex_unlock(&ib_conn->state_mutex);
+               mutex_unlock(&iser_conn->state_mutex);
        }
 
-       iser_info("ib conn %p rc = %d\n", ib_conn, rc);
+       iser_info("ib conn %p rc = %d\n", iser_conn, rc);
 
        if (rc > 0)
                return 1; /* success, this is the equivalent of POLLOUT */
@@ -657,15 +814,26 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
                return rc; /* signal */
 }
 
+/**
+ * iscsi_iser_ep_disconnect() - Initiate connection teardown process
+ * @ep:    iscsi endpoint handle
+ *
+ * This routine is not blocked by iser and RDMA termination process
+ * completion as we queue a deferred work for iser/RDMA destruction
+ * and cleanup or actually call it immediately in case we didn't pass
+ * iscsi conn bind/start stage, thus it is safe.
+ */
 static void
 iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
 {
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
+
+       iser_conn = ep->dd_data;
+       iser_info("ep %p iser conn %p state %d\n",
+                 ep, iser_conn, iser_conn->state);
 
-       ib_conn = ep->dd_data;
-       iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
-       mutex_lock(&ib_conn->state_mutex);
-       iser_conn_terminate(ib_conn);
+       mutex_lock(&iser_conn->state_mutex);
+       iser_conn_terminate(iser_conn);
 
        /*
         * if iser_conn and iscsi_conn are bound, we must wait for
@@ -673,14 +841,14 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
         * the iser resources. Otherwise we are safe to free resources
         * immediately.
         */
-       if (ib_conn->iscsi_conn) {
-               INIT_WORK(&ib_conn->release_work, iser_release_work);
-               queue_work(release_wq, &ib_conn->release_work);
-               mutex_unlock(&ib_conn->state_mutex);
+       if (iser_conn->iscsi_conn) {
+               INIT_WORK(&iser_conn->release_work, iser_release_work);
+               queue_work(release_wq, &iser_conn->release_work);
+               mutex_unlock(&iser_conn->state_mutex);
        } else {
-               ib_conn->state = ISER_CONN_DOWN;
-               mutex_unlock(&ib_conn->state_mutex);
-               iser_conn_release(ib_conn);
+               iser_conn->state = ISER_CONN_DOWN;
+               mutex_unlock(&iser_conn->state_mutex);
+               iser_conn_release(iser_conn);
        }
        iscsi_destroy_endpoint(ep);
 }
@@ -843,7 +1011,7 @@ register_transport_failure:
 
 static void __exit iser_exit(void)
 {
-       struct iser_conn *ib_conn, *n;
+       struct iser_conn *iser_conn, *n;
        int connlist_empty;
 
        iser_dbg("Removing iSER datamover...\n");
@@ -856,8 +1024,9 @@ static void __exit iser_exit(void)
        if (!connlist_empty) {
                iser_err("Error cleanup stage completed but we still have iser "
                         "connections, destroying them anyway.\n");
-               list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
-                       iser_conn_release(ib_conn);
+               list_for_each_entry_safe(iser_conn, n, &ig.connlist,
+                                        conn_list) {
+                       iser_conn_release(iser_conn);
                }
        }
 
index 9f0e0e34d6ca587af2c627c20fc141f6c0422b38..cd4174ca9a760dc70c0c369721407e1c2919a4c4 100644 (file)
 
 #define DRV_NAME       "iser"
 #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.4.1"
+#define DRV_VER                "1.4.8"
 
-#define iser_dbg(fmt, arg...)                          \
-       do {                                            \
-               if (iser_debug_level > 2)               \
-                       printk(KERN_DEBUG PFX "%s:" fmt,\
-                               __func__ , ## arg);     \
+#define iser_dbg(fmt, arg...)                           \
+       do {                                             \
+               if (iser_debug_level > 2)                \
+                       printk(KERN_DEBUG PFX "%s: " fmt,\
+                               __func__ , ## arg);      \
        } while (0)
 
 #define iser_warn(fmt, arg...)                         \
        do {                                            \
                if (iser_debug_level > 0)               \
-                       pr_warn(PFX "%s:" fmt,          \
+                       pr_warn(PFX "%s: " fmt,         \
                                __func__ , ## arg);     \
        } while (0)
 
 #define iser_info(fmt, arg...)                         \
        do {                                            \
                if (iser_debug_level > 1)               \
-                       pr_info(PFX "%s:" fmt,          \
+                       pr_info(PFX "%s: " fmt,         \
                                __func__ , ## arg);     \
        } while (0)
 
 #define iser_err(fmt, arg...)                          \
        do {                                            \
-               printk(KERN_ERR PFX "%s:" fmt,          \
+               printk(KERN_ERR PFX "%s: " fmt,         \
                       __func__ , ## arg);              \
        } while (0)
 
 #define SHIFT_4K       12
 #define SIZE_4K        (1ULL << SHIFT_4K)
 #define MASK_4K        (~(SIZE_4K-1))
-
                                        /* support up to 512KB in one RDMA */
 #define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
 #define ISER_DEF_XMIT_CMDS_DEFAULT             512
                                        ISER_MAX_TX_MISC_PDUS         + \
                                        ISER_MAX_RX_MISC_PDUS)
 
+#define ISER_WC_BATCH_COUNT   16
+#define ISER_SIGNAL_CMD_COUNT 32
+
 #define ISER_VER                       0x10
 #define ISER_WSV                       0x08
 #define ISER_RSV                       0x04
 
 #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
+#define ISER_BEACON_WRID               0xfffffffffffffffeULL
 
+/**
+ * struct iser_hdr - iSER header
+ *
+ * @flags:        flags support (zbva, remote_inv)
+ * @rsvd:         reserved
+ * @write_stag:   write rkey
+ * @write_va:     write virtual address
+ * @read_stag:    read rkey
+ * @read_va:      read virtual address
+ */
 struct iser_hdr {
        u8      flags;
        u8      rsvd[3];
-       __be32  write_stag; /* write rkey */
+       __be32  write_stag;
        __be64  write_va;
-       __be32  read_stag;  /* read rkey */
+       __be32  read_stag;
        __be64  read_va;
 } __attribute__((packed));
 
@@ -179,7 +192,7 @@ struct iser_cm_hdr {
 /* Length of an object name string */
 #define ISER_OBJECT_NAME_SIZE              64
 
-enum iser_ib_conn_state {
+enum iser_conn_state {
        ISER_CONN_INIT,            /* descriptor allocd, no conn          */
        ISER_CONN_PENDING,         /* in the process of being established */
        ISER_CONN_UP,              /* up and running                      */
@@ -200,23 +213,42 @@ enum iser_data_dir {
        ISER_DIRS_NUM
 };
 
+/**
+ * struct iser_data_buf - iSER data buffer
+ *
+ * @buf:          pointer to the sg list
+ * @size:         num entries of this sg
+ * @data_len:     total buffer byte len
+ * @dma_nents:    returned by dma_map_sg
+ * @copy_buf:     allocated copy buf for SGs unaligned
+ *                for rdma which are copied
+ * @sg_single:    SG-ified clone of a non SG SC or
+ *                unaligned SG
+ */
 struct iser_data_buf {
-       void               *buf;      /* pointer to the sg list               */
-       unsigned int       size;      /* num entries of this sg               */
-       unsigned long      data_len;  /* total data len                       */
-       unsigned int       dma_nents; /* returned by dma_map_sg               */
-       char               *copy_buf; /* allocated copy buf for SGs unaligned *
-                                      * for rdma which are copied            */
-       struct scatterlist sg_single; /* SG-ified clone of a non SG SC or     *
-                                      * unaligned SG                         */
+       void               *buf;
+       unsigned int       size;
+       unsigned long      data_len;
+       unsigned int       dma_nents;
+       char               *copy_buf;
+       struct scatterlist sg_single;
   };
 
 /* fwd declarations */
 struct iser_device;
-struct iser_cq_desc;
 struct iscsi_iser_task;
 struct iscsi_endpoint;
 
+/**
+ * struct iser_mem_reg - iSER memory registration info
+ *
+ * @lkey:         MR local key
+ * @rkey:         MR remote key
+ * @va:           MR start address (buffer va)
+ * @len:          MR length
+ * @mem_h:        pointer to registration context (FMR/Fastreg)
+ * @is_mr:        indicates whether we registered the buffer
+ */
 struct iser_mem_reg {
        u32  lkey;
        u32  rkey;
@@ -226,11 +258,20 @@ struct iser_mem_reg {
        int  is_mr;
 };
 
+/**
+ * struct iser_regd_buf - iSER buffer registration desc
+ *
+ * @reg:          memory registration info
+ * @virt_addr:    virtual address of buffer
+ * @device:       reference to iser device
+ * @direction:    dma direction (for dma_unmap)
+ * @data_size:    data buffer size in bytes
+ */
 struct iser_regd_buf {
-       struct iser_mem_reg     reg;        /* memory registration info        */
+       struct iser_mem_reg     reg;
        void                    *virt_addr;
-       struct iser_device      *device;    /* device->device for dma_unmap    */
-       enum dma_data_direction direction;  /* direction for dma_unmap         */
+       struct iser_device      *device;
+       enum dma_data_direction direction;
        unsigned int            data_size;
 };
 
@@ -240,19 +281,39 @@ enum iser_desc_type {
        ISCSI_TX_DATAOUT
 };
 
+/**
+ * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ *
+ * @iser_header:   iser header
+ * @iscsi_header:  iscsi header
+ * @type:          command/control/dataout
+ * @dma_addr:      header buffer dma_address
+ * @tx_sg:         sg[0] points to iser/iscsi headers
+ *                 sg[1] optionally points to either of immediate data
+ *                 unsolicited data-out or control
+ * @num_sge:       number sges used on this TX task
+ */
 struct iser_tx_desc {
        struct iser_hdr              iser_header;
        struct iscsi_hdr             iscsi_header;
        enum   iser_desc_type        type;
        u64                          dma_addr;
-       /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
-       of immediate data, unsolicited data-out or control (login,text) */
        struct ib_sge                tx_sg[2];
        int                          num_sge;
 };
 
 #define ISER_RX_PAD_SIZE       (256 - (ISER_RX_PAYLOAD_SIZE + \
                                        sizeof(u64) + sizeof(struct ib_sge)))
+/**
+ * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ *
+ * @iser_header:   iser header
+ * @iscsi_header:  iscsi header
+ * @data:          received data segment
+ * @dma_addr:      receive buffer dma address
+ * @rx_sg:         ib_sge of receive buffer
+ * @pad:           for sense data TODO: Modify to maximum sense length supported
+ */
 struct iser_rx_desc {
        struct iser_hdr              iser_header;
        struct iscsi_hdr             iscsi_header;
@@ -265,25 +326,59 @@ struct iser_rx_desc {
 #define ISER_MAX_CQ 4
 
 struct iser_conn;
+struct ib_conn;
 struct iscsi_iser_task;
 
+/**
+ * struct iser_comp - iSER completion context
+ *
+ * @device:     pointer to device handle
+ * @cq:         completion queue
+ * @wcs:        work completion array
+ * @tasklet:    Tasklet handle
+ * @active_qps: Number of active QPs attached
+ *              to completion context
+ */
+struct iser_comp {
+       struct iser_device      *device;
+       struct ib_cq            *cq;
+       struct ib_wc             wcs[ISER_WC_BATCH_COUNT];
+       struct tasklet_struct    tasklet;
+       int                      active_qps;
+};
+
+/**
+ * struct iser_device - iSER device handle
+ *
+ * @ib_device:     RDMA device
+ * @pd:            Protection Domain for this device
+ * @dev_attr:      Device attributes container
+ * @mr:            Global DMA memory region
+ * @event_handler: IB events handle routine
+ * @ig_list:      entry in devices list
+ * @refcount:      Reference counter, dominated by open iser connections
+ * @comps_used:    Number of completion contexts used, Min between online
+ *                 cpus and device max completion vectors
+ * @comps:         Dynamically allocated array of completion handlers
+ * Memory registration pool Function pointers (FMR or Fastreg):
+ *     @iser_alloc_rdma_reg_res: Allocation of memory regions pool
+ *     @iser_free_rdma_reg_res:  Free of memory regions pool
+ *     @iser_reg_rdma_mem:       Memory registration routine
+ *     @iser_unreg_rdma_mem:     Memory deregistration routine
+ */
 struct iser_device {
        struct ib_device             *ib_device;
        struct ib_pd                 *pd;
        struct ib_device_attr        dev_attr;
-       struct ib_cq                 *rx_cq[ISER_MAX_CQ];
-       struct ib_cq                 *tx_cq[ISER_MAX_CQ];
        struct ib_mr                 *mr;
-       struct tasklet_struct        cq_tasklet[ISER_MAX_CQ];
        struct ib_event_handler      event_handler;
-       struct list_head             ig_list; /* entry in ig devices list */
+       struct list_head             ig_list;
        int                          refcount;
-       int                          cq_active_qps[ISER_MAX_CQ];
-       int                          cqs_used;
-       struct iser_cq_desc          *cq_desc;
-       int                          (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+       int                          comps_used;
+       struct iser_comp             comps[ISER_MAX_CQ];
+       int                          (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
                                                                unsigned cmds_max);
-       void                         (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+       void                         (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
        int                          (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
                                                          enum iser_data_dir cmd_dir);
        void                         (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
@@ -301,78 +396,160 @@ enum iser_reg_indicator {
        ISER_FASTREG_PROTECTED  = 1 << 3,
 };
 
+/**
+ * struct iser_pi_context - Protection information context
+ *
+ * @prot_mr:        protection memory region
+ * @prot_frpl:      protection fastreg page list
+ * @sig_mr:         signature feature enabled memory region
+ */
 struct iser_pi_context {
        struct ib_mr                   *prot_mr;
        struct ib_fast_reg_page_list   *prot_frpl;
        struct ib_mr                   *sig_mr;
 };
 
+/**
+ * struct fast_reg_descriptor - Fast registration descriptor
+ *
+ * @list:           entry in connection fastreg pool
+ * @data_mr:        data memory region
+ * @data_frpl:      data fastreg page list
+ * @pi_ctx:         protection information context
+ * @reg_indicators: fast registration indicators
+ */
 struct fast_reg_descriptor {
        struct list_head                  list;
-       /* For fast registration - FRWR */
        struct ib_mr                     *data_mr;
        struct ib_fast_reg_page_list     *data_frpl;
        struct iser_pi_context           *pi_ctx;
-       /* registration indicators container */
        u8                                reg_indicators;
 };
 
+/**
+ * struct ib_conn - Infiniband related objects
+ *
+ * @cma_id:              rdma_cm connection manager handle
+ * @qp:                  Connection Queue-pair
+ * @post_recv_buf_count: post receive counter
+ * @rx_wr:               receive work request for batch posts
+ * @device:              reference to iser device
+ * @comp:                iser completion context
+ * @pi_support:          Indicate device T10-PI support
+ * @beacon:              beacon send wr to signal all flush errors were drained
+ * @flush_comp:          completes when all connection completions consumed
+ * @lock:                protects fmr/fastreg pool
+ * @union.fmr:
+ *     @pool:            FMR pool for fast registrations
+ *     @page_vec:        page vector to hold mapped commands pages
+ *                       used for registration
+ * @union.fastreg:
+ *     @pool:            Fast registration descriptors pool for fast
+ *                       registrations
+ *     @pool_size:       Size of pool
+ */
+struct ib_conn {
+       struct rdma_cm_id           *cma_id;
+       struct ib_qp                *qp;
+       int                          post_recv_buf_count;
+       struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
+       struct iser_device          *device;
+       struct iser_comp            *comp;
+       bool                         pi_support;
+       struct ib_send_wr            beacon;
+       struct completion            flush_comp;
+       spinlock_t                   lock;
+       union {
+               struct {
+                       struct ib_fmr_pool      *pool;
+                       struct iser_page_vec    *page_vec;
+               } fmr;
+               struct {
+                       struct list_head         pool;
+                       int                      pool_size;
+               } fastreg;
+       };
+};
+
+/**
+ * struct iser_conn - iSER connection context
+ *
+ * @ib_conn:          connection RDMA resources
+ * @iscsi_conn:       link to matching iscsi connection
+ * @ep:               transport handle
+ * @state:            connection logical state
+ * @qp_max_recv_dtos: maximum number of data outs, corresponds
+ *                    to max number of post recvs
+ * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
+ * @min_posted_rx:    (qp_max_recv_dtos >> 2)
+ * @name:             connection peer portal
+ * @release_work:     deferred work for release job
+ * @state_mutex:      protects iser connection state
+ * @stop_completion:  conn_stop completion
+ * @ib_completion:    RDMA cleanup completion
+ * @up_completion:    connection establishment completed
+ *                    (state is ISER_CONN_UP)
+ * @conn_list:        entry in ig conn list
+ * @login_buf:        login data buffer (stores login parameters)
+ * @login_req_buf:    login request buffer
+ * @login_req_dma:    login request buffer dma address
+ * @login_resp_buf:   login response buffer
+ * @login_resp_dma:   login response buffer dma address
+ * @rx_desc_head:     head of rx_descs cyclic buffer
+ * @rx_descs:         rx buffers array (cyclic buffer)
+ * @num_rx_descs:     number of rx descriptors
+ */
 struct iser_conn {
+       struct ib_conn               ib_conn;
        struct iscsi_conn            *iscsi_conn;
        struct iscsi_endpoint        *ep;
-       enum iser_ib_conn_state      state;         /* rdma connection state   */
-       atomic_t                     refcount;
-       spinlock_t                   lock;          /* used for state changes  */
-       struct iser_device           *device;       /* device context          */
-       struct rdma_cm_id            *cma_id;       /* CMA ID                  */
-       struct ib_qp                 *qp;           /* QP                      */
-       unsigned                     qp_max_recv_dtos; /* num of rx buffers */
-       unsigned                     qp_max_recv_dtos_mask; /* above minus 1 */
-       unsigned                     min_posted_rx; /* qp_max_recv_dtos >> 2 */
-       int                          post_recv_buf_count; /* posted rx count  */
-       atomic_t                     post_send_buf_count; /* posted tx count   */
+       enum iser_conn_state         state;
+       unsigned                     qp_max_recv_dtos;
+       unsigned                     qp_max_recv_dtos_mask;
+       unsigned                     min_posted_rx;
        char                         name[ISER_OBJECT_NAME_SIZE];
        struct work_struct           release_work;
-       struct completion            stop_completion;
        struct mutex                 state_mutex;
-       struct completion            flush_completion;
+       struct completion            stop_completion;
+       struct completion            ib_completion;
        struct completion            up_completion;
-       struct list_head             conn_list;       /* entry in ig conn list */
+       struct list_head             conn_list;
 
        char                         *login_buf;
        char                         *login_req_buf, *login_resp_buf;
        u64                          login_req_dma, login_resp_dma;
        unsigned int                 rx_desc_head;
        struct iser_rx_desc          *rx_descs;
-       struct ib_recv_wr            rx_wr[ISER_MIN_POSTED_RX];
-       bool                         pi_support;
-
-       /* Connection memory registration pool */
-       union {
-               struct {
-                       struct ib_fmr_pool      *pool;     /* pool of IB FMRs         */
-                       struct iser_page_vec    *page_vec; /* represents SG to fmr maps*
-                                                           * maps serialized as tx is*/
-               } fmr;
-               struct {
-                       struct list_head        pool;
-                       int                     pool_size;
-               } fastreg;
-       };
+       u32                          num_rx_descs;
 };
 
+/**
+ * struct iscsi_iser_task - iser task context
+ *
+ * @desc:     TX descriptor
+ * @iser_conn:        link to iser connection
+ * @status:           current task status
+ * @sc:               link to scsi command
+ * @command_sent:     indicate if command was sent
+ * @dir:              iser data direction
+ * @rdma_regd:        task rdma registration desc
+ * @data:             iser data buffer desc
+ * @data_copy:        iser data copy buffer desc (bounce buffer)
+ * @prot:             iser protection buffer desc
+ * @prot_copy:        iser protection copy buffer desc (bounce buffer)
+ */
 struct iscsi_iser_task {
        struct iser_tx_desc          desc;
-       struct iser_conn             *ib_conn;
+       struct iser_conn             *iser_conn;
        enum iser_task_status        status;
        struct scsi_cmnd             *sc;
-       int                          command_sent;  /* set if command  sent  */
-       int                          dir[ISER_DIRS_NUM];      /* set if dir use*/
-       struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
-       struct iser_data_buf         data[ISER_DIRS_NUM];     /* orig. data des*/
-       struct iser_data_buf         data_copy[ISER_DIRS_NUM];/* contig. copy  */
-       struct iser_data_buf         prot[ISER_DIRS_NUM];     /* prot desc     */
-       struct iser_data_buf         prot_copy[ISER_DIRS_NUM];/* prot copy     */
+       int                          command_sent;
+       int                          dir[ISER_DIRS_NUM];
+       struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];
+       struct iser_data_buf         data[ISER_DIRS_NUM];
+       struct iser_data_buf         data_copy[ISER_DIRS_NUM];
+       struct iser_data_buf         prot[ISER_DIRS_NUM];
+       struct iser_data_buf         prot_copy[ISER_DIRS_NUM];
 };
 
 struct iser_page_vec {
@@ -382,17 +559,20 @@ struct iser_page_vec {
        int data_size;
 };
 
-struct iser_cq_desc {
-       struct iser_device           *device;
-       int                          cq_index;
-};
-
+/**
+ * struct iser_global - iSER global context
+ *
+ * @device_list_mutex:    protects device_list
+ * @device_list:          iser devices global list
+ * @connlist_mutex:       protects connlist
+ * @connlist:             iser connections global list
+ * @desc_cache:           kmem cache for tx dataout
+ */
 struct iser_global {
-       struct mutex      device_list_mutex;/*                   */
-       struct list_head  device_list;       /* all iSER devices */
+       struct mutex      device_list_mutex;
+       struct list_head  device_list;
        struct mutex      connlist_mutex;
-       struct list_head  connlist;             /* all iSER IB connections */
-
+       struct list_head  connlist;
        struct kmem_cache *desc_cache;
 };
 
@@ -401,9 +581,6 @@ extern int iser_debug_level;
 extern bool iser_pi_enable;
 extern int iser_pi_guard;
 
-/* allocate connection resources needed for rdma functionality */
-int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
-
 int iser_send_control(struct iscsi_conn *conn,
                      struct iscsi_task *task);
 
@@ -415,29 +592,30 @@ int iser_send_data_out(struct iscsi_conn *conn,
                       struct iscsi_data *hdr);
 
 void iscsi_iser_recv(struct iscsi_conn *conn,
-                    struct iscsi_hdr       *hdr,
-                    char                   *rx_data,
-                    int                    rx_data_len);
+                    struct iscsi_hdr *hdr,
+                    char *rx_data,
+                    int rx_data_len);
 
-void iser_conn_init(struct iser_conn *ib_conn);
+void iser_conn_init(struct iser_conn *iser_conn);
 
-void iser_conn_release(struct iser_conn *ib_conn);
+void iser_conn_release(struct iser_conn *iser_conn);
 
-void iser_conn_terminate(struct iser_conn *ib_conn);
+int iser_conn_terminate(struct iser_conn *iser_conn);
 
 void iser_release_work(struct work_struct *work);
 
 void iser_rcv_completion(struct iser_rx_desc *desc,
-                        unsigned long    dto_xfer_len,
-                       struct iser_conn *ib_conn);
+                        unsigned long dto_xfer_len,
+                        struct ib_conn *ib_conn);
 
-void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
+void iser_snd_completion(struct iser_tx_desc *desc,
+                        struct ib_conn *ib_conn);
 
 void iser_task_rdma_init(struct iscsi_iser_task *task);
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *task);
 
-void iser_free_rx_descriptors(struct iser_conn *ib_conn);
+void iser_free_rx_descriptors(struct iser_conn *iser_conn);
 
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     struct iser_data_buf *mem,
@@ -449,38 +627,40 @@ int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
 int  iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
                               enum iser_data_dir cmd_dir);
 
-int  iser_connect(struct iser_conn   *ib_conn,
-                 struct sockaddr    *src_addr,
-                 struct sockaddr    *dst_addr,
-                 int                non_blocking);
+int  iser_connect(struct iser_conn *iser_conn,
+                 struct sockaddr *src_addr,
+                 struct sockaddr *dst_addr,
+                 int non_blocking);
 
-int  iser_reg_page_vec(struct iser_conn     *ib_conn,
+int  iser_reg_page_vec(struct ib_conn *ib_conn,
                       struct iser_page_vec *page_vec,
-                      struct iser_mem_reg  *mem_reg);
+                      struct iser_mem_reg *mem_reg);
 
 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir);
 void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
                            enum iser_data_dir cmd_dir);
 
-int  iser_post_recvl(struct iser_conn *ib_conn);
-int  iser_post_recvm(struct iser_conn *ib_conn, int count);
-int  iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
+int  iser_post_recvl(struct iser_conn *iser_conn);
+int  iser_post_recvm(struct iser_conn *iser_conn, int count);
+int  iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+                   bool signal);
 
 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
-                           struct iser_data_buf       *data,
-                           enum   iser_data_dir       iser_dir,
-                           enum   dma_data_direction  dma_dir);
+                          struct iser_data_buf *data,
+                          enum iser_data_dir iser_dir,
+                          enum dma_data_direction dma_dir);
 
 void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
                              struct iser_data_buf *data);
 int  iser_initialize_task_headers(struct iscsi_task *task,
                        struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+                             struct iscsi_session *session);
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct ib_conn *ib_conn);
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct ib_conn *ib_conn);
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector);
 #endif
index 8d44a4060634c084971f6755aa246ff384c9a40a..5a489ea63732c0166b695ce21fe16c6bb3206413 100644 (file)
@@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
 
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
-       struct iser_device  *device = iser_task->ib_conn->device;
+       struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -103,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                       unsigned int edtl)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
-       struct iser_device  *device = iser_task->ib_conn->device;
+       struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -160,10 +160,10 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 }
 
 /* creates a new tx descriptor and adds header regd buffer */
-static void iser_create_send_desc(struct iser_conn     *ib_conn,
+static void iser_create_send_desc(struct iser_conn     *iser_conn,
                                  struct iser_tx_desc   *tx_desc)
 {
-       struct iser_device *device = ib_conn->device;
+       struct iser_device *device = iser_conn->ib_conn.device;
 
        ib_dma_sync_single_for_cpu(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -179,103 +179,108 @@ static void iser_create_send_desc(struct iser_conn      *ib_conn,
        }
 }
 
-static void iser_free_login_buf(struct iser_conn *ib_conn)
+static void iser_free_login_buf(struct iser_conn *iser_conn)
 {
-       if (!ib_conn->login_buf)
+       struct iser_device *device = iser_conn->ib_conn.device;
+
+       if (!iser_conn->login_buf)
                return;
 
-       if (ib_conn->login_req_dma)
-               ib_dma_unmap_single(ib_conn->device->ib_device,
-                                   ib_conn->login_req_dma,
+       if (iser_conn->login_req_dma)
+               ib_dma_unmap_single(device->ib_device,
+                                   iser_conn->login_req_dma,
                                    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
 
-       if (ib_conn->login_resp_dma)
-               ib_dma_unmap_single(ib_conn->device->ib_device,
-                                   ib_conn->login_resp_dma,
+       if (iser_conn->login_resp_dma)
+               ib_dma_unmap_single(device->ib_device,
+                                   iser_conn->login_resp_dma,
                                    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
 
-       kfree(ib_conn->login_buf);
+       kfree(iser_conn->login_buf);
 
        /* make sure we never redo any unmapping */
-       ib_conn->login_req_dma = 0;
-       ib_conn->login_resp_dma = 0;
-       ib_conn->login_buf = NULL;
+       iser_conn->login_req_dma = 0;
+       iser_conn->login_resp_dma = 0;
+       iser_conn->login_buf = NULL;
 }
 
-static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+static int iser_alloc_login_buf(struct iser_conn *iser_conn)
 {
-       struct iser_device      *device;
+       struct iser_device *device = iser_conn->ib_conn.device;
        int                     req_err, resp_err;
 
-       BUG_ON(ib_conn->device == NULL);
+       BUG_ON(device == NULL);
 
-       device = ib_conn->device;
-
-       ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+       iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                     ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-       if (!ib_conn->login_buf)
+       if (!iser_conn->login_buf)
                goto out_err;
 
-       ib_conn->login_req_buf  = ib_conn->login_buf;
-       ib_conn->login_resp_buf = ib_conn->login_buf +
+       iser_conn->login_req_buf  = iser_conn->login_buf;
+       iser_conn->login_resp_buf = iser_conn->login_buf +
                                                ISCSI_DEF_MAX_RECV_SEG_LEN;
 
-       ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
-                               (void *)ib_conn->login_req_buf,
-                               ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+       iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
+                                                    iser_conn->login_req_buf,
+                                                    ISCSI_DEF_MAX_RECV_SEG_LEN,
+                                                    DMA_TO_DEVICE);
 
-       ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
-                               (void *)ib_conn->login_resp_buf,
-                               ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+       iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
+                                                     iser_conn->login_resp_buf,
+                                                     ISER_RX_LOGIN_SIZE,
+                                                     DMA_FROM_DEVICE);
 
        req_err  = ib_dma_mapping_error(device->ib_device,
-                                       ib_conn->login_req_dma);
+                                       iser_conn->login_req_dma);
        resp_err = ib_dma_mapping_error(device->ib_device,
-                                       ib_conn->login_resp_dma);
+                                       iser_conn->login_resp_dma);
 
        if (req_err || resp_err) {
                if (req_err)
-                       ib_conn->login_req_dma = 0;
+                       iser_conn->login_req_dma = 0;
                if (resp_err)
-                       ib_conn->login_resp_dma = 0;
+                       iser_conn->login_resp_dma = 0;
                goto free_login_buf;
        }
        return 0;
 
 free_login_buf:
-       iser_free_login_buf(ib_conn);
+       iser_free_login_buf(iser_conn);
 
 out_err:
        iser_err("unable to alloc or map login buf\n");
        return -ENOMEM;
 }
 
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
+                             struct iscsi_session *session)
 {
        int i, j;
        u64 dma_addr;
        struct iser_rx_desc *rx_desc;
        struct ib_sge       *rx_sg;
-       struct iser_device  *device = ib_conn->device;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+       struct iser_device *device = ib_conn->device;
 
-       ib_conn->qp_max_recv_dtos = session->cmds_max;
-       ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
-       ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+       iser_conn->qp_max_recv_dtos = session->cmds_max;
+       iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+       iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
 
        if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
                goto create_rdma_reg_res_failed;
 
-       if (iser_alloc_login_buf(ib_conn))
+       if (iser_alloc_login_buf(iser_conn))
                goto alloc_login_buf_fail;
 
-       ib_conn->rx_descs = kmalloc(session->cmds_max *
+       iser_conn->num_rx_descs = session->cmds_max;
+       iser_conn->rx_descs = kmalloc(iser_conn->num_rx_descs *
                                sizeof(struct iser_rx_desc), GFP_KERNEL);
-       if (!ib_conn->rx_descs)
+       if (!iser_conn->rx_descs)
                goto rx_desc_alloc_fail;
 
-       rx_desc = ib_conn->rx_descs;
+       rx_desc = iser_conn->rx_descs;
 
-       for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)  {
+       for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)  {
                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
                                        ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -289,18 +294,18 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s
                rx_sg->lkey   = device->mr->lkey;
        }
 
-       ib_conn->rx_desc_head = 0;
+       iser_conn->rx_desc_head = 0;
        return 0;
 
 rx_desc_dma_map_failed:
-       rx_desc = ib_conn->rx_descs;
+       rx_desc = iser_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
-       kfree(ib_conn->rx_descs);
-       ib_conn->rx_descs = NULL;
+       kfree(iser_conn->rx_descs);
+       iser_conn->rx_descs = NULL;
 rx_desc_alloc_fail:
-       iser_free_login_buf(ib_conn);
+       iser_free_login_buf(iser_conn);
 alloc_login_buf_fail:
        device->iser_free_rdma_reg_res(ib_conn);
 create_rdma_reg_res_failed:
@@ -308,33 +313,35 @@ create_rdma_reg_res_failed:
        return -ENOMEM;
 }
 
-void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+void iser_free_rx_descriptors(struct iser_conn *iser_conn)
 {
        int i;
        struct iser_rx_desc *rx_desc;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
 
-       if (!ib_conn->rx_descs)
+       if (!iser_conn->rx_descs)
                goto free_login_buf;
 
        if (device->iser_free_rdma_reg_res)
                device->iser_free_rdma_reg_res(ib_conn);
 
-       rx_desc = ib_conn->rx_descs;
-       for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
+       rx_desc = iser_conn->rx_descs;
+       for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
-       kfree(ib_conn->rx_descs);
+       kfree(iser_conn->rx_descs);
        /* make sure we never redo any unmapping */
-       ib_conn->rx_descs = NULL;
+       iser_conn->rx_descs = NULL;
 
 free_login_buf:
-       iser_free_login_buf(ib_conn);
+       iser_free_login_buf(iser_conn);
 }
 
 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 {
-       struct iser_conn *ib_conn = conn->dd_data;
+       struct iser_conn *iser_conn = conn->dd_data;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iscsi_session *session = conn->session;
 
        iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -343,34 +350,37 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
                return 0;
 
        /*
-        * Check that there is one posted recv buffer (for the last login
-        * response) and no posted send buffers left - they must have been
-        * consumed during previous login phases.
+        * Check that there is one posted recv buffer
+        * (for the last login response).
         */
        WARN_ON(ib_conn->post_recv_buf_count != 1);
-       WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
 
        if (session->discovery_sess) {
                iser_info("Discovery session, re-using login RX buffer\n");
                return 0;
        } else
                iser_info("Normal session, posting batch of RX %d buffers\n",
-                         ib_conn->min_posted_rx);
+                         iser_conn->min_posted_rx);
 
        /* Initial post receive buffers */
-       if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
+       if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx))
                return -ENOMEM;
 
        return 0;
 }
 
+static inline bool iser_signal_comp(int sig_count)
+{
+       return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
+}
+
 /**
  * iser_send_command - send command PDU
  */
 int iser_send_command(struct iscsi_conn *conn,
                      struct iscsi_task *task)
 {
-       struct iser_conn *ib_conn = conn->dd_data;
+       struct iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        unsigned long edtl;
        int err;
@@ -378,12 +388,13 @@ int iser_send_command(struct iscsi_conn *conn,
        struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
        struct scsi_cmnd *sc  =  task->sc;
        struct iser_tx_desc *tx_desc = &iser_task->desc;
+       static unsigned sig_count;
 
        edtl = ntohl(hdr->data_length);
 
        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
-       iser_create_send_desc(ib_conn, tx_desc);
+       iser_create_send_desc(iser_conn, tx_desc);
 
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
                data_buf = &iser_task->data[ISER_DIR_IN];
@@ -423,7 +434,8 @@ int iser_send_command(struct iscsi_conn *conn,
 
        iser_task->status = ISER_TASK_STATUS_STARTED;
 
-       err = iser_post_send(ib_conn, tx_desc);
+       err = iser_post_send(&iser_conn->ib_conn, tx_desc,
+                            iser_signal_comp(++sig_count));
        if (!err)
                return 0;
 
@@ -439,7 +451,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
                       struct iscsi_task *task,
                       struct iscsi_data *hdr)
 {
-       struct iser_conn *ib_conn = conn->dd_data;
+       struct iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *tx_desc = NULL;
        struct iser_regd_buf *regd_buf;
@@ -488,7 +500,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
                 itt, buf_offset, data_seg_len);
 
 
-       err = iser_post_send(ib_conn, tx_desc);
+       err = iser_post_send(&iser_conn->ib_conn, tx_desc, true);
        if (!err)
                return 0;
 
@@ -501,7 +513,7 @@ send_data_out_error:
 int iser_send_control(struct iscsi_conn *conn,
                      struct iscsi_task *task)
 {
-       struct iser_conn *ib_conn = conn->dd_data;
+       struct iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *mdesc = &iser_task->desc;
        unsigned long data_seg_len;
@@ -510,9 +522,9 @@ int iser_send_control(struct iscsi_conn *conn,
 
        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
-       iser_create_send_desc(ib_conn, mdesc);
+       iser_create_send_desc(iser_conn, mdesc);
 
-       device = ib_conn->device;
+       device = iser_conn->ib_conn.device;
 
        data_seg_len = ntoh24(task->hdr->dlength);
 
@@ -524,16 +536,16 @@ int iser_send_control(struct iscsi_conn *conn,
                }
 
                ib_dma_sync_single_for_cpu(device->ib_device,
-                       ib_conn->login_req_dma, task->data_count,
+                       iser_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);
 
-               memcpy(ib_conn->login_req_buf, task->data, task->data_count);
+               memcpy(iser_conn->login_req_buf, task->data, task->data_count);
 
                ib_dma_sync_single_for_device(device->ib_device,
-                       ib_conn->login_req_dma, task->data_count,
+                       iser_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);
 
-               tx_dsg->addr    = ib_conn->login_req_dma;
+               tx_dsg->addr    = iser_conn->login_req_dma;
                tx_dsg->length  = task->data_count;
                tx_dsg->lkey    = device->mr->lkey;
                mdesc->num_sge = 2;
@@ -542,7 +554,7 @@ int iser_send_control(struct iscsi_conn *conn,
        if (task == conn->login_task) {
                iser_dbg("op %x dsl %lx, posting login rx buffer\n",
                         task->hdr->opcode, data_seg_len);
-               err = iser_post_recvl(ib_conn);
+               err = iser_post_recvl(iser_conn);
                if (err)
                        goto send_control_error;
                err = iser_post_rx_bufs(conn, task->hdr);
@@ -550,7 +562,7 @@ int iser_send_control(struct iscsi_conn *conn,
                        goto send_control_error;
        }
 
-       err = iser_post_send(ib_conn, mdesc);
+       err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
        if (!err)
                return 0;
 
@@ -564,15 +576,17 @@ send_control_error:
  */
 void iser_rcv_completion(struct iser_rx_desc *rx_desc,
                         unsigned long rx_xfer_len,
-                        struct iser_conn *ib_conn)
+                        struct ib_conn *ib_conn)
 {
+       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+                                                  ib_conn);
        struct iscsi_hdr *hdr;
        u64 rx_dma;
        int rx_buflen, outstanding, count, err;
 
        /* differentiate between login to all other PDUs */
-       if ((char *)rx_desc == ib_conn->login_resp_buf) {
-               rx_dma = ib_conn->login_resp_dma;
+       if ((char *)rx_desc == iser_conn->login_resp_buf) {
+               rx_dma = iser_conn->login_resp_dma;
                rx_buflen = ISER_RX_LOGIN_SIZE;
        } else {
                rx_dma = rx_desc->dma_addr;
@@ -580,14 +594,14 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
        }
 
        ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
-                       rx_buflen, DMA_FROM_DEVICE);
+                                  rx_buflen, DMA_FROM_DEVICE);
 
        hdr = &rx_desc->iscsi_header;
 
        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
                        hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
-       iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+       iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
                        rx_xfer_len - ISER_HEADERS_LEN);
 
        ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
@@ -599,21 +613,21 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
         * for the posted rx bufs refcount to become zero handles everything   */
        ib_conn->post_recv_buf_count--;
 
-       if (rx_dma == ib_conn->login_resp_dma)
+       if (rx_dma == iser_conn->login_resp_dma)
                return;
 
        outstanding = ib_conn->post_recv_buf_count;
-       if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
-               count = min(ib_conn->qp_max_recv_dtos - outstanding,
-                                               ib_conn->min_posted_rx);
-               err = iser_post_recvm(ib_conn, count);
+       if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
+               count = min(iser_conn->qp_max_recv_dtos - outstanding,
+                           iser_conn->min_posted_rx);
+               err = iser_post_recvm(iser_conn, count);
                if (err)
                        iser_err("posting %d rx bufs err %d\n", count, err);
        }
 }
 
 void iser_snd_completion(struct iser_tx_desc *tx_desc,
-                       struct iser_conn *ib_conn)
+                       struct ib_conn *ib_conn)
 {
        struct iscsi_task *task;
        struct iser_device *device = ib_conn->device;
@@ -625,8 +639,6 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,
                tx_desc = NULL;
        }
 
-       atomic_dec(&ib_conn->post_send_buf_count);
-
        if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
                /* this arithmetic is legal by libiscsi dd_data allocation */
                task = (void *) ((long)(void *)tx_desc -
@@ -658,7 +670,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 {
-       struct iser_device *device = iser_task->ib_conn->device;
+       struct iser_device *device = iser_task->iser_conn->ib_conn.device;
        int is_rdma_data_aligned = 1;
        int is_rdma_prot_aligned = 1;
        int prot_count = scsi_prot_sg_count(iser_task->sc);
index 47acd3ad3a17e6a2e1609fb8752ea65d41b82387..6c5ce357fba6baa19fc62d39bd4c9b79be8c48a2 100644 (file)
@@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                        struct iser_data_buf *data_copy,
                                        enum iser_data_dir cmd_dir)
 {
-       struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
        struct scatterlist *sgl = (struct scatterlist *)data->buf;
        struct scatterlist *sg;
        char *mem = NULL;
@@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
        struct ib_device *dev;
        unsigned long  cmd_data_len;
 
-       dev = iser_task->ib_conn->device->ib_device;
+       dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
        ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
                        (cmd_dir == ISER_DIR_OUT) ?
@@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
        struct ib_device *dev;
 
        iser_task->dir[iser_dir] = 1;
-       dev = iser_task->ib_conn->device->ib_device;
+       dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
        data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
        if (data->dma_nents == 0) {
@@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
 {
        struct ib_device *dev;
 
-       dev = iser_task->ib_conn->device->ib_device;
+       dev = iser_task->iser_conn->ib_conn.device->ib_device;
        ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
 }
 
@@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
                              enum iser_data_dir cmd_dir,
                              int aligned_len)
 {
-       struct iscsi_conn    *iscsi_conn = iser_task->ib_conn->iscsi_conn;
+       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
 
        iscsi_conn->fmr_unalign_cnt++;
        iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -377,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                          enum iser_data_dir cmd_dir)
 {
-       struct iser_conn     *ib_conn = iser_task->ib_conn;
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device   *device = ib_conn->device;
        struct ib_device     *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -432,7 +432,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                                 ib_conn->fmr.page_vec->offset);
                        for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
                                iser_err("page_vec[%d] = 0x%llx\n", i,
-                                        (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
+                                        (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
                }
                if (err)
                        return err;
@@ -440,77 +440,74 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
        return 0;
 }
 
-static inline enum ib_t10_dif_type
-scsi2ib_prot_type(unsigned char prot_type)
+static inline void
+iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
+                   struct ib_sig_domain *domain)
 {
-       switch (prot_type) {
-       case SCSI_PROT_DIF_TYPE0:
-               return IB_T10DIF_NONE;
-       case SCSI_PROT_DIF_TYPE1:
-               return IB_T10DIF_TYPE1;
-       case SCSI_PROT_DIF_TYPE2:
-               return IB_T10DIF_TYPE2;
-       case SCSI_PROT_DIF_TYPE3:
-               return IB_T10DIF_TYPE3;
-       default:
-               return IB_T10DIF_NONE;
-       }
-}
-
+       domain->sig_type = IB_SIG_TYPE_T10_DIF;
+       domain->sig.dif.pi_interval = sc->device->sector_size;
+       domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+       /*
+        * At the moment we hard code those, but in the future
+        * we will take them from sc.
+        */
+       domain->sig.dif.apptag_check_mask = 0xffff;
+       domain->sig.dif.app_escape = true;
+       domain->sig.dif.ref_escape = true;
+       if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
+           scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+               domain->sig.dif.ref_remap = true;
+};
 
 static int
 iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
 {
-       unsigned char scsi_ptype = scsi_get_prot_type(sc);
-
-       sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
-       sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
-       sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
-       sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
-
        switch (scsi_get_prot_op(sc)) {
        case SCSI_PROT_WRITE_INSERT:
        case SCSI_PROT_READ_STRIP:
-               sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
-               sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+               sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+               iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
-                                                 0xffffffff;
                break;
        case SCSI_PROT_READ_INSERT:
        case SCSI_PROT_WRITE_STRIP:
-               sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
-               sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
-                                                0xffffffff;
-               sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+               sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+               iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+               /*
+                * At the moment we use this modparam to tell what is
+                * the memory bg_type, in the future we will take it
+                * from sc.
+                */
+               sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+                                                IB_T10DIF_CRC;
                break;
        case SCSI_PROT_READ_PASS:
        case SCSI_PROT_WRITE_PASS:
-               sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
-               sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
-                                                0xffffffff;
-               sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+               iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
-                                                 0xffffffff;
+               iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
+               /*
+                * At the moment we use this modparam to tell what is
+                * the memory bg_type, in the future we will take it
+                * from sc.
+                */
+               sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
+                                                IB_T10DIF_CRC;
                break;
        default:
                iser_err("Unsupported PI operation %d\n",
                         scsi_get_prot_op(sc));
                return -EINVAL;
        }
+
        return 0;
 }
 
-
 static int
 iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
 {
        switch (scsi_get_prot_type(sc)) {
        case SCSI_PROT_DIF_TYPE0:
-               *mask = 0x0;
                break;
        case SCSI_PROT_DIF_TYPE1:
        case SCSI_PROT_DIF_TYPE2:
@@ -533,7 +530,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
                struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
                struct ib_sge *prot_sge, struct ib_sge *sig_sge)
 {
-       struct iser_conn *ib_conn = iser_task->ib_conn;
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_pi_context *pi_ctx = desc->pi_ctx;
        struct ib_send_wr sig_wr, inv_wr;
        struct ib_send_wr *bad_wr, *wr = NULL;
@@ -609,7 +606,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct ib_sge *sge)
 {
        struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
-       struct iser_conn *ib_conn = iser_task->ib_conn;
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct ib_mr *mr;
@@ -700,7 +697,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
                              enum iser_data_dir cmd_dir)
 {
-       struct iser_conn *ib_conn = iser_task->ib_conn;
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
index 3bfec4bbda5263ee636171c79f12f8548615823e..67225bb82bb50bc4b35ac8f350a750e5e3d96797 100644 (file)
 #include "iscsi_iser.h"
 
 #define ISCSI_ISER_MAX_CONN    8
-#define ISER_MAX_RX_CQ_LEN     (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN     (ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_RX_LEN                (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_TX_LEN                (ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_CQ_LEN                (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
+                                ISCSI_ISER_MAX_CONN)
+
+static int iser_cq_poll_limit = 512;
 
 static void iser_cq_tasklet_fn(unsigned long data);
 static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -71,7 +75,6 @@ static void iser_event_handler(struct ib_event_handler *handler,
  */
 static int iser_create_device_ib_res(struct iser_device *device)
 {
-       struct iser_cq_desc *cq_desc;
        struct ib_device_attr *dev_attr = &device->dev_attr;
        int ret, i;
 
@@ -101,51 +104,35 @@ static int iser_create_device_ib_res(struct iser_device *device)
                return -1;
        }
 
-       device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+       device->comps_used = min(ISER_MAX_CQ,
+                                device->ib_device->num_comp_vectors);
        iser_info("using %d CQs, device %s supports %d vectors\n",
-                 device->cqs_used, device->ib_device->name,
+                 device->comps_used, device->ib_device->name,
                  device->ib_device->num_comp_vectors);
 
-       device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
-                                 GFP_KERNEL);
-       if (device->cq_desc == NULL)
-               goto cq_desc_err;
-       cq_desc = device->cq_desc;
-
        device->pd = ib_alloc_pd(device->ib_device);
        if (IS_ERR(device->pd))
                goto pd_err;
 
-       for (i = 0; i < device->cqs_used; i++) {
-               cq_desc[i].device   = device;
-               cq_desc[i].cq_index = i;
-
-               device->rx_cq[i] = ib_create_cq(device->ib_device,
-                                         iser_cq_callback,
-                                         iser_cq_event_callback,
-                                         (void *)&cq_desc[i],
-                                         ISER_MAX_RX_CQ_LEN, i);
-               if (IS_ERR(device->rx_cq[i])) {
-                       device->rx_cq[i] = NULL;
+       for (i = 0; i < device->comps_used; i++) {
+               struct iser_comp *comp = &device->comps[i];
+
+               comp->device = device;
+               comp->cq = ib_create_cq(device->ib_device,
+                                       iser_cq_callback,
+                                       iser_cq_event_callback,
+                                       (void *)comp,
+                                       ISER_MAX_CQ_LEN, i);
+               if (IS_ERR(comp->cq)) {
+                       comp->cq = NULL;
                        goto cq_err;
                }
 
-               device->tx_cq[i] = ib_create_cq(device->ib_device,
-                                         NULL, iser_cq_event_callback,
-                                         (void *)&cq_desc[i],
-                                         ISER_MAX_TX_CQ_LEN, i);
-
-               if (IS_ERR(device->tx_cq[i])) {
-                       device->tx_cq[i] = NULL;
+               if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
                        goto cq_err;
-               }
 
-               if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
-                       goto cq_err;
-
-               tasklet_init(&device->cq_tasklet[i],
-                            iser_cq_tasklet_fn,
-                       (unsigned long)&cq_desc[i]);
+               tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
+                            (unsigned long)comp);
        }
 
        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
@@ -164,19 +151,17 @@ static int iser_create_device_ib_res(struct iser_device *device)
 handler_err:
        ib_dereg_mr(device->mr);
 dma_mr_err:
-       for (i = 0; i < device->cqs_used; i++)
-               tasklet_kill(&device->cq_tasklet[i]);
+       for (i = 0; i < device->comps_used; i++)
+               tasklet_kill(&device->comps[i].tasklet);
 cq_err:
-       for (i = 0; i < device->cqs_used; i++) {
-               if (device->tx_cq[i])
-                       ib_destroy_cq(device->tx_cq[i]);
-               if (device->rx_cq[i])
-                       ib_destroy_cq(device->rx_cq[i]);
+       for (i = 0; i < device->comps_used; i++) {
+               struct iser_comp *comp = &device->comps[i];
+
+               if (comp->cq)
+                       ib_destroy_cq(comp->cq);
        }
        ib_dealloc_pd(device->pd);
 pd_err:
-       kfree(device->cq_desc);
-cq_desc_err:
        iser_err("failed to allocate an IB resource\n");
        return -1;
 }
@@ -190,20 +175,18 @@ static void iser_free_device_ib_res(struct iser_device *device)
        int i;
        BUG_ON(device->mr == NULL);
 
-       for (i = 0; i < device->cqs_used; i++) {
-               tasklet_kill(&device->cq_tasklet[i]);
-               (void)ib_destroy_cq(device->tx_cq[i]);
-               (void)ib_destroy_cq(device->rx_cq[i]);
-               device->tx_cq[i] = NULL;
-               device->rx_cq[i] = NULL;
+       for (i = 0; i < device->comps_used; i++) {
+               struct iser_comp *comp = &device->comps[i];
+
+               tasklet_kill(&comp->tasklet);
+               ib_destroy_cq(comp->cq);
+               comp->cq = NULL;
        }
 
        (void)ib_unregister_event_handler(&device->event_handler);
        (void)ib_dereg_mr(device->mr);
        (void)ib_dealloc_pd(device->pd);
 
-       kfree(device->cq_desc);
-
        device->mr = NULL;
        device->pd = NULL;
 }
@@ -213,7 +196,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
  *
  * returns 0 on success, or errno code on failure
  */
-int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
 {
        struct iser_device *device = ib_conn->device;
        struct ib_fmr_pool_param params;
@@ -263,7 +246,7 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
 /**
  * iser_free_fmr_pool - releases the FMR pool and page vec
  */
-void iser_free_fmr_pool(struct iser_conn *ib_conn)
+void iser_free_fmr_pool(struct ib_conn *ib_conn)
 {
        iser_info("freeing conn %p fmr pool %p\n",
                  ib_conn, ib_conn->fmr.pool);
@@ -367,10 +350,10 @@ fast_reg_mr_failure:
  * for fast registration work requests.
  * returns 0 on success, or errno code on failure
  */
-int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
 {
-       struct iser_device      *device = ib_conn->device;
-       struct fast_reg_descriptor      *desc;
+       struct iser_device *device = ib_conn->device;
+       struct fast_reg_descriptor *desc;
        int i, ret;
 
        INIT_LIST_HEAD(&ib_conn->fastreg.pool);
@@ -406,7 +389,7 @@ err:
 /**
  * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
  */
-void iser_free_fastreg_pool(struct iser_conn *ib_conn)
+void iser_free_fastreg_pool(struct ib_conn *ib_conn)
 {
        struct fast_reg_descriptor *desc, *tmp;
        int i = 0;
@@ -440,7 +423,7 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn)
  *
  * returns 0 on success, -1 on failure
  */
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
        struct iser_device      *device;
        struct ib_qp_init_attr  init_attr;
@@ -455,28 +438,30 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 
        mutex_lock(&ig.connlist_mutex);
        /* select the CQ with the minimal number of usages */
-       for (index = 0; index < device->cqs_used; index++)
-               if (device->cq_active_qps[index] <
-                   device->cq_active_qps[min_index])
+       for (index = 0; index < device->comps_used; index++) {
+               if (device->comps[index].active_qps <
+                   device->comps[min_index].active_qps)
                        min_index = index;
-       device->cq_active_qps[min_index]++;
+       }
+       ib_conn->comp = &device->comps[min_index];
+       ib_conn->comp->active_qps++;
        mutex_unlock(&ig.connlist_mutex);
        iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
 
        init_attr.event_handler = iser_qp_event_callback;
        init_attr.qp_context    = (void *)ib_conn;
-       init_attr.send_cq       = device->tx_cq[min_index];
-       init_attr.recv_cq       = device->rx_cq[min_index];
+       init_attr.send_cq       = ib_conn->comp->cq;
+       init_attr.recv_cq       = ib_conn->comp->cq;
        init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
        init_attr.cap.max_recv_sge = 1;
        init_attr.sq_sig_type   = IB_SIGNAL_REQ_WR;
        init_attr.qp_type       = IB_QPT_RC;
        if (ib_conn->pi_support) {
-               init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
+               init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
                init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
        } else {
-               init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
+               init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
        }
 
        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -494,30 +479,6 @@ out_err:
        return ret;
 }
 
-/**
- * releases the QP object
- */
-static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
-{
-       int cq_index;
-       BUG_ON(ib_conn == NULL);
-
-       iser_info("freeing conn %p cma_id %p qp %p\n",
-                 ib_conn, ib_conn->cma_id,
-                 ib_conn->qp);
-
-       /* qp is created only once both addr & route are resolved */
-
-       if (ib_conn->qp != NULL) {
-               cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
-               ib_conn->device->cq_active_qps[cq_index]--;
-
-               rdma_destroy_qp(ib_conn->cma_id);
-       }
-
-       ib_conn->qp       = NULL;
-}
-
 /**
  * based on the resolved device node GUID see if there already allocated
  * device for this device. If there's no such, create one.
@@ -572,88 +533,142 @@ static void iser_device_try_release(struct iser_device *device)
 /**
  * Called with state mutex held
  **/
-static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
-                                    enum iser_ib_conn_state comp,
-                                    enum iser_ib_conn_state exch)
+static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
+                                    enum iser_conn_state comp,
+                                    enum iser_conn_state exch)
 {
        int ret;
 
-       if ((ret = (ib_conn->state == comp)))
-               ib_conn->state = exch;
+       ret = (iser_conn->state == comp);
+       if (ret)
+               iser_conn->state = exch;
+
        return ret;
 }
 
 void iser_release_work(struct work_struct *work)
 {
-       struct iser_conn *ib_conn;
-       int rc;
+       struct iser_conn *iser_conn;
 
-       ib_conn = container_of(work, struct iser_conn, release_work);
+       iser_conn = container_of(work, struct iser_conn, release_work);
 
-       /* wait for .conn_stop callback */
-       rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
-       WARN_ON(rc == 0);
+       /* Wait for conn_stop to complete */
+       wait_for_completion(&iser_conn->stop_completion);
+       /* Wait for IB resources cleanup to complete */
+       wait_for_completion(&iser_conn->ib_completion);
 
-       /* wait for the qp`s post send and post receive buffers to empty */
-       rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
-       WARN_ON(rc == 0);
+       mutex_lock(&iser_conn->state_mutex);
+       iser_conn->state = ISER_CONN_DOWN;
+       mutex_unlock(&iser_conn->state_mutex);
 
-       ib_conn->state = ISER_CONN_DOWN;
+       iser_conn_release(iser_conn);
+}
+
+/**
+ * iser_free_ib_conn_res - release IB related resources
+ * @iser_conn: iser connection struct
+ * @destroy_device: indicator if we need to try to release
+ *     the iser device (only iscsi shutdown and DEVICE_REMOVAL
+ *     will use this).
+ *
+ * This routine is called with the iser state mutex held
+ * so the cm_id removal is out of here. It is safe to
+ * be invoked multiple times.
+ */
+static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
+                                 bool destroy_device)
+{
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+       struct iser_device *device = ib_conn->device;
 
-       mutex_lock(&ib_conn->state_mutex);
-       ib_conn->state = ISER_CONN_DOWN;
-       mutex_unlock(&ib_conn->state_mutex);
+       iser_info("freeing conn %p cma_id %p qp %p\n",
+                 iser_conn, ib_conn->cma_id, ib_conn->qp);
+
+       iser_free_rx_descriptors(iser_conn);
 
-       iser_conn_release(ib_conn);
+       if (ib_conn->qp != NULL) {
+               ib_conn->comp->active_qps--;
+               rdma_destroy_qp(ib_conn->cma_id);
+               ib_conn->qp = NULL;
+       }
+
+       if (destroy_device && device != NULL) {
+               iser_device_try_release(device);
+               ib_conn->device = NULL;
+       }
 }
 
 /**
  * Frees all conn objects and deallocs conn descriptor
  */
-void iser_conn_release(struct iser_conn *ib_conn)
+void iser_conn_release(struct iser_conn *iser_conn)
 {
-       struct iser_device  *device = ib_conn->device;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
 
        mutex_lock(&ig.connlist_mutex);
-       list_del(&ib_conn->conn_list);
+       list_del(&iser_conn->conn_list);
        mutex_unlock(&ig.connlist_mutex);
 
-       mutex_lock(&ib_conn->state_mutex);
-       BUG_ON(ib_conn->state != ISER_CONN_DOWN);
-
-       iser_free_rx_descriptors(ib_conn);
-       iser_free_ib_conn_res(ib_conn);
-       ib_conn->device = NULL;
-       /* on EVENT_ADDR_ERROR there's no device yet for this conn */
-       if (device != NULL)
-               iser_device_try_release(device);
-       mutex_unlock(&ib_conn->state_mutex);
+       mutex_lock(&iser_conn->state_mutex);
+       if (iser_conn->state != ISER_CONN_DOWN)
+               iser_warn("iser conn %p state %d, expected state down.\n",
+                         iser_conn, iser_conn->state);
+       /*
+        * In case we never got to bind stage, we still need to
+        * release IB resources (which is safe to call more than once).
+        */
+       iser_free_ib_conn_res(iser_conn, true);
+       mutex_unlock(&iser_conn->state_mutex);
 
-       /* if cma handler context, the caller actually destroy the id */
        if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }
-       kfree(ib_conn);
+
+       kfree(iser_conn);
 }
 
 /**
  * triggers start of the disconnect procedures and wait for them to be done
+ * Called with state mutex held
  */
-void iser_conn_terminate(struct iser_conn *ib_conn)
+int iser_conn_terminate(struct iser_conn *iser_conn)
 {
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+       struct ib_send_wr *bad_wr;
        int err = 0;
 
-       /* change the ib conn state only if the conn is UP, however always call
-        * rdma_disconnect since this is the only way to cause the CMA to change
-        * the QP state to ERROR
+       /* terminate the iser conn only if the conn state is UP */
+       if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
+                                      ISER_CONN_TERMINATING))
+               return 0;
+
+       iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
+
+       /* suspend queuing of new iscsi commands */
+       if (iser_conn->iscsi_conn)
+               iscsi_suspend_queue(iser_conn->iscsi_conn);
+
+       /*
+        * In case we didn't already clean up the cma_id (peer initiated
+        * a disconnection), we need to cause the CMA to change the QP
+        * state to ERROR.
         */
+       if (ib_conn->cma_id) {
+               err = rdma_disconnect(ib_conn->cma_id);
+               if (err)
+                       iser_err("Failed to disconnect, conn: 0x%p err %d\n",
+                                iser_conn, err);
+
+               /* post an indication that all flush errors were consumed */
+               err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+               if (err)
+                       iser_err("conn %p failed to post beacon", ib_conn);
+
+               wait_for_completion(&ib_conn->flush_comp);
+       }
 
-       iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
-       err = rdma_disconnect(ib_conn->cma_id);
-       if (err)
-               iser_err("Failed to disconnect, conn: 0x%p err %d\n",
-                        ib_conn,err);
+       return 1;
 }
 
 /**
@@ -661,10 +676,10 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
  **/
 static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
 
-       ib_conn = (struct iser_conn *)cma_id->context;
-       ib_conn->state = ISER_CONN_DOWN;
+       iser_conn = (struct iser_conn *)cma_id->context;
+       iser_conn->state = ISER_CONN_DOWN;
 }
 
 /**
@@ -673,14 +688,16 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
 static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
        struct iser_device *device;
-       struct iser_conn   *ib_conn;
+       struct iser_conn   *iser_conn;
+       struct ib_conn   *ib_conn;
        int    ret;
 
-       ib_conn = (struct iser_conn *)cma_id->context;
-       if (ib_conn->state != ISER_CONN_PENDING)
+       iser_conn = (struct iser_conn *)cma_id->context;
+       if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;
 
+       ib_conn = &iser_conn->ib_conn;
        device = iser_device_find_by_ib_device(cma_id);
        if (!device) {
                iser_err("device lookup/creation failed\n");
@@ -719,14 +736,15 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
        struct rdma_conn_param conn_param;
        int    ret;
        struct iser_cm_hdr req_hdr;
-       struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
+       struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
 
-       if (ib_conn->state != ISER_CONN_PENDING)
+       if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;
 
-       ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
+       ret = iser_create_ib_conn_res(ib_conn);
        if (ret)
                goto failure;
 
@@ -755,57 +773,60 @@ failure:
 
 static void iser_connected_handler(struct rdma_cm_id *cma_id)
 {
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
        struct ib_qp_attr attr;
        struct ib_qp_init_attr init_attr;
 
-       ib_conn = (struct iser_conn *)cma_id->context;
-       if (ib_conn->state != ISER_CONN_PENDING)
+       iser_conn = (struct iser_conn *)cma_id->context;
+       if (iser_conn->state != ISER_CONN_PENDING)
                /* bailout */
                return;
 
        (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
        iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
-       ib_conn->state = ISER_CONN_UP;
-       complete(&ib_conn->up_completion);
+       iser_conn->state = ISER_CONN_UP;
+       complete(&iser_conn->up_completion);
 }
 
 static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 {
-       struct iser_conn *ib_conn;
-
-       ib_conn = (struct iser_conn *)cma_id->context;
+       struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
 
-       /* getting here when the state is UP means that the conn is being *
-        * terminated asynchronously from the iSCSI layer's perspective.  */
-       if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
-                                       ISER_CONN_TERMINATING)){
-               if (ib_conn->iscsi_conn)
-                       iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
+       if (iser_conn_terminate(iser_conn)) {
+               if (iser_conn->iscsi_conn)
+                       iscsi_conn_failure(iser_conn->iscsi_conn,
+                                          ISCSI_ERR_CONN_FAILED);
                else
                        iser_err("iscsi_iser connection isn't bound\n");
        }
+}
+
+static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
+                                bool destroy_device)
+{
+       struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
 
-       /* Complete the termination process if no posts are pending. This code
-        * block also exists in iser_handle_comp_error(), but it is needed here
-        * for cases of no flushes at all, e.g. discovery over rdma.
+       /*
+        * We are not guaranteed that we visited disconnected_handler
+        * by now, call it here to be safe that we handle CM drep
+        * and flush errors.
         */
-       if (ib_conn->post_recv_buf_count == 0 &&
-           (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
-               complete(&ib_conn->flush_completion);
-       }
-}
+       iser_disconnected_handler(cma_id);
+       iser_free_ib_conn_res(iser_conn, destroy_device);
+       complete(&iser_conn->ib_completion);
+};
 
 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-       struct iser_conn *ib_conn;
+       struct iser_conn *iser_conn;
+       int ret = 0;
 
-       ib_conn = (struct iser_conn *)cma_id->context;
+       iser_conn = (struct iser_conn *)cma_id->context;
        iser_info("event %d status %d conn %p id %p\n",
                  event->event, event->status, cma_id->context, cma_id);
 
-       mutex_lock(&ib_conn->state_mutex);
+       mutex_lock(&iser_conn->state_mutex);
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
                iser_addr_handler(cma_id);
@@ -824,57 +845,73 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
                iser_connect_error(cma_id);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
-       case RDMA_CM_EVENT_DEVICE_REMOVAL:
        case RDMA_CM_EVENT_ADDR_CHANGE:
-       case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                iser_disconnected_handler(cma_id);
                break;
+       case RDMA_CM_EVENT_DEVICE_REMOVAL:
+               /*
+                * we *must* destroy the device as we cannot rely
+                * on iscsid to be around to initiate error handling.
+                * also implicitly destroy the cma_id.
+                */
+               iser_cleanup_handler(cma_id, true);
+               iser_conn->ib_conn.cma_id = NULL;
+               ret = 1;
+               break;
+       case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+               iser_cleanup_handler(cma_id, false);
+               break;
        default:
                iser_err("Unexpected RDMA CM event (%d)\n", event->event);
                break;
        }
-       mutex_unlock(&ib_conn->state_mutex);
-       return 0;
+       mutex_unlock(&iser_conn->state_mutex);
+
+       return ret;
 }
 
-void iser_conn_init(struct iser_conn *ib_conn)
+void iser_conn_init(struct iser_conn *iser_conn)
 {
-       ib_conn->state = ISER_CONN_INIT;
-       ib_conn->post_recv_buf_count = 0;
-       atomic_set(&ib_conn->post_send_buf_count, 0);
-       init_completion(&ib_conn->stop_completion);
-       init_completion(&ib_conn->flush_completion);
-       init_completion(&ib_conn->up_completion);
-       INIT_LIST_HEAD(&ib_conn->conn_list);
-       spin_lock_init(&ib_conn->lock);
-       mutex_init(&ib_conn->state_mutex);
+       iser_conn->state = ISER_CONN_INIT;
+       iser_conn->ib_conn.post_recv_buf_count = 0;
+       init_completion(&iser_conn->ib_conn.flush_comp);
+       init_completion(&iser_conn->stop_completion);
+       init_completion(&iser_conn->ib_completion);
+       init_completion(&iser_conn->up_completion);
+       INIT_LIST_HEAD(&iser_conn->conn_list);
+       spin_lock_init(&iser_conn->ib_conn.lock);
+       mutex_init(&iser_conn->state_mutex);
 }
 
  /**
  * starts the process of connecting to the target
  * sleeps until the connection is established or rejected
  */
-int iser_connect(struct iser_conn   *ib_conn,
+int iser_connect(struct iser_conn   *iser_conn,
                 struct sockaddr    *src_addr,
                 struct sockaddr    *dst_addr,
                 int                 non_blocking)
 {
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        int err = 0;
 
-       mutex_lock(&ib_conn->state_mutex);
+       mutex_lock(&iser_conn->state_mutex);
 
-       sprintf(ib_conn->name, "%pISp", dst_addr);
+       sprintf(iser_conn->name, "%pISp", dst_addr);
 
-       iser_info("connecting to: %s\n", ib_conn->name);
+       iser_info("connecting to: %s\n", iser_conn->name);
 
        /* the device is known only --after-- address resolution */
        ib_conn->device = NULL;
 
-       ib_conn->state = ISER_CONN_PENDING;
+       iser_conn->state = ISER_CONN_PENDING;
+
+       ib_conn->beacon.wr_id = ISER_BEACON_WRID;
+       ib_conn->beacon.opcode = IB_WR_SEND;
 
        ib_conn->cma_id = rdma_create_id(iser_cma_handler,
-                                            (void *)ib_conn,
-                                            RDMA_PS_TCP, IB_QPT_RC);
+                                        (void *)iser_conn,
+                                        RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = PTR_ERR(ib_conn->cma_id);
                iser_err("rdma_create_id failed: %d\n", err);
@@ -888,27 +925,27 @@ int iser_connect(struct iser_conn   *ib_conn,
        }
 
        if (!non_blocking) {
-               wait_for_completion_interruptible(&ib_conn->up_completion);
+               wait_for_completion_interruptible(&iser_conn->up_completion);
 
-               if (ib_conn->state != ISER_CONN_UP) {
+               if (iser_conn->state != ISER_CONN_UP) {
                        err =  -EIO;
                        goto connect_failure;
                }
        }
-       mutex_unlock(&ib_conn->state_mutex);
+       mutex_unlock(&iser_conn->state_mutex);
 
        mutex_lock(&ig.connlist_mutex);
-       list_add(&ib_conn->conn_list, &ig.connlist);
+       list_add(&iser_conn->conn_list, &ig.connlist);
        mutex_unlock(&ig.connlist_mutex);
        return 0;
 
 id_failure:
        ib_conn->cma_id = NULL;
 addr_failure:
-       ib_conn->state = ISER_CONN_DOWN;
+       iser_conn->state = ISER_CONN_DOWN;
 connect_failure:
-       mutex_unlock(&ib_conn->state_mutex);
-       iser_conn_release(ib_conn);
+       mutex_unlock(&iser_conn->state_mutex);
+       iser_conn_release(iser_conn);
        return err;
 }
 
@@ -917,7 +954,7 @@ connect_failure:
  *
  * returns: 0 on success, errno code on failure
  */
-int iser_reg_page_vec(struct iser_conn     *ib_conn,
+int iser_reg_page_vec(struct ib_conn *ib_conn,
                      struct iser_page_vec *page_vec,
                      struct iser_mem_reg  *mem_reg)
 {
@@ -987,7 +1024,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
                            enum iser_data_dir cmd_dir)
 {
        struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       struct iser_conn *ib_conn = iser_task->ib_conn;
+       struct iser_conn *iser_conn = iser_task->iser_conn;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct fast_reg_descriptor *desc = reg->mem_h;
 
        if (!reg->is_mr)
@@ -1000,17 +1038,18 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
        spin_unlock_bh(&ib_conn->lock);
 }
 
-int iser_post_recvl(struct iser_conn *ib_conn)
+int iser_post_recvl(struct iser_conn *iser_conn)
 {
        struct ib_recv_wr rx_wr, *rx_wr_failed;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct ib_sge     sge;
        int ib_ret;
 
-       sge.addr   = ib_conn->login_resp_dma;
+       sge.addr   = iser_conn->login_resp_dma;
        sge.length = ISER_RX_LOGIN_SIZE;
        sge.lkey   = ib_conn->device->mr->lkey;
 
-       rx_wr.wr_id   = (unsigned long)ib_conn->login_resp_buf;
+       rx_wr.wr_id   = (unsigned long)iser_conn->login_resp_buf;
        rx_wr.sg_list = &sge;
        rx_wr.num_sge = 1;
        rx_wr.next    = NULL;
@@ -1024,20 +1063,21 @@ int iser_post_recvl(struct iser_conn *ib_conn)
        return ib_ret;
 }
 
-int iser_post_recvm(struct iser_conn *ib_conn, int count)
+int iser_post_recvm(struct iser_conn *iser_conn, int count)
 {
        struct ib_recv_wr *rx_wr, *rx_wr_failed;
        int i, ib_ret;
-       unsigned int my_rx_head = ib_conn->rx_desc_head;
+       struct ib_conn *ib_conn = &iser_conn->ib_conn;
+       unsigned int my_rx_head = iser_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;
 
        for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &ib_conn->rx_descs[my_rx_head];
+               rx_desc         = &iser_conn->rx_descs[my_rx_head];
                rx_wr->wr_id    = (unsigned long)rx_desc;
                rx_wr->sg_list  = &rx_desc->rx_sg;
                rx_wr->num_sge  = 1;
                rx_wr->next     = rx_wr + 1;
-               my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
+               my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
        }
 
        rx_wr--;
@@ -1049,7 +1089,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
-               ib_conn->rx_desc_head = my_rx_head;
+               iser_conn->rx_desc_head = my_rx_head;
        return ib_ret;
 }
 
@@ -1059,139 +1099,166 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
  *
  * returns 0 on success, -1 on failure
  */
-int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
+int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
+                  bool signal)
 {
        int               ib_ret;
        struct ib_send_wr send_wr, *send_wr_failed;
 
        ib_dma_sync_single_for_device(ib_conn->device->ib_device,
-               tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
+                                     tx_desc->dma_addr, ISER_HEADERS_LEN,
+                                     DMA_TO_DEVICE);
 
        send_wr.next       = NULL;
        send_wr.wr_id      = (unsigned long)tx_desc;
        send_wr.sg_list    = tx_desc->tx_sg;
        send_wr.num_sge    = tx_desc->num_sge;
        send_wr.opcode     = IB_WR_SEND;
-       send_wr.send_flags = IB_SEND_SIGNALED;
-
-       atomic_inc(&ib_conn->post_send_buf_count);
+       send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
 
        ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
-       if (ib_ret) {
+       if (ib_ret)
                iser_err("ib_post_send failed, ret:%d\n", ib_ret);
-               atomic_dec(&ib_conn->post_send_buf_count);
-       }
+
        return ib_ret;
 }
 
-static void iser_handle_comp_error(struct iser_tx_desc *desc,
-                               struct iser_conn *ib_conn)
+/**
+ * is_iser_tx_desc - Indicate if the completion wr_id
+ *     is a TX descriptor or not.
+ * @iser_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
+{
+       void *start = iser_conn->rx_descs;
+       int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
+
+       if (wr_id >= start && wr_id < start + len)
+               return false;
+
+       return true;
+}
+
+/**
+ * iser_handle_comp_error() - Handle error completion
+ * @ib_conn:   connection RDMA resources
+ * @wc:        work completion
+ *
+ * Notes: We may handle a FLUSH error completion and in this case
+ *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
+ *        error completion we should also notify iscsi layer that
+ *        connection is failed (in case we passed bind stage).
+ */
+static void
+iser_handle_comp_error(struct ib_conn *ib_conn,
+                      struct ib_wc *wc)
 {
-       if (desc && desc->type == ISCSI_TX_DATAOUT)
-               kmem_cache_free(ig.desc_cache, desc);
-
-       if (ib_conn->post_recv_buf_count == 0 &&
-           atomic_read(&ib_conn->post_send_buf_count) == 0) {
-               /**
-                * getting here when the state is UP means that the conn is
-                * being terminated asynchronously from the iSCSI layer's
-                * perspective. It is safe to peek at the connection state
-                * since iscsi_conn_failure is allowed to be called twice.
-                **/
-               if (ib_conn->state == ISER_CONN_UP)
-                       iscsi_conn_failure(ib_conn->iscsi_conn,
+       struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+                                                  ib_conn);
+
+       if (wc->status != IB_WC_WR_FLUSH_ERR)
+               if (iser_conn->iscsi_conn)
+                       iscsi_conn_failure(iser_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);
 
-               /* no more non completed posts to the QP, complete the
-                * termination process w.o worrying on disconnect event */
-               complete(&ib_conn->flush_completion);
+       if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
+               struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+
+               if (desc->type == ISCSI_TX_DATAOUT)
+                       kmem_cache_free(ig.desc_cache, desc);
+       } else {
+               ib_conn->post_recv_buf_count--;
        }
 }
 
-static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)
+/**
+ * iser_handle_wc - handle a single work completion
+ * @wc: work completion
+ *
+ * Soft-IRQ context, work completion can be either
+ * SEND or RECV, and can turn out successful or
+ * with error (or flush error).
+ */
+static void iser_handle_wc(struct ib_wc *wc)
 {
-       struct ib_cq  *cq = device->tx_cq[cq_index];
-       struct ib_wc  wc;
+       struct ib_conn *ib_conn;
        struct iser_tx_desc *tx_desc;
-       struct iser_conn *ib_conn;
-       int completed_tx = 0;
-
-       while (ib_poll_cq(cq, 1, &wc) == 1) {
-               tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
-               ib_conn = wc.qp->qp_context;
-               if (wc.status == IB_WC_SUCCESS) {
-                       if (wc.opcode == IB_WC_SEND)
-                               iser_snd_completion(tx_desc, ib_conn);
-                       else
-                               iser_err("expected opcode %d got %d\n",
-                                       IB_WC_SEND, wc.opcode);
+       struct iser_rx_desc *rx_desc;
+
+       ib_conn = wc->qp->qp_context;
+       if (wc->status == IB_WC_SUCCESS) {
+               if (wc->opcode == IB_WC_RECV) {
+                       rx_desc = (struct iser_rx_desc *)wc->wr_id;
+                       iser_rcv_completion(rx_desc, wc->byte_len,
+                                           ib_conn);
+               } else
+               if (wc->opcode == IB_WC_SEND) {
+                       tx_desc = (struct iser_tx_desc *)wc->wr_id;
+                       iser_snd_completion(tx_desc, ib_conn);
                } else {
-                       iser_err("tx id %llx status %d vend_err %x\n",
-                                wc.wr_id, wc.status, wc.vendor_err);
-                       if (wc.wr_id != ISER_FASTREG_LI_WRID) {
-                               atomic_dec(&ib_conn->post_send_buf_count);
-                               iser_handle_comp_error(tx_desc, ib_conn);
-                       }
+                       iser_err("Unknown wc opcode %d\n", wc->opcode);
                }
-               completed_tx++;
+       } else {
+               if (wc->status != IB_WC_WR_FLUSH_ERR)
+                       iser_err("wr id %llx status %d vend_err %x\n",
+                                wc->wr_id, wc->status, wc->vendor_err);
+               else
+                       iser_dbg("flush error: wr id %llx\n", wc->wr_id);
+
+               if (wc->wr_id != ISER_FASTREG_LI_WRID &&
+                   wc->wr_id != ISER_BEACON_WRID)
+                       iser_handle_comp_error(ib_conn, wc);
+
+               /* complete in case all flush errors were consumed */
+               if (wc->wr_id == ISER_BEACON_WRID)
+                       complete(&ib_conn->flush_comp);
        }
-       return completed_tx;
 }
 
-
+/**
+ * iser_cq_tasklet_fn - iSER completion polling loop
+ * @data: iSER completion context
+ *
+ * Soft-IRQ context, polling connection CQ until
+ * either CQ was empty or we exhausted polling budget
+ */
 static void iser_cq_tasklet_fn(unsigned long data)
 {
-       struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
-       struct iser_device  *device = cq_desc->device;
-       int cq_index = cq_desc->cq_index;
-       struct ib_cq         *cq = device->rx_cq[cq_index];
-        struct ib_wc        wc;
-        struct iser_rx_desc *desc;
-        unsigned long       xfer_len;
-       struct iser_conn *ib_conn;
-       int completed_tx, completed_rx = 0;
-
-       /* First do tx drain, so in a case where we have rx flushes and a successful
-        * tx completion we will still go through completion error handling.
-        */
-       completed_tx = iser_drain_tx_cq(device, cq_index);
-
-       while (ib_poll_cq(cq, 1, &wc) == 1) {
-               desc     = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
-               BUG_ON(desc == NULL);
-               ib_conn = wc.qp->qp_context;
-               if (wc.status == IB_WC_SUCCESS) {
-                       if (wc.opcode == IB_WC_RECV) {
-                               xfer_len = (unsigned long)wc.byte_len;
-                               iser_rcv_completion(desc, xfer_len, ib_conn);
-                       } else
-                               iser_err("expected opcode %d got %d\n",
-                                       IB_WC_RECV, wc.opcode);
-               } else {
-                       if (wc.status != IB_WC_WR_FLUSH_ERR)
-                               iser_err("rx id %llx status %d vend_err %x\n",
-                                       wc.wr_id, wc.status, wc.vendor_err);
-                       ib_conn->post_recv_buf_count--;
-                       iser_handle_comp_error(NULL, ib_conn);
-               }
-               completed_rx++;
-               if (!(completed_rx & 63))
-                       completed_tx += iser_drain_tx_cq(device, cq_index);
+       struct iser_comp *comp = (struct iser_comp *)data;
+       struct ib_cq *cq = comp->cq;
+       struct ib_wc *const wcs = comp->wcs;
+       int i, n, completed = 0;
+
+       while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+               for (i = 0; i < n; i++)
+                       iser_handle_wc(&wcs[i]);
+
+               completed += n;
+               if (completed >= iser_cq_poll_limit)
+                       break;
        }
-       /* #warning "it is assumed here that arming CQ only once its empty" *
-        * " would not cause interrupts to be missed"                       */
+
+       /*
+        * It is assumed here that arming the CQ only once it is empty
+        * would not cause interrupts to be missed.
+        */
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 
-       iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
+       iser_dbg("got %d completions\n", completed);
 }
 
 static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 {
-       struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
-       struct iser_device  *device = cq_desc->device;
-       int cq_index = cq_desc->cq_index;
+       struct iser_comp *comp = cq_context;
 
-       tasklet_schedule(&device->cq_tasklet[cq_index]);
+       tasklet_schedule(&comp->tasklet);
 }
 
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
index da8ff124762a3362eab5f26c1cd52f4bcfdac099..0bea5776bcbcc945166062d24dd70aedc9e9e06c 100644 (file)
@@ -2609,58 +2609,45 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
        return ret;
 }
 
-static inline enum ib_t10_dif_type
-se2ib_prot_type(enum target_prot_type prot_type)
-{
-       switch (prot_type) {
-       case TARGET_DIF_TYPE0_PROT:
-               return IB_T10DIF_NONE;
-       case TARGET_DIF_TYPE1_PROT:
-               return IB_T10DIF_TYPE1;
-       case TARGET_DIF_TYPE2_PROT:
-               return IB_T10DIF_TYPE2;
-       case TARGET_DIF_TYPE3_PROT:
-               return IB_T10DIF_TYPE3;
-       default:
-               return IB_T10DIF_NONE;
-       }
-}
+static inline void
+isert_set_dif_domain(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs,
+                    struct ib_sig_domain *domain)
+{
+       domain->sig_type = IB_SIG_TYPE_T10_DIF;
+       domain->sig.dif.bg_type = IB_T10DIF_CRC;
+       domain->sig.dif.pi_interval = se_cmd->se_dev->dev_attrib.block_size;
+       domain->sig.dif.ref_tag = se_cmd->reftag_seed;
+       /*
+        * At the moment we hard code these, but if in the future
+        * the target core would like to use them, we will take them
+        * from se_cmd.
+        */
+       domain->sig.dif.apptag_check_mask = 0xffff;
+       domain->sig.dif.app_escape = true;
+       domain->sig.dif.ref_escape = true;
+       if (se_cmd->prot_type == TARGET_DIF_TYPE1_PROT ||
+           se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)
+               domain->sig.dif.ref_remap = true;
+};
 
 static int
 isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
 {
-       enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type);
-
-       sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
-       sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
-       sig_attrs->mem.sig.dif.pi_interval =
-                               se_cmd->se_dev->dev_attrib.block_size;
-       sig_attrs->wire.sig.dif.pi_interval =
-                               se_cmd->se_dev->dev_attrib.block_size;
-
        switch (se_cmd->prot_op) {
        case TARGET_PROT_DIN_INSERT:
        case TARGET_PROT_DOUT_STRIP:
-               sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
-               sig_attrs->wire.sig.dif.type = ib_prot_type;
-               sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+               sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+               isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
                break;
        case TARGET_PROT_DOUT_INSERT:
        case TARGET_PROT_DIN_STRIP:
-               sig_attrs->mem.sig.dif.type = ib_prot_type;
-               sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
-               sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+               sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+               isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
                break;
        case TARGET_PROT_DIN_PASS:
        case TARGET_PROT_DOUT_PASS:
-               sig_attrs->mem.sig.dif.type = ib_prot_type;
-               sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
-               sig_attrs->wire.sig.dif.type = ib_prot_type;
-               sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
-               sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+               isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->wire);
+               isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
                break;
        default:
                pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
index 9709b30e2d690da604336a48f96d3757bb7d25c5..69f5378455b73df80290e221d1761e2abb0968bc 100644 (file)
 #define MLX5_SIG_WQE_SIZE      (MLX5_SEND_WQE_BB * 5)
 #define MLX5_DIF_SIZE          8
 #define MLX5_STRIDE_BLOCK_OP   0x400
+#define MLX5_CPY_GRD_MASK      0xc0
+#define MLX5_CPY_APP_MASK      0x30
+#define MLX5_CPY_REF_MASK      0x0f
+#define MLX5_BSF_INC_REFTAG    (1 << 6)
+#define MLX5_BSF_INL_VALID     (1 << 15)
+#define MLX5_BSF_REFRESH_DIF   (1 << 14)
+#define MLX5_BSF_REPEAT_BLOCK  (1 << 7)
+#define MLX5_BSF_APPTAG_ESCAPE 0x1
+#define MLX5_BSF_APPREF_ESCAPE 0x2
 
 enum mlx5_qp_optpar {
        MLX5_QP_OPTPAR_ALT_ADDR_PATH            = 1 << 0,
@@ -287,6 +296,22 @@ struct mlx5_wqe_inline_seg {
        __be32  byte_count;
 };
 
+enum mlx5_sig_type {
+       MLX5_DIF_CRC = 0x1,
+       MLX5_DIF_IPCS = 0x2,
+};
+
+struct mlx5_bsf_inl {
+       __be16          vld_refresh;
+       __be16          dif_apptag;
+       __be32          dif_reftag;
+       u8              sig_type;
+       u8              rp_inv_seed;
+       u8              rsvd[3];
+       u8              dif_inc_ref_guard_check;
+       __be16          dif_app_bitmask_check;
+};
+
 struct mlx5_bsf {
        struct mlx5_bsf_basic {
                u8              bsf_size_sbs;
@@ -310,14 +335,8 @@ struct mlx5_bsf {
                __be32          w_tfs_psv;
                __be32          m_tfs_psv;
        } ext;
-       struct mlx5_bsf_inl {
-               __be32          w_inl_vld;
-               __be32          w_rsvd;
-               __be64          w_block_format;
-               __be32          m_inl_vld;
-               __be32          m_rsvd;
-               __be64          m_block_format;
-       } inl;
+       struct mlx5_bsf_inl     w_inl;
+       struct mlx5_bsf_inl     m_inl;
 };
 
 struct mlx5_klm {
index ed44cc07a7b3d8659eecf8a99aefb846b37d7f74..470a011d6fa49d0e44c512fb927000b1d69bf8ba 100644 (file)
@@ -491,20 +491,14 @@ struct ib_mr_init_attr {
        u32         flags;
 };
 
-enum ib_signature_type {
-       IB_SIG_TYPE_T10_DIF,
-};
-
 /**
- * T10-DIF Signature types
- * T10-DIF types are defined by SCSI
- * specifications.
+ * Signature types
+ * IB_SIG_TYPE_NONE: Unprotected.
+ * IB_SIG_TYPE_T10_DIF: Type T10-DIF
  */
-enum ib_t10_dif_type {
-       IB_T10DIF_NONE,
-       IB_T10DIF_TYPE1,
-       IB_T10DIF_TYPE2,
-       IB_T10DIF_TYPE3
+enum ib_signature_type {
+       IB_SIG_TYPE_NONE,
+       IB_SIG_TYPE_T10_DIF,
 };
 
 /**
@@ -520,24 +514,26 @@ enum ib_t10_dif_bg_type {
 /**
  * struct ib_t10_dif_domain - Parameters specific for T10-DIF
  *     domain.
- * @type: T10-DIF type (0|1|2|3)
  * @bg_type: T10-DIF block guard type (CRC|CSUM)
  * @pi_interval: protection information interval.
  * @bg: seed of guard computation.
  * @app_tag: application tag of guard block
  * @ref_tag: initial guard block reference tag.
- * @type3_inc_reftag: T10-DIF type 3 does not state
- *     about the reference tag, it is the user
- *     choice to increment it or not.
+ * @ref_remap: Indicate whether the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
  */
 struct ib_t10_dif_domain {
-       enum ib_t10_dif_type    type;
        enum ib_t10_dif_bg_type bg_type;
        u16                     pi_interval;
        u16                     bg;
        u16                     app_tag;
        u32                     ref_tag;
-       bool                    type3_inc_reftag;
+       bool                    ref_remap;
+       bool                    app_escape;
+       bool                    ref_escape;
+       u16                     apptag_check_mask;
 };
 
 /**