IB/core: Add support for "send with invalidate" work requests
authorRoland Dreier <rolandd@cisco.com>
Thu, 17 Apr 2008 04:09:32 +0000 (21:09 -0700)
committerRoland Dreier <rolandd@cisco.com>
Thu, 17 Apr 2008 04:09:32 +0000 (21:09 -0700)
Add a new IB_WR_SEND_WITH_INV send opcode that can be used to mark a
"send with invalidate" work request as defined in the iWARP verbs and
the InfiniBand base memory management extensions.  Also put "imm_data"
and a new "invalidate_rkey" member in a new "ex" union in struct
ib_send_wr. The invalidate_rkey member can be used to pass in an
R_Key/STag to be invalidated.  Add this new union to struct
ib_uverbs_send_wr.  Add code to copy the invalidate_rkey field in
ib_uverbs_post_send().

Fix up low-level drivers to deal with the change to struct ib_send_wr,
and just remove the imm_data initialization from net/sunrpc/xprtrdma/,
since that code never does any send with immediate operations.

Also, move the existing IB_DEVICE_SEND_W_INV flag to a new bit, since
the iWARP drivers currently in the tree set the bit.  The amso1100
driver at least will silently fail to honor the IB_SEND_INVALIDATE bit
if passed in as part of userspace send requests (since it does not
implement kernel bypass work request queueing).  Remove the flag from
all existing drivers that set it until we know which ones are OK.

The values chosen for the new flag is not consecutive to avoid clashing
with flags defined in the XRC patches, which are not merged yet but
which are already in use and are likely to be merged soon.

This resurrects a patch sent long ago by Mikkel Hagen <mhagen@iol.unh.edu>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
15 files changed:
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/amso1100/c2_rnic.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_hw.c
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
net/sunrpc/xprtrdma/verbs.c

index 9e98cec6230fd8398f7c0c4ddc7071e3f22b5366..2c3bff5fe8676f304d24b0d4490030565e9f20c6 100644 (file)
@@ -1463,7 +1463,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                next->num_sge    = user_wr->num_sge;
                next->opcode     = user_wr->opcode;
                next->send_flags = user_wr->send_flags;
-               next->imm_data   = (__be32 __force) user_wr->imm_data;
 
                if (is_ud) {
                        next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
@@ -1476,14 +1475,24 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                        next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
                } else {
                        switch (next->opcode) {
-                       case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
+                               next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
+                       case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_READ:
                                next->wr.rdma.remote_addr =
                                        user_wr->wr.rdma.remote_addr;
                                next->wr.rdma.rkey        =
                                        user_wr->wr.rdma.rkey;
                                break;
+                       case IB_WR_SEND_WITH_IMM:
+                               next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
+                               break;
+                       case IB_WR_SEND_WITH_INV:
+                               next->ex.invalidate_rkey =
+                                       user_wr->ex.invalidate_rkey;
+                               break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                next->wr.atomic.remote_addr =
index 7a625524e0c5a44771cc4ecc33a4c642ada54eba..b1441aeb60c27d017d66e667ffbcd071f56f5c36 100644 (file)
@@ -455,7 +455,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
             IB_DEVICE_CURR_QP_STATE_MOD |
             IB_DEVICE_SYS_IMAGE_GUID |
             IB_DEVICE_ZERO_STAG |
-            IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+            IB_DEVICE_MEM_WINDOW);
 
        /* Allocate the qptr_array */
        c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
index 50e1f2a16e0c44bf975fec6a2e2f1d4b295ff057..ca7265443c05c37cf79f5fc35811b93dd206b53d 100644 (file)
@@ -1109,8 +1109,7 @@ int iwch_register_device(struct iwch_dev *dev)
        memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
        dev->ibdev.owner = THIS_MODULE;
        dev->device_cap_flags =
-           (IB_DEVICE_ZERO_STAG |
-            IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+           (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
 
        dev->ibdev.uverbs_cmd_mask =
            (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
index bc5d9b0813e5ec7bae9afa7659d890d9ac65aaab..8891c3b0a3d51d1ecc1e2e47766a27d91a4c8be6 100644 (file)
@@ -72,7 +72,7 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
        wqe->send.reserved[2] = 0;
        if (wr->opcode == IB_WR_SEND_WITH_IMM) {
                plen = 4;
-               wqe->send.sgl[0].stag = wr->imm_data;
+               wqe->send.sgl[0].stag = wr->ex.imm_data;
                wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
                wqe->send.num_sgle = __constant_cpu_to_be32(0);
                *flit_cnt = 5;
@@ -112,7 +112,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 
        if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                plen = 4;
-               wqe->write.sgl[0].stag = wr->imm_data;
+               wqe->write.sgl[0].stag = wr->ex.imm_data;
                wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
                wqe->write.num_sgle = __constant_cpu_to_be32(0);
                *flit_cnt = 6;
index 2ce8cffb8664537035586cf30a5004adcdb83a25..a20bbf4661881a095a34aab7cf13f5a443b832df 100644 (file)
@@ -188,7 +188,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
        if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
            send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                /* this might not work as long as HW does not support it */
-               wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+               wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
                wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
        }
 
index 467981905bbeb09672b2d60e89c07a5a45c3c257..c405dfba5531329ae4324ee7690212640522a370 100644 (file)
@@ -308,7 +308,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                        else {
                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.imm_data;
+                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -346,7 +346,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.imm_data;
+                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
@@ -490,7 +490,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -526,7 +526,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
index bcaa2914e341195f9d2832fa01bcf460cf044bd6..8ac5c1d82ccdb9cdb49b1b01db264c38e9f04fd0 100644 (file)
@@ -310,7 +310,7 @@ again:
        switch (wqe->wr.opcode) {
        case IB_WR_SEND_WITH_IMM:
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.imm_data;
+               wc.imm_data = wqe->wr.ex.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
                if (!ipath_get_rwqe(qp, 0)) {
@@ -339,7 +339,7 @@ again:
                        goto err;
                }
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.imm_data;
+               wc.imm_data = wqe->wr.ex.imm_data;
                if (!ipath_get_rwqe(qp, 1))
                        goto rnr_nak;
                /* FALLTHROUGH */
index 2dd8de20d221a657f670b6bc63b3706ac94f53c3..bfe8926b5514a7a9e4b3bd6c5b67ab63768bf6c7 100644 (file)
@@ -94,7 +94,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.imm_data;
+                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -123,7 +123,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.imm_data;
+                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
@@ -152,7 +152,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -177,7 +177,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                        qp->s_state =
                                OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
index 918f520706533e457f3624c367e4f0d71207f286..8b6a261c89e328de7f9c4a230aa528d5e980eff0 100644 (file)
@@ -95,7 +95,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 
        if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = swqe->wr.imm_data;
+               wc.imm_data = swqe->wr.ex.imm_data;
        } else {
                wc.wc_flags = 0;
                wc.imm_data = 0;
@@ -327,7 +327,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
        }
        if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->wr.imm_data;
+               ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
                bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
        } else
                bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
index f5210c17e312d6e1147164a13e0b29cf9957adc7..38e651a67589d20bd3546c09081599f467212e71 100644 (file)
@@ -1249,7 +1249,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data    = wr->imm_data;
+               sqp->ud_header.immediate_data    = wr->ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1492,7 +1492,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ctrl->imm = wr->imm_data;
+                       ctrl->imm = wr->ex.imm_data;
                else
                        ctrl->imm = 0;
 
index 8433897624bc79f5a7733dc2e3f81bd28719a3da..b3fd6b05d79d805597f5bd37bc8552270babb999 100644 (file)
@@ -1532,7 +1532,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data = wr->imm_data;
+               sqp->ud_header.immediate_data = wr->ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1679,7 +1679,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
@@ -2020,7 +2020,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
index 134189d77ed32411132c6c451dc9c457180e613c..aa53aab91bf8a3341dd3df72d155c4b2201db2c1 100644 (file)
@@ -393,7 +393,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nesadapter->base_pd = 1;
 
        nesadapter->device_cap_flags =
-                       IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
+               IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
 
        nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
                        [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
index 64a721fcbc1c4db215cc949816bf58ac08fbe3b2..8d65bf0a625b0ef14a119760246e8acd323d6bbc 100644 (file)
@@ -533,7 +533,10 @@ struct ib_uverbs_send_wr {
        __u32 num_sge;
        __u32 opcode;
        __u32 send_flags;
-       __u32 imm_data;
+       union {
+               __u32 imm_data;
+               __u32 invalidate_rkey;
+       } ex;
        union {
                struct {
                        __u64 remote_addr;
index 66928e9cab19af76bea2c19215baefcfa190b15f..c48f6af5ef9a0987db49e774cf6cc95ad28f2909 100644 (file)
@@ -94,7 +94,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_SRQ_RESIZE            = (1<<13),
        IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
        IB_DEVICE_ZERO_STAG             = (1<<15),
-       IB_DEVICE_SEND_W_INV            = (1<<16),
+       IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
        IB_DEVICE_MEM_WINDOW            = (1<<17),
        /*
         * Devices should set IB_DEVICE_UD_IP_SUM if they support
@@ -105,6 +105,7 @@ enum ib_device_cap_flags {
         */
        IB_DEVICE_UD_IP_CSUM            = (1<<18),
        IB_DEVICE_UD_TSO                = (1<<19),
+       IB_DEVICE_SEND_W_INV            = (1<<21),
 };
 
 enum ib_atomic_cap {
@@ -625,7 +626,8 @@ enum ib_wr_opcode {
        IB_WR_RDMA_READ,
        IB_WR_ATOMIC_CMP_AND_SWP,
        IB_WR_ATOMIC_FETCH_AND_ADD,
-       IB_WR_LSO
+       IB_WR_LSO,
+       IB_WR_SEND_WITH_INV,
 };
 
 enum ib_send_flags {
@@ -649,7 +651,10 @@ struct ib_send_wr {
        int                     num_sge;
        enum ib_wr_opcode       opcode;
        int                     send_flags;
-       __be32                  imm_data;
+       union {
+               __be32          imm_data;
+               u32             invalidate_rkey;
+       } ex;
        union {
                struct {
                        u64     remote_addr;
index ffbf22a1d2ca0e1a24c76f9e7231732ff7ce8dfd..8ea283ecc522a8a5e8e033b12f6485fc5ef456c4 100644 (file)
@@ -1573,7 +1573,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        send_wr.sg_list = req->rl_send_iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
-       send_wr.imm_data = 0;
        if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
                ib_dma_sync_single_for_device(ia->ri_id->device,
                        req->rl_send_iov[3].addr, req->rl_send_iov[3].length,