IB/core: Add support for "send with invalidate" work requests
Roland Dreier [Thu, 17 Apr 2008 04:09:32 +0000 (21:09 -0700)]
Add a new IB_WR_SEND_WITH_INV send opcode that can be used to mark a
"send with invalidate" work request as defined in the iWARP verbs and
the InfiniBand base memory management extensions.  Also put "imm_data"
and a new "invalidate_rkey" member in a new "ex" union in struct
ib_send_wr. The invalidate_rkey member can be used to pass in an
R_Key/STag to be invalidated.  Add this new union to struct
ib_uverbs_send_wr.  Add code to copy the invalidate_rkey field in
ib_uverbs_post_send().

Fix up low-level drivers to deal with the change to struct ib_send_wr,
and just remove the imm_data initialization from net/sunrpc/xprtrdma/,
since that code never does any send with immediate operations.

Also, move the existing IB_DEVICE_SEND_W_INV flag to a new bit, since
the iWARP drivers currently in the tree set the bit.  The amso1100
driver at least will silently fail to honor the IB_SEND_INVALIDATE bit
if passed in as part of userspace send requests (since it does not
implement kernel bypass work request queueing).  Remove the flag from
all existing drivers that set it until we know which ones are OK.

The value chosen for the new flag is not consecutive, to avoid clashing
with flags defined in the XRC patches, which are not merged yet but
which are already in use and are likely to be merged soon.

This resurrects a patch sent long ago by Mikkel Hagen <mhagen@iol.unh.edu>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

15 files changed:
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/amso1100/c2_rnic.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_hw.c
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
net/sunrpc/xprtrdma/verbs.c

index 9e98cec..2c3bff5 100644 (file)
@@ -1463,7 +1463,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                next->num_sge    = user_wr->num_sge;
                next->opcode     = user_wr->opcode;
                next->send_flags = user_wr->send_flags;
-               next->imm_data   = (__be32 __force) user_wr->imm_data;
 
                if (is_ud) {
                        next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
@@ -1476,14 +1475,24 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                        next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
                } else {
                        switch (next->opcode) {
-                       case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
+                               next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
+                       case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_READ:
                                next->wr.rdma.remote_addr =
                                        user_wr->wr.rdma.remote_addr;
                                next->wr.rdma.rkey        =
                                        user_wr->wr.rdma.rkey;
                                break;
+                       case IB_WR_SEND_WITH_IMM:
+                               next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
+                               break;
+                       case IB_WR_SEND_WITH_INV:
+                               next->ex.invalidate_rkey =
+                                       user_wr->ex.invalidate_rkey;
+                               break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                next->wr.atomic.remote_addr =
index 7a62552..b1441ae 100644 (file)
@@ -455,7 +455,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
             IB_DEVICE_CURR_QP_STATE_MOD |
             IB_DEVICE_SYS_IMAGE_GUID |
             IB_DEVICE_ZERO_STAG |
-            IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+            IB_DEVICE_MEM_WINDOW);
 
        /* Allocate the qptr_array */
        c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
index 50e1f2a..ca72654 100644 (file)
@@ -1109,8 +1109,7 @@ int iwch_register_device(struct iwch_dev *dev)
        memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
        dev->ibdev.owner = THIS_MODULE;
        dev->device_cap_flags =
-           (IB_DEVICE_ZERO_STAG |
-            IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+           (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
 
        dev->ibdev.uverbs_cmd_mask =
            (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
index bc5d9b0..8891c3b 100644 (file)
@@ -72,7 +72,7 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
        wqe->send.reserved[2] = 0;
        if (wr->opcode == IB_WR_SEND_WITH_IMM) {
                plen = 4;
-               wqe->send.sgl[0].stag = wr->imm_data;
+               wqe->send.sgl[0].stag = wr->ex.imm_data;
                wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
                wqe->send.num_sgle = __constant_cpu_to_be32(0);
                *flit_cnt = 5;
@@ -112,7 +112,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 
        if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                plen = 4;
-               wqe->write.sgl[0].stag = wr->imm_data;
+               wqe->write.sgl[0].stag = wr->ex.imm_data;
                wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
                wqe->write.num_sgle = __constant_cpu_to_be32(0);
                *flit_cnt = 6;
index 2ce8cff..a20bbf4 100644 (file)
@@ -188,7 +188,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
        if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
            send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                /* this might not work as long as HW does not support it */
-               wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+               wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
                wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
        }
 
index 4679819..c405dfb 100644 (file)
@@ -308,7 +308,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                        else {
                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.imm_data;
+                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -346,7 +346,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.imm_data;
+                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
@@ -490,7 +490,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -526,7 +526,7 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
index bcaa291..8ac5c1d 100644 (file)
@@ -310,7 +310,7 @@ again:
        switch (wqe->wr.opcode) {
        case IB_WR_SEND_WITH_IMM:
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.imm_data;
+               wc.imm_data = wqe->wr.ex.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
                if (!ipath_get_rwqe(qp, 0)) {
@@ -339,7 +339,7 @@ again:
                        goto err;
                }
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = wqe->wr.imm_data;
+               wc.imm_data = wqe->wr.ex.imm_data;
                if (!ipath_get_rwqe(qp, 1))
                        goto rnr_nak;
                /* FALLTHROUGH */
index 2dd8de2..bfe8926 100644 (file)
@@ -94,7 +94,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
-                               ohdr->u.imm_data = wqe->wr.imm_data;
+                               ohdr->u.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -123,7 +123,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                                qp->s_state =
                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.imm_data;
+                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
@@ -152,7 +152,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -177,7 +177,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                        qp->s_state =
                                OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
-                       ohdr->u.imm_data = wqe->wr.imm_data;
+                       ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
index 918f520..8b6a261 100644 (file)
@@ -95,7 +95,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 
        if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                wc.wc_flags = IB_WC_WITH_IMM;
-               wc.imm_data = swqe->wr.imm_data;
+               wc.imm_data = swqe->wr.ex.imm_data;
        } else {
                wc.wc_flags = 0;
                wc.imm_data = 0;
@@ -327,7 +327,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
        }
        if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->wr.imm_data;
+               ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
                bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
        } else
                bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
index f5210c1..38e651a 100644 (file)
@@ -1249,7 +1249,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data    = wr->imm_data;
+               sqp->ud_header.immediate_data    = wr->ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1492,7 +1492,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ctrl->imm = wr->imm_data;
+                       ctrl->imm = wr->ex.imm_data;
                else
                        ctrl->imm = 0;
 
index 8433897..b3fd6b0 100644 (file)
@@ -1532,7 +1532,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data = wr->imm_data;
+               sqp->ud_header.immediate_data = wr->ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1679,7 +1679,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
@@ -2020,7 +2020,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
index 134189d..aa53aab 100644 (file)
@@ -393,7 +393,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
        nesadapter->base_pd = 1;
 
        nesadapter->device_cap_flags =
-                       IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
+               IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
 
        nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
                        [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
index 64a721f..8d65bf0 100644 (file)
@@ -533,7 +533,10 @@ struct ib_uverbs_send_wr {
        __u32 num_sge;
        __u32 opcode;
        __u32 send_flags;
-       __u32 imm_data;
+       union {
+               __u32 imm_data;
+               __u32 invalidate_rkey;
+       } ex;
        union {
                struct {
                        __u64 remote_addr;
index 66928e9..c48f6af 100644 (file)
@@ -94,7 +94,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_SRQ_RESIZE            = (1<<13),
        IB_DEVICE_N_NOTIFY_CQ           = (1<<14),
        IB_DEVICE_ZERO_STAG             = (1<<15),
-       IB_DEVICE_SEND_W_INV            = (1<<16),
+       IB_DEVICE_RESERVED              = (1<<16), /* old SEND_W_INV */
        IB_DEVICE_MEM_WINDOW            = (1<<17),
        /*
         * Devices should set IB_DEVICE_UD_IP_SUM if they support
@@ -105,6 +105,7 @@ enum ib_device_cap_flags {
         */
        IB_DEVICE_UD_IP_CSUM            = (1<<18),
        IB_DEVICE_UD_TSO                = (1<<19),
+       IB_DEVICE_SEND_W_INV            = (1<<21),
 };
 
 enum ib_atomic_cap {
@@ -625,7 +626,8 @@ enum ib_wr_opcode {
        IB_WR_RDMA_READ,
        IB_WR_ATOMIC_CMP_AND_SWP,
        IB_WR_ATOMIC_FETCH_AND_ADD,
-       IB_WR_LSO
+       IB_WR_LSO,
+       IB_WR_SEND_WITH_INV,
 };
 
 enum ib_send_flags {
@@ -649,7 +651,10 @@ struct ib_send_wr {
        int                     num_sge;
        enum ib_wr_opcode       opcode;
        int                     send_flags;
-       __be32                  imm_data;
+       union {
+               __be32          imm_data;
+               u32             invalidate_rkey;
+       } ex;
        union {
                struct {
                        u64     remote_addr;
index ffbf22a..8ea283e 100644 (file)
@@ -1573,7 +1573,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        send_wr.sg_list = req->rl_send_iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
-       send_wr.imm_data = 0;
        if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
                ib_dma_sync_single_for_device(ia->ri_id->device,
                        req->rl_send_iov[3].addr, req->rl_send_iov[3].length,