IB/ipath: Duplicate RDMA reads can cause responder to NAK inappropriately
Ralph Campbell [Mon, 18 Jun 2007 21:24:44 +0000 (14:24 -0700)]
A duplicate RDMA read request can fool the responder into NAKing a new
RDMA read request because the responder wasn't keeping track of
whether the queue of RDMA read requests had been sent at least once.
For example, requester sends 4 2K byte RDMA read requests, times out,
and resends the first, then sees the 4 responses, then sends a 5th
RDMA read or atomic operation.  The responder sees the 4 requests,
sends 4 responses, sees the resent 1st request, rewinds the queue,
then sees the 5th request but thinks the queue is full and that the
requester is invalidly sending a 5th new request.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_verbs.h

index 014d811..9e71239 100644 (file)
@@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                        if (len > pmtu) {
                                len = pmtu;
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-                       } else
+                       } else {
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+                               e->sent = 1;
+                       }
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_rdma_psn = e->psn;
@@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                                cpu_to_be32(e->atomic_data);
                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
                        bth2 = e->psn;
+                       e->sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                break;
@@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -1479,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
        spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+       unsigned long flags;
+       unsigned next;
+
+       next = n + 1;
+       if (next > IPATH_MAX_RDMA_ATOMIC)
+               next = 0;
+       spin_lock_irqsave(&qp->s_lock, flags);
+       if (n == qp->s_tail_ack_queue) {
+               qp->s_tail_ack_queue = next;
+               qp->s_ack_state = OP(ACKNOWLEDGE);
+       }
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
 /**
  * ipath_rc_rcv - process an incoming RC packet
  * @dev: the device this packet came in on
@@ -1741,8 +1761,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
                /* RETH comes after BTH */
                if (!header_in_data)
@@ -1777,6 +1800,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        e->rdma_sge.sge.sge_length = 0;
                }
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn;
                /*
                 * We need to increment the MSN here instead of when we
@@ -1812,8 +1836,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                if (!header_in_data)
                        ateth = &ohdr->u.atomic_eth;
                else
@@ -1838,6 +1865,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                                      be64_to_cpu(ateth->compare_data),
                                      sdata);
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn & IPATH_PSN_MASK;
                qp->r_msn++;
                qp->r_psn++;
index 088b837..458f822 100644 (file)
@@ -321,6 +321,7 @@ struct ipath_sge_state {
  */
 struct ipath_ack_entry {
        u8 opcode;
+       u8 sent;
        u32 psn;
        union {
                struct ipath_sge_state rdma_sge;