[PATCH][24/26] IB/mthca: QP locking optimization

From: Roland Dreier
Date: Thu Mar 03 2005 - 18:45:39 EST


From: Michael S. Tsirkin <mst@xxxxxxxxxxxxxx>

1. Split the QP spinlock into separate send and receive locks.

The only place where we have to lock both is upon modify_qp, and
that is not on data path.

2. Avoid taking any QP locks when polling CQ.

This last part is achieved by getting rid of the cur field in
mthca_wq, and calculating the number of outstanding WQEs by
comparing the head and tail fields. head is only updated by
post, tail is only updated by poll.

In a rare case where an overrun is detected, a CQ is locked and the
overrun condition is re-tested, to avoid any potential for stale
tail values.

Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxxxxxx>
Signed-off-by: Roland Dreier <roland@xxxxxxxxxxx>


--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-03-03 14:13:01.214633912 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-03-03 14:13:03.417155819 -0800
@@ -423,15 +423,6 @@
is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;

if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
- if (*cur_qp) {
- if (*freed) {
- wmb();
- update_cons_index(dev, cq, *freed);
- *freed = 0;
- }
- spin_unlock(&(*cur_qp)->lock);
- }
-
/*
* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
@@ -446,8 +437,6 @@
err = -EINVAL;
goto out;
}
-
- spin_lock(&(*cur_qp)->lock);
}

entry->qp_num = (*cur_qp)->qpn;
@@ -465,9 +454,9 @@
}

if (wq->last_comp < wqe_index)
- wq->cur -= wqe_index - wq->last_comp;
+ wq->tail += wqe_index - wq->last_comp;
else
- wq->cur -= wq->max - wq->last_comp + wqe_index;
+ wq->tail += wqe_index + wq->max - wq->last_comp;

wq->last_comp = wqe_index;

@@ -551,9 +540,6 @@
update_cons_index(dev, cq, freed);
}

- if (qp)
- spin_unlock(&qp->lock);
-
spin_unlock_irqrestore(&cq->lock, flags);

return err == 0 || err == -EAGAIN ? npolled : err;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-03-03 14:13:02.120437293 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.h 2005-03-03 14:13:03.416156036 -0800
@@ -166,21 +166,22 @@
};

struct mthca_wq {
- int max;
- int cur;
- int next;
- int last_comp;
- void *last;
- int max_gs;
- int wqe_shift;
+ spinlock_t lock;
+ int max;
+ unsigned next_ind;
+ unsigned last_comp;
+ unsigned head;
+ unsigned tail;
+ void *last;
+ int max_gs;
+ int wqe_shift;

- int db_index; /* Arbel only */
- u32 *db;
+ int db_index; /* Arbel only */
+ u32 *db;
};

struct mthca_qp {
struct ib_qp ibqp;
- spinlock_t lock;
atomic_t refcount;
u32 qpn;
int is_direct;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-03-03 14:13:02.567340285 -0800
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-03-03 14:13:03.418155602 -0800
@@ -577,9 +577,11 @@
else
cur_state = attr->cur_qp_state;
} else {
- spin_lock_irq(&qp->lock);
+ spin_lock_irq(&qp->sq.lock);
+ spin_lock(&qp->rq.lock);
cur_state = qp->state;
- spin_unlock_irq(&qp->lock);
+ spin_unlock(&qp->rq.lock);
+ spin_unlock_irq(&qp->sq.lock);
}

if (attr_mask & IB_QP_STATE) {
@@ -1076,6 +1078,16 @@
}
}

+static void mthca_wq_init(struct mthca_wq* wq)
+{
+ spin_lock_init(&wq->lock);
+ wq->next_ind = 0;
+ wq->last_comp = wq->max - 1;
+ wq->head = 0;
+ wq->tail = 0;
+ wq->last = NULL;
+}
+
static int mthca_alloc_qp_common(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -1087,20 +1099,13 @@
int ret;
int i;

- spin_lock_init(&qp->lock);
atomic_set(&qp->refcount, 1);
qp->state = IB_QPS_RESET;
qp->atomic_rd_en = 0;
qp->resp_depth = 0;
qp->sq_policy = send_policy;
- qp->rq.cur = 0;
- qp->sq.cur = 0;
- qp->rq.next = 0;
- qp->sq.next = 0;
- qp->rq.last_comp = qp->rq.max - 1;
- qp->sq.last_comp = qp->sq.max - 1;
- qp->rq.last = NULL;
- qp->sq.last = NULL;
+ mthca_wq_init(&qp->sq);
+ mthca_wq_init(&qp->rq);

ret = mthca_alloc_memfree(dev, qp);
if (ret)
@@ -1394,6 +1399,24 @@
return 0;
}

+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
+ struct ib_cq *ib_cq)
+{
+ unsigned cur;
+ struct mthca_cq *cq;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max;
+}
+
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1411,16 +1434,18 @@
int ind;
u8 op0 = 0;

- spin_lock_irqsave(&qp->lock, flags);
+ spin_lock_irqsave(&qp->sq.lock, flags);

/* XXX check that state is OK to post send */

- ind = qp->sq.next;
+ ind = qp->sq.next_ind;

for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (qp->sq.cur + nreq >= qp->sq.max) {
- mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
- qp->sq.cur, qp->sq.max, nreq);
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1580,7 +1605,7 @@
if (likely(nreq)) {
u32 doorbell[2];

- doorbell[0] = cpu_to_be32(((qp->sq.next << qp->sq.wqe_shift) +
+ doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
qp->send_wqe_offset) | f0 | op0);
doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);

@@ -1591,10 +1616,10 @@
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

- qp->sq.cur += nreq;
- qp->sq.next = ind;
+ qp->sq.next_ind = ind;
+ qp->sq.head += nreq;

- spin_unlock_irqrestore(&qp->lock, flags);
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}

@@ -1613,15 +1638,18 @@
void *wqe;
void *prev_wqe;

- spin_lock_irqsave(&qp->lock, flags);
+ spin_lock_irqsave(&qp->rq.lock, flags);

/* XXX check that state is OK to post receive */

- ind = qp->rq.next;
+ ind = qp->rq.next_ind;

for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
- mthca_err(dev, "RQ %06x full\n", qp->qpn);
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1678,7 +1706,7 @@
if (likely(nreq)) {
u32 doorbell[2];

- doorbell[0] = cpu_to_be32((qp->rq.next << qp->rq.wqe_shift) | size0);
+ doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);

wmb();
@@ -1688,10 +1716,10 @@
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

- qp->rq.cur += nreq;
- qp->rq.next = ind;
+ qp->rq.next_ind = ind;
+ qp->rq.head += nreq;

- spin_unlock_irqrestore(&qp->lock, flags);
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}

@@ -1712,16 +1740,18 @@
int ind;
u8 op0 = 0;

- spin_lock_irqsave(&qp->lock, flags);
+ spin_lock_irqsave(&qp->sq.lock, flags);

/* XXX check that state is OK to post send */

- ind = qp->sq.next & (qp->sq.max - 1);
+ ind = qp->sq.head & (qp->sq.max - 1);

for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (qp->sq.cur + nreq >= qp->sq.max) {
- mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
- qp->sq.cur, qp->sq.max, nreq);
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1831,19 +1861,18 @@
u32 doorbell[2];

doorbell[0] = cpu_to_be32((nreq << 24) |
- ((qp->sq.next & 0xffff) << 8) |
+ ((qp->sq.head & 0xffff) << 8) |
f0 | op0);
doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);

- qp->sq.cur += nreq;
- qp->sq.next += nreq;
+ qp->sq.head += nreq;

/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
- *qp->sq.db = cpu_to_be32(qp->sq.next & 0xffff);
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);

/*
* Make sure doorbell record is written before we
@@ -1855,7 +1884,7 @@
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}

- spin_unlock_irqrestore(&qp->lock, flags);
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}

@@ -1871,15 +1900,18 @@
int i;
void *wqe;

- spin_lock_irqsave(&qp->lock, flags);
+ spin_lock_irqsave(&qp->rq.lock, flags);

/* XXX check that state is OK to post receive */

- ind = qp->rq.next & (qp->rq.max - 1);
+ ind = qp->rq.head & (qp->rq.max - 1);

for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
- mthca_err(dev, "RQ %06x full\n", qp->qpn);
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1921,18 +1953,17 @@
}
out:
if (likely(nreq)) {
- qp->rq.cur += nreq;
- qp->rq.next += nreq;
+ qp->rq.head += nreq;

/*
* Make sure that descriptors are written before
* doorbell record.
*/
wmb();
- *qp->rq.db = cpu_to_be32(qp->rq.next & 0xffff);
+ *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
}

- spin_unlock_irqrestore(&qp->lock, flags);
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/