aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael S. Tsirkin <mst@mellanox.co.il>2006-01-07 00:56:43 +0000
committerRoland Dreier <rolandd@cisco.com>2006-11-09 19:57:04 -0800
commit55be91f0eedd1b14e4c9f1b5de0e1e2a30bde871 (patch)
treee2ff69ecdcc9364393348b766a614cc1809bf4c7
parent1749b1a0a8219f484ea8a5daa3d9f541dde3d620 (diff)
downloadlibmthca-55be91f0eedd1b14e4c9f1b5de0e1e2a30bde871.tar.gz
Fix race with QP destruction
Jack Morgenstein has discovered the following race condition in libmthca: Thread A destroys QP A at the kernel side by calling ibv_cmd_destroy_qp, but its time-slice is over before removing it from the user-space qp_table removal. Thread B allocates QP B, receiving a QP number that matches the just-destroyed QP A in the low 16 bits. Thread B will now over-write the slot in qp_table which was used for QP A. Thread A wakes up and clears qp_table slot, in effect removing QP B from qp_table. As a solution, remove the QP from qp_table before calling ibv_cmd_destroy_qp. This also makes sense since operations are performed in the reverse order in create_qp. Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--ChangeLog23
-rw-r--r--src/verbs.c16
2 files changed, 35 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index df45395..b7a2657 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2006-01-06 Michael S. Tsirkin <mst@mellanox.co.il>
+
+ * src/verbs.c (mthca_destroy_qp): Jack Morgenstein has discovered
+ the following race condition in libmthca:
+
+ Thread A destroys QP A at the kernel side by calling
+ ibv_cmd_destroy_qp, but its time-slice is over before removing it
+ from the user-space qp_table removal.
+
+ Thread B allocates QP B, receiving a QP number that matches the
+ just-destroyed QP A in the low 16 bits. Thread B will now
+ over-write the slot in qp_table which was used for QP A.
+
+ Thread A wakes up and clears qp_table slot, in effect removing QP
+ B from qp_table.
+
+ As a solution, remove the QP from qp_table before calling
+ ibv_cmd_destroy_qp. This also makes sense since operations are
+ performed in the reverse order in create_qp.
+
+ * src/cq.c (handle_error_cqe): Fill in vendor_err field for
+ completions with error.
+
2006-01-05 Jack Morgenstein <jackm@mellanox.co.il>
* src/verbs.c (mthca_destroy_qp, mthca_destroy_srq): Free QP/SRQ
diff --git a/src/verbs.c b/src/verbs.c
index 5778280..1b63125 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -530,10 +530,6 @@ int mthca_destroy_qp(struct ibv_qp *qp)
{
int ret;
- ret = ibv_cmd_destroy_qp(qp);
- if (ret)
- return ret;
-
mthca_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
qp->srq ? to_msrq(qp->srq) : NULL);
if (qp->send_cq != qp->recv_cq)
@@ -547,6 +543,18 @@ int mthca_destroy_qp(struct ibv_qp *qp)
pthread_spin_unlock(&to_mcq(qp->recv_cq)->lock);
pthread_spin_unlock(&to_mcq(qp->send_cq)->lock);
+ ret = ibv_cmd_destroy_qp(qp);
+ if (ret) {
+ pthread_spin_lock(&to_mcq(qp->send_cq)->lock);
+ if (qp->send_cq != qp->recv_cq)
+ pthread_spin_lock(&to_mcq(qp->recv_cq)->lock);
+ mthca_store_qp(to_mctx(qp->context), qp->qp_num, to_mqp(qp));
+ if (qp->send_cq != qp->recv_cq)
+ pthread_spin_unlock(&to_mcq(qp->recv_cq)->lock);
+ pthread_spin_unlock(&to_mcq(qp->send_cq)->lock);
+ return ret;
+ }
+
if (mthca_is_memfree(qp->context)) {
mthca_free_db(to_mctx(qp->context)->db_tab, MTHCA_DB_TYPE_RQ,
to_mqp(qp)->rq.db_index);