aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJack Morgenstein <jackm@dev.mellanox.co.il>2008-11-25 08:40:07 +0200
committerRoland Dreier <rolandd@cisco.com>2008-11-25 14:52:56 -0800
commitcbe93dcaf11f6c73d68a90067e83d145bb93f94b (patch)
treee7258935a88600031d118fbbc8f23a6fdedc789e
parent80c5a3720ed111b202809671b3614f84cb1d3724 (diff)
downloadlibmlx4-cbe93dcaf11f6c73d68a90067e83d145bb93f94b.tar.gz
Fix race between create QP and destroy QP
There is a race in libmlx4 because mlx4_create_qp() and mlx4_destroy_qp() are not atomic WRT each other. If one thread is destroying a QP while another is creating a QP, the following can happen: the destroying thread can be scheduled out after it has deleted the QP from kernel space, but before it has cleared it from userspace store (mlx4_clear_qp()). If the other thread creates a QP during this break, it gets the same QP base number and overwrites the destroyed QP's entry with mlx4_store_qp(). When the destroying thread resumes, it clears the new entry from the userspace store via mlx4_clear_qp. Fix this by expanding where qp_table_mutex is held to serialize the full create and destroy operations against each other. Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--src/qp.c18
-rw-r--r--src/verbs.c10
2 files changed, 12 insertions, 16 deletions
diff --git a/src/qp.c b/src/qp.c
index bb98c09..d194ae3 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -667,37 +667,25 @@ struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
{
int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
- int ret = 0;
-
- pthread_mutex_lock(&ctx->qp_table_mutex);
if (!ctx->qp_table[tind].refcnt) {
ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
sizeof (struct mlx4_qp *));
- if (!ctx->qp_table[tind].table) {
- ret = -1;
- goto out;
- }
+ if (!ctx->qp_table[tind].table)
+ return -1;
}
++ctx->qp_table[tind].refcnt;
ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
-
-out:
- pthread_mutex_unlock(&ctx->qp_table_mutex);
- return ret;
+ return 0;
}
void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
{
int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
- pthread_mutex_lock(&ctx->qp_table_mutex);
-
if (!--ctx->qp_table[tind].refcnt)
free(ctx->qp_table[tind].table);
else
ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
-
- pthread_mutex_unlock(&ctx->qp_table_mutex);
}
diff --git a/src/verbs.c b/src/verbs.c
index 3935ed5..cc179a0 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -452,6 +452,8 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
cmd.sq_no_prefetch = 0; /* OK for ABI 2: just a reserved field */
memset(cmd.reserved, 0, sizeof cmd.reserved);
+ pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+
ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
&resp, sizeof resp);
if (ret)
@@ -460,6 +462,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
if (ret)
goto err_destroy;
+ pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
qp->rq.max_gs = attr->cap.max_recv_sge;
@@ -477,6 +480,7 @@ err_destroy:
ibv_cmd_destroy_qp(&qp->ibv_qp);
err_rq_db:
+ pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
if (!attr->srq)
mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
@@ -580,9 +584,12 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
struct mlx4_qp *qp = to_mqp(ibqp);
int ret;
+ pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
ret = ibv_cmd_destroy_qp(ibqp);
- if (ret)
+ if (ret) {
+ pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
return ret;
+ }
mlx4_lock_cqs(ibqp);
@@ -594,6 +601,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
mlx4_unlock_cqs(ibqp);
+ pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
if (!ibqp->srq)
mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);