aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-01-20 12:39:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-01-20 12:39:45 -0800
commit9c38747f0cdb20516de3d708f39720762786750a (patch)
tree207ec7f9c393a2ac43d58016873d31b4f6ee1feb
parent26e57507a0f04ae0e472afe4799784e2ed19e1b0 (diff)
parent8579538c89e33ce78be2feb41e07489c8cbf8f31 (diff)
downloadwpan-9c38747f0cdb20516de3d708f39720762786750a.tar.gz
Merge tag 'io_uring-6.2-2023-01-20' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe: "Fixes for the MSG_RING opcode. Nothing really major: - Fix an overflow missing serialization around posting CQEs to the target ring (me) - Disable MSG_RING on a ring that isn't enabled yet. There's nothing really wrong with allowing it, but 1) it's somewhat odd as nobody can receive them yet, and 2) it means that using the right delivery mechanism might change. As nobody should be sending CQEs to a ring that isn't enabled yet, let's just disable it (Pavel) - Tweak to when we decide to post remotely or not for MSG_RING (Pavel)" * tag 'io_uring-6.2-2023-01-20' of git://git.kernel.dk/linux: io_uring/msg_ring: fix remote queue to disabled ring io_uring/msg_ring: fix flagging remote execution io_uring/msg_ring: fix missing lock on overflow for IOPOLL io_uring/msg_ring: move double lock/unlock helpers higher up
-rw-r--r--io_uring/io_uring.c4
-rw-r--r--io_uring/msg_ring.c130
2 files changed, 84 insertions, 50 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2ac1cd8d23ea6..0a4efada9b3c3 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3674,7 +3674,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
&& !(ctx->flags & IORING_SETUP_R_DISABLED))
- ctx->submitter_task = get_task_struct(current);
+ WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
file = io_uring_get_file(ctx);
if (IS_ERR(file)) {
@@ -3868,7 +3868,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
return -EBADFD;
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task)
- ctx->submitter_task = get_task_struct(current);
+ WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
if (ctx->restrictions.registered)
ctx->restricted = 1;
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 2d3cd945a531b..15602a136821b 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -25,6 +25,28 @@ struct io_msg {
u32 flags;
};
+static void io_double_unlock_ctx(struct io_ring_ctx *octx)
+{
+ mutex_unlock(&octx->uring_lock);
+}
+
+static int io_double_lock_ctx(struct io_ring_ctx *octx,
+ unsigned int issue_flags)
+{
+ /*
+ * To ensure proper ordering between the two ctxs, we can only
+ * attempt a trylock on the target. If that fails and we already have
+ * the source ctx lock, punt to io-wq.
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ if (!mutex_trylock(&octx->uring_lock))
+ return -EAGAIN;
+ return 0;
+ }
+ mutex_lock(&octx->uring_lock);
+ return 0;
+}
+
void io_msg_ring_cleanup(struct io_kiocb *req)
{
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -36,6 +58,29 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
msg->src_file = NULL;
}
+static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
+{
+ if (!target_ctx->task_complete)
+ return false;
+ return current != target_ctx->submitter_task;
+}
+
+static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+{
+ struct io_ring_ctx *ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ struct task_struct *task = READ_ONCE(ctx->submitter_task);
+
+ if (unlikely(!task))
+ return -EOWNERDEAD;
+
+ init_task_work(&msg->tw, func);
+ if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
+ return -EOWNERDEAD;
+
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
static void io_msg_tw_complete(struct callback_head *head)
{
struct io_msg *msg = container_of(head, struct io_msg, tw);
@@ -43,61 +88,54 @@ static void io_msg_tw_complete(struct callback_head *head)
struct io_ring_ctx *target_ctx = req->file->private_data;
int ret = 0;
- if (current->flags & PF_EXITING)
+ if (current->flags & PF_EXITING) {
ret = -EOWNERDEAD;
- else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- ret = -EOVERFLOW;
+ } else {
+ /*
+ * If the target ring is using IOPOLL mode, then we need to be
+ * holding the uring_lock for posting completions. Other ring
+ * types rely on the regular completion locking, which is
+ * handled while posting.
+ */
+ if (target_ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_lock(&target_ctx->uring_lock);
+ if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = -EOVERFLOW;
+ if (target_ctx->flags & IORING_SETUP_IOPOLL)
+ mutex_unlock(&target_ctx->uring_lock);
+ }
if (ret < 0)
req_set_fail(req);
io_req_queue_tw_complete(req, ret);
}
-static int io_msg_ring_data(struct io_kiocb *req)
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+ int ret;
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
+ if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ return -EBADFD;
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL_NO_IPI))
- return -EOWNERDEAD;
-
- atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
- return IOU_ISSUE_SKIP_COMPLETE;
- }
-
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
- return 0;
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_complete);
- return -EOVERFLOW;
-}
-
-static void io_double_unlock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
-{
- mutex_unlock(&octx->uring_lock);
-}
-
-static int io_double_lock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
-{
- /*
- * To ensure proper ordering between the two ctxs, we can only
- * attempt a trylock on the target. If that fails and we already have
- * the source ctx lock, punt to io-wq.
- */
- if (!(issue_flags & IO_URING_F_UNLOCKED)) {
- if (!mutex_trylock(&octx->uring_lock))
+ ret = -EOVERFLOW;
+ if (target_ctx->flags & IORING_SETUP_IOPOLL) {
+ if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
return -EAGAIN;
- return 0;
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = 0;
+ io_double_unlock_ctx(target_ctx);
+ } else {
+ if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ ret = 0;
}
- mutex_lock(&octx->uring_lock);
- return 0;
+ return ret;
}
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
@@ -148,7 +186,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
- io_double_unlock_ctx(target_ctx, issue_flags);
+ io_double_unlock_ctx(target_ctx);
return ret;
}
@@ -174,6 +212,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
if (target_ctx == ctx)
return -EINVAL;
+ if (target_ctx->flags & IORING_SETUP_R_DISABLED)
+ return -EBADFD;
if (!src_file) {
src_file = io_msg_grab_file(req, issue_flags);
if (!src_file)
@@ -182,14 +222,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
req->flags |= REQ_F_NEED_CLEANUP;
}
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_fd_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL))
- return -EOWNERDEAD;
-
- return IOU_ISSUE_SKIP_COMPLETE;
- }
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_fd_complete);
return io_msg_install_complete(req, issue_flags);
}
@@ -224,7 +258,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
switch (msg->cmd) {
case IORING_MSG_DATA:
- ret = io_msg_ring_data(req);
+ ret = io_msg_ring_data(req, issue_flags);
break;
case IORING_MSG_SEND_FD:
ret = io_msg_send_fd(req, issue_flags);