All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Pavel Begunkov <asml.silence@gmail.com>, io-uring@vger.kernel.org
Subject: Re: [PATCH 2/4] io_uring: move locking inside overflow posting
Date: Wed, 14 May 2025 15:52:21 -0600	[thread overview]
Message-ID: <a645a6a2-d722-4ef1-bdfd-3701ab315584@kernel.dk> (raw)
In-Reply-To: <1644225f-36c9-4abf-8da3-cc853cdab0e8@kernel.dk>

Since sometimes it's easier to talk in code that in English, something
like the below (just applied on top, and utterly untested) I think is
much cleaner. Didn't spend a lot of time on it, I'm sure it could get
condensed down some more with a helper or something. But it keeps the
locking in the caller, while still allowing GFP_KERNEL alloc for
lockless_cq.

Somewhat unrelated, but also fills in an io_cqe and passes in for the
non-cqe32 parts, just for readability's sake.


diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index da1075b66a87..9b6d09633a29 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -744,44 +744,12 @@ static bool __io_cqring_event_overflow(struct io_ring_ctx *ctx,
 	return true;
 }
 
-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, bool locked,
-				     u64 user_data, s32 res, u32 cflags,
-				     u64 extra1, u64 extra2)
+static void io_cqring_event_overflow(struct io_ring_ctx *ctx,
+				     struct io_overflow_cqe *ocqe)
 {
-	struct io_overflow_cqe *ocqe;
-	size_t ocq_size = sizeof(struct io_overflow_cqe);
-	bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
-	gfp_t gfp = GFP_KERNEL_ACCOUNT;
-	bool queued;
-
-	io_lockdep_assert_cq_locked(ctx);
-
-	if (is_cqe32)
-		ocq_size += sizeof(struct io_uring_cqe);
-	if (locked)
-		gfp = GFP_ATOMIC | __GFP_ACCOUNT;
-
-	ocqe = kmalloc(ocq_size, gfp);
-	trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
-
-	if (ocqe) {
-		ocqe->cqe.user_data = user_data;
-		ocqe->cqe.res = res;
-		ocqe->cqe.flags = cflags;
-		if (is_cqe32) {
-			ocqe->cqe.big_cqe[0] = extra1;
-			ocqe->cqe.big_cqe[1] = extra2;
-		}
-	}
-
-	if (locked) {
-		queued = __io_cqring_event_overflow(ctx, ocqe);
-	} else {
-		spin_lock(&ctx->completion_lock);
-		queued = __io_cqring_event_overflow(ctx, ocqe);
-		spin_unlock(&ctx->completion_lock);
-	}
-	return queued;
+	spin_lock(&ctx->completion_lock);
+	__io_cqring_event_overflow(ctx, ocqe);
+	spin_unlock(&ctx->completion_lock);
 }
 
 /*
@@ -842,15 +810,49 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 	return false;
 }
 
+static struct io_overflow_cqe *io_get_ocqe(struct io_ring_ctx *ctx,
+					   struct io_cqe *cqe, u64 extra1,
+					   u64 extra2, gfp_t gfp)
+{
+	size_t ocq_size = sizeof(struct io_overflow_cqe);
+	bool is_cqe32 = ctx->flags & IORING_SETUP_CQE32;
+	struct io_overflow_cqe *ocqe;
+
+	if (is_cqe32)
+		ocq_size += sizeof(struct io_uring_cqe);
+
+	ocqe = kmalloc(ocq_size, gfp);
+	trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
+	if (ocqe) {
+		ocqe->cqe.user_data = cqe->user_data;
+		ocqe->cqe.res = cqe->res;
+		ocqe->cqe.flags = cqe->flags;
+		if (is_cqe32) {
+			ocqe->cqe.big_cqe[0] = extra1;
+			ocqe->cqe.big_cqe[1] = extra2;
+		}
+	}
+
+	return ocqe;
+}
+
 bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 {
 	bool filled;
 
 	io_cq_lock(ctx);
 	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
-	if (!filled)
-		filled = io_cqring_event_overflow(ctx, true,
-						  user_data, res, cflags, 0, 0);
+	if (unlikely(!filled)) {
+		struct io_cqe cqe = {
+			.user_data	= user_data,
+			.res		= res,
+			.flags		= cflags
+		};
+		struct io_overflow_cqe *ocqe;
+
+		ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_ATOMIC);
+		filled = __io_cqring_event_overflow(ctx, ocqe);
+	}
 	io_cq_unlock_post(ctx);
 	return filled;
 }
@@ -864,8 +866,17 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
 	lockdep_assert_held(&ctx->uring_lock);
 	lockdep_assert(ctx->lockless_cq);
 
-	if (!io_fill_cqe_aux(ctx, user_data, res, cflags))
-		io_cqring_event_overflow(ctx, false, user_data, res, cflags, 0, 0);
+	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
+		struct io_cqe cqe = {
+			.user_data	= user_data,
+			.res		= res,
+			.flags		= cflags
+		};
+		struct io_overflow_cqe *ocqe;
+
+		ocqe = io_get_ocqe(ctx, &cqe, 0, 0, GFP_KERNEL);
+		io_cqring_event_overflow(ctx, ocqe);
+	}
 
 	ctx->submit_state.cq_flush = true;
 }
@@ -1437,6 +1448,20 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
 	} while (node);
 }
 
+static __cold void io_cqe_overflow_fill(struct io_ring_ctx *ctx,
+					struct io_kiocb *req)
+{
+	gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
+	struct io_overflow_cqe *ocqe;
+
+	ocqe = io_get_ocqe(ctx, &req->cqe, req->big_cqe.extra1, req->big_cqe.extra2, gfp);
+	if (ctx->lockless_cq)
+		io_cqring_event_overflow(ctx, ocqe);
+	else
+		__io_cqring_event_overflow(ctx, ocqe);
+	memset(&req->big_cqe, 0, sizeof(req->big_cqe));
+}
+
 void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 	__must_hold(&ctx->uring_lock)
 {
@@ -1453,17 +1478,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		 * will go through the io-wq retry machinery and post one
 		 * later.
 		 */
-		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
-		    unlikely(!io_fill_cqe_req(ctx, req))) {
-			bool locked = !ctx->lockless_cq;
-
-			io_cqring_event_overflow(req->ctx, locked,
-						req->cqe.user_data,
-						req->cqe.res, req->cqe.flags,
-						req->big_cqe.extra1,
-						req->big_cqe.extra2);
-			memset(&req->big_cqe, 0, sizeof(req->big_cqe));
-		}
+		if (req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE))
+			continue;
+		if (unlikely(!io_fill_cqe_req(ctx, req)))
+			io_cqe_overflow_fill(ctx, req);
 	}
 	__io_cq_unlock_post(ctx);
 

-- 
Jens Axboe

  reply	other threads:[~2025-05-14 21:52 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-14  8:07 [PATCH 0/4] overflow completion enhancements Pavel Begunkov
2025-05-14  8:07 ` [PATCH 1/4] io_uring: open code io_req_cqe_overflow() Pavel Begunkov
2025-05-14  8:07 ` [PATCH 2/4] io_uring: move locking inside overflow posting Pavel Begunkov
2025-05-14 16:42   ` Jens Axboe
2025-05-14 17:18     ` Pavel Begunkov
2025-05-14 19:25       ` Jens Axboe
2025-05-14 20:00         ` Pavel Begunkov
2025-05-14 20:05           ` Jens Axboe
2025-05-14 21:52             ` Jens Axboe [this message]
2025-05-15 11:04               ` Pavel Begunkov
2025-05-14  8:07 ` [PATCH 3/4] io_uring: alloc overflow entry before locking Pavel Begunkov
2025-05-14  8:07 ` [PATCH 4/4] io_uring: add lockdep warning for overflow posting Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a645a6a2-d722-4ef1-bdfd-3701ab315584@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=asml.silence@gmail.com \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.