public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Caleb Sander Mateos <csander@purestorage.com>
To: Ming Lei <ming.lei@redhat.com>, Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org,
	Caleb Sander Mateos <csander@purestorage.com>
Subject: [PATCH v2 11/14] ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task
Date: Fri, 20 Jun 2025 09:10:05 -0600	[thread overview]
Message-ID: <20250620151008.3976463-12-csander@purestorage.com> (raw)
In-Reply-To: <20250620151008.3976463-1-csander@purestorage.com>

ublk_register_io_buf() performs an expensive atomic refcount increment,
as well as a lot of pointer chasing to look up the struct request.

Create a separate ublk_daemon_register_io_buf() for the daemon task to
call. Initialize ublk_io's reference count to a large number
(UBLK_REFCOUNT_INIT) and introduce a field task_registered_buffers to
count the buffers registered on the daemon task. In
ublk_commit_and_fetch(), atomically subtract UBLK_REFCOUNT_INIT minus
task_registered_buffers from the reference count, so that the references
for buffers registered on the daemon task are not released.

Also obtain the struct request directly from ublk_io's req field instead
of looking it up on the tagset.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
---
 drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++------
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index f53618391141..b2925e15279a 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -146,10 +146,17 @@ struct ublk_uring_cmd_pdu {
 #define UBLK_IO_FLAG_AUTO_BUF_REG 	0x10
 
 /* atomic RW with ubq->cancel_lock */
 #define UBLK_IO_FLAG_CANCELED	0x80000000
 
+/*
+ * Initialize refcount to a large number to include any registered buffers.
+ * UBLK_IO_COMMIT_AND_FETCH_REQ will release these references minus those for
+ * any buffers registered on the io daemon task.
+ */
+#define UBLK_REFCOUNT_INIT (REFCOUNT_MAX / 2)
+
 struct ublk_io {
 	/* userspace buffer address from io cmd */
 	__u64	addr;
 	unsigned int flags;
 	int res;
@@ -164,18 +171,21 @@ struct ublk_io {
 	struct task_struct *task;
 
 	/*
 	 * The number of uses of this I/O by the ublk server
 	 * if user copy or zero copy are enabled:
-	 * - 1 from dispatch to the server until UBLK_IO_COMMIT_AND_FETCH_REQ
+	 * - UBLK_REFCOUNT_INIT from dispatch to the server
+	 *   until UBLK_IO_COMMIT_AND_FETCH_REQ
 	 * - 1 for each inflight ublk_ch_{read,write}_iter() call
-	 * - 1 for each io_uring registered buffer
+	 * - 1 for each io_uring registered buffer not registered on task
 	 * The I/O can only be completed once all references are dropped.
 	 * User copy and buffer registration operations are only permitted
 	 * if the reference count is nonzero.
 	 */
 	refcount_t ref;
+	/* Count of buffers registered on task and not yet unregistered */
+	unsigned task_registered_buffers;
 
 	/* auto-registered buffer, valid if UBLK_IO_FLAG_AUTO_BUF_REG is set */
 	u16 buf_index;
 	void *buf_ctx_handle;
 };
@@ -684,11 +694,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
 
 static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
 		struct ublk_io *io)
 {
 	if (ublk_need_req_ref(ubq))
-		refcount_set(&io->ref, 1);
+		refcount_set(&io->ref, UBLK_REFCOUNT_INIT);
 }
 
 static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
 		struct ublk_io *io)
 {
@@ -707,10 +717,19 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
 	} else {
 		__ublk_complete_rq(req);
 	}
 }
 
+static inline void ublk_sub_req_ref(struct ublk_io *io, struct request *req)
+{
+	unsigned sub_refs = UBLK_REFCOUNT_INIT - io->task_registered_buffers;
+
+	io->task_registered_buffers = 0;
+	if (refcount_sub_and_test(sub_refs, &io->ref))
+		__ublk_complete_rq(req);
+}
+
 static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
 {
 	return ubq->flags & UBLK_F_NEED_GET_DATA;
 }
 
@@ -1188,11 +1207,10 @@ ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, struct ublk_io *io)
 {
 	unsigned tag = io - ubq->ios;
 	struct ublksrv_io_desc *iod = ublk_get_iod(ubq, tag);
 
 	iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
-	refcount_set(&io->ref, 1);
 }
 
 static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 			      struct ublk_io *io, unsigned int issue_flags)
 {
@@ -1207,13 +1225,12 @@ static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 			return true;
 		}
 		blk_mq_end_request(req, BLK_STS_IOERR);
 		return false;
 	}
-	/* one extra reference is dropped by ublk_io_release */
-	refcount_set(&io->ref, 2);
 
+	io->task_registered_buffers = 1;
 	io->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd);
 	/* store buffer index in request payload */
 	io->buf_index = pdu->buf.index;
 	io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
 	return true;
@@ -1221,14 +1238,14 @@ static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 
 static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
 				   struct request *req, struct ublk_io *io,
 				   unsigned int issue_flags)
 {
+	ublk_init_req_ref(ubq, io);
 	if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
 		return ublk_auto_buf_reg(ubq, req, io, issue_flags);
 
-	ublk_init_req_ref(ubq, io);
 	return true;
 }
 
 static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
 			  struct ublk_io *io)
@@ -1488,10 +1505,11 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 			put_task_struct(io->task);
 			io->task = NULL;
 		}
 
 		WARN_ON_ONCE(refcount_read(&io->ref));
+		WARN_ON_ONCE(io->task_registered_buffers);
 	}
 }
 
 static int ublk_ch_open(struct inode *inode, struct file *filp)
 {
@@ -2023,10 +2041,39 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
 	}
 
 	return 0;
 }
 
+static int
+ublk_daemon_register_io_buf(struct io_uring_cmd *cmd,
+			    const struct ublk_queue *ubq, struct ublk_io *io,
+			    unsigned index, unsigned issue_flags)
+{
+	unsigned new_registered_buffers;
+	struct request *req = io->req;
+	int ret;
+
+	/*
+	 * Ensure there are still references for ublk_sub_req_ref() to release.
+	 * If not, fall back on the thread-safe buffer registration.
+	 */
+	new_registered_buffers = io->task_registered_buffers + 1;
+	if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT))
+		return ublk_register_io_buf(cmd, ubq, io, index, issue_flags);
+
+	if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req))
+		return -EINVAL;
+
+	ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
+				      issue_flags);
+	if (ret)
+		return ret;
+
+	io->task_registered_buffers = new_registered_buffers;
+	return 0;
+}
+
 static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
 				  const struct ublk_device *ub,
 				  unsigned int index, unsigned int issue_flags)
 {
 	if (!(ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY))
@@ -2146,11 +2193,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
 		req->__sector = ub_cmd->zone_append_lba;
 
 	if (unlikely(blk_should_fake_timeout(req->q)))
 		return 0;
 
-	ublk_put_req_ref(ubq, io, req);
+	if (ublk_need_req_ref(ubq))
+		ublk_sub_req_ref(io, req);
+	else
+		__ublk_complete_rq(req);
 	return 0;
 }
 
 static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
 {
@@ -2244,11 +2294,12 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 			^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
 		goto out;
 
 	switch (_IOC_NR(cmd_op)) {
 	case UBLK_IO_REGISTER_IO_BUF:
-		return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, issue_flags);
+		return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr,
+						   issue_flags);
 	case UBLK_IO_COMMIT_AND_FETCH_REQ:
 		ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags);
 		if (ret)
 			goto out;
 
@@ -2473,10 +2524,11 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
 		if (io->task)
 			put_task_struct(io->task);
 		WARN_ON_ONCE(refcount_read(&io->ref));
+		WARN_ON_ONCE(io->task_registered_buffers);
 	}
 
 	if (ubq->io_cmd_buf)
 		free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
 }
-- 
2.45.2


  parent reply	other threads:[~2025-06-20 15:10 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-20 15:09 [PATCH v2 00/14] ublk: allow off-daemon zero-copy buffer registration Caleb Sander Mateos
2025-06-20 15:09 ` [PATCH v2 01/14] ublk: use vmalloc for ublk_device's __queues Caleb Sander Mateos
2025-06-23  7:20   ` Ming Lei
2025-06-20 15:09 ` [PATCH v2 02/14] ublk: remove struct ublk_rq_data Caleb Sander Mateos
2025-06-23  8:02   ` Ming Lei
2025-06-20 15:09 ` [PATCH v2 03/14] ublk: check cmd_op first Caleb Sander Mateos
2025-06-20 15:09 ` [PATCH v2 04/14] ublk: handle UBLK_IO_FETCH_REQ earlier Caleb Sander Mateos
2025-06-20 15:09 ` [PATCH v2 05/14] ublk: remove task variable from __ublk_ch_uring_cmd() Caleb Sander Mateos
2025-06-20 15:10 ` [PATCH v2 06/14] ublk: consolidate UBLK_IO_FLAG_{ACTIVE,OWNED_BY_SRV} checks Caleb Sander Mateos
2025-06-20 15:10 ` [PATCH v2 07/14] ublk: move ublk_prep_cancel() to case UBLK_IO_COMMIT_AND_FETCH_REQ Caleb Sander Mateos
2025-06-20 15:10 ` [PATCH v2 08/14] ublk: don't take ublk_queue in ublk_unregister_io_buf() Caleb Sander Mateos
2025-06-23  8:29   ` Ming Lei
2025-06-20 15:10 ` [PATCH v2 09/14] ublk: allow UBLK_IO_(UN)REGISTER_IO_BUF on any task Caleb Sander Mateos
2025-06-23  9:07   ` Ming Lei
2025-06-20 15:10 ` [PATCH v2 10/14] ublk: return early if blk_should_fake_timeout() Caleb Sander Mateos
2025-06-23  9:08   ` Ming Lei
2025-06-20 15:10 ` Caleb Sander Mateos [this message]
2025-06-23  9:44   ` [PATCH v2 11/14] ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task Ming Lei
2025-06-20 15:10 ` [PATCH v2 12/14] ublk: optimize UBLK_IO_UNREGISTER_IO_BUF " Caleb Sander Mateos
2025-06-23  9:45   ` Ming Lei
2025-06-20 15:10 ` [PATCH v2 13/14] ublk: remove ubq checks from ublk_{get,put}_req_ref() Caleb Sander Mateos
2025-06-23  9:49   ` Ming Lei
2025-06-20 15:10 ` [PATCH v2 14/14] ublk: cache-align struct ublk_io Caleb Sander Mateos
2025-06-23  9:49   ` Ming Lei
2025-06-27  0:47 ` [PATCH v2 00/14] ublk: allow off-daemon zero-copy buffer registration Jens Axboe
2025-06-27  0:48   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250620151008.3976463-12-csander@purestorage.com \
    --to=csander@purestorage.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox