* [PATCH V2 1/5] ublk: reorder tag_set initialization before queue allocation
2025-10-28 8:56 [PATCH V2 0/5] ublk: NUMA-aware memory allocation Ming Lei
@ 2025-10-28 8:56 ` Ming Lei
2025-10-28 8:56 ` [PATCH V2 2/5] ublk: implement NUMA-aware memory allocation Ming Lei
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-10-28 8:56 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Move ublk_add_tag_set() before ublk_init_queues() in the device
initialization path. This allows us to use the blk-mq CPU-to-queue
mapping established by the tag_set to determine the appropriate
NUMA node for each queue allocation.
The error handling paths are also reordered accordingly.
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 0c74a41a6753..2569566bf5e6 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -3178,17 +3178,17 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
ub->dev_info.nr_hw_queues, nr_cpu_ids);
ublk_align_max_io_size(ub);
- ret = ublk_init_queues(ub);
+ ret = ublk_add_tag_set(ub);
if (ret)
goto out_free_dev_number;
- ret = ublk_add_tag_set(ub);
+ ret = ublk_init_queues(ub);
if (ret)
- goto out_deinit_queues;
+ goto out_free_tag_set;
ret = -EFAULT;
if (copy_to_user(argp, &ub->dev_info, sizeof(info)))
- goto out_free_tag_set;
+ goto out_deinit_queues;
/*
* Add the char dev so that ublksrv daemon can be setup.
@@ -3197,10 +3197,10 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
ret = ublk_add_chdev(ub);
goto out_unlock;
-out_free_tag_set:
- blk_mq_free_tag_set(&ub->tag_set);
out_deinit_queues:
ublk_deinit_queues(ub);
+out_free_tag_set:
+ blk_mq_free_tag_set(&ub->tag_set);
out_free_dev_number:
ublk_free_dev_number(ub);
out_free_ub:
--
2.47.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH V2 2/5] ublk: implement NUMA-aware memory allocation
2025-10-28 8:56 [PATCH V2 0/5] ublk: NUMA-aware memory allocation Ming Lei
2025-10-28 8:56 ` [PATCH V2 1/5] ublk: reorder tag_set initialization before queue allocation Ming Lei
@ 2025-10-28 8:56 ` Ming Lei
2025-10-28 8:56 ` [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios Ming Lei
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-10-28 8:56 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Implement NUMA-friendly memory allocation for the ublk driver to improve
performance on multi-socket systems.
This commit includes the following changes:
1. Convert struct ublk_device to use a flexible array member for the
queues field (using DECLARE_FLEX_ARRAY) instead of a separate
pointer array allocation. This eliminates one level of indirection
and simplifies memory management. The queues array is now allocated
as part of struct ublk_device using struct_size().
2. Rename __queues to queues, dropping the __ prefix since the field is
now accessed directly throughout the codebase rather than only through
the ublk_get_queue() helper.
3. Remove the queue_size field from struct ublk_device as it is no longer
needed.
4. Move queue allocation and deallocation into ublk_init_queue() and
ublk_deinit_queue() respectively, improving encapsulation. This
simplifies ublk_init_queues() and ublk_deinit_queues() to just
iterate and call the per-queue functions.
5. Add ublk_get_queue_numa_node() helper function to determine the
appropriate NUMA node for a queue by finding the first CPU mapped
to that queue via tag_set.map[HCTX_TYPE_DEFAULT].mq_map[] and
converting it to a NUMA node using cpu_to_node(). This function is
called internally by ublk_init_queue() to determine the allocation
node.
6. Allocate each queue structure on its local NUMA node using
kvzalloc_node() in ublk_init_queue().
7. Allocate the I/O command buffer on the same NUMA node using
alloc_pages_node().
This reduces memory access latency on multi-socket NUMA systems by
ensuring each queue's data structures are local to the CPUs that
access them.
Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 84 +++++++++++++++++++++++++---------------
1 file changed, 53 insertions(+), 31 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2569566bf5e6..394e9b5f512f 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -209,9 +209,6 @@ struct ublk_queue {
struct ublk_device {
struct gendisk *ub_disk;
- char *__queues;
-
- unsigned int queue_size;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -239,6 +236,8 @@ struct ublk_device {
bool canceling;
pid_t ublksrv_tgid;
struct delayed_work exit_work;
+
+ DECLARE_FLEX_ARRAY(struct ublk_queue *, queues);
};
/* header of ublk_params */
@@ -781,7 +780,7 @@ static noinline void ublk_put_device(struct ublk_device *ub)
static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
int qid)
{
- return (struct ublk_queue *)&(dev->__queues[qid * dev->queue_size]);
+ return dev->queues[qid];
}
static inline bool ublk_rq_has_data(const struct request *rq)
@@ -2662,9 +2661,13 @@ static const struct file_operations ublk_ch_fops = {
static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
{
- int size = ublk_queue_cmd_buf_size(ub);
- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
- int i;
+ struct ublk_queue *ubq = ub->queues[q_id];
+ int size, i;
+
+ if (!ubq)
+ return;
+
+ size = ublk_queue_cmd_buf_size(ub);
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -2676,57 +2679,76 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
if (ubq->io_cmd_buf)
free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
+
+ kvfree(ubq);
+ ub->queues[q_id] = NULL;
+}
+
+static int ublk_get_queue_numa_node(struct ublk_device *ub, int q_id)
+{
+ unsigned int cpu;
+
+ /* Find first CPU mapped to this queue */
+ for_each_possible_cpu(cpu) {
+ if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[cpu] == q_id)
+ return cpu_to_node(cpu);
+ }
+
+ return NUMA_NO_NODE;
}
static int ublk_init_queue(struct ublk_device *ub, int q_id)
{
- struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ int depth = ub->dev_info.queue_depth;
+ int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
- void *ptr;
+ struct ublk_queue *ubq;
+ struct page *page;
+ int numa_node;
int size;
+ /* Determine NUMA node based on queue's CPU affinity */
+ numa_node = ublk_get_queue_numa_node(ub, q_id);
+
+ /* Allocate queue structure on local NUMA node */
+ ubq = kvzalloc_node(ubq_size, GFP_KERNEL, numa_node);
+ if (!ubq)
+ return -ENOMEM;
+
spin_lock_init(&ubq->cancel_lock);
ubq->flags = ub->dev_info.flags;
ubq->q_id = q_id;
- ubq->q_depth = ub->dev_info.queue_depth;
+ ubq->q_depth = depth;
size = ublk_queue_cmd_buf_size(ub);
- ptr = (void *) __get_free_pages(gfp_flags, get_order(size));
- if (!ptr)
+ /* Allocate I/O command buffer on local NUMA node */
+ page = alloc_pages_node(numa_node, gfp_flags, get_order(size));
+ if (!page) {
+ kvfree(ubq);
return -ENOMEM;
+ }
+ ubq->io_cmd_buf = page_address(page);
- ubq->io_cmd_buf = ptr;
+ ub->queues[q_id] = ubq;
ubq->dev = ub;
return 0;
}
static void ublk_deinit_queues(struct ublk_device *ub)
{
- int nr_queues = ub->dev_info.nr_hw_queues;
int i;
- if (!ub->__queues)
- return;
-
- for (i = 0; i < nr_queues; i++)
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_deinit_queue(ub, i);
- kvfree(ub->__queues);
}
static int ublk_init_queues(struct ublk_device *ub)
{
- int nr_queues = ub->dev_info.nr_hw_queues;
- int depth = ub->dev_info.queue_depth;
- int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
- int i, ret = -ENOMEM;
+ int i, ret;
- ub->queue_size = ubq_size;
- ub->__queues = kvcalloc(nr_queues, ubq_size, GFP_KERNEL);
- if (!ub->__queues)
- return ret;
-
- for (i = 0; i < nr_queues; i++) {
- if (ublk_init_queue(ub, i))
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ ret = ublk_init_queue(ub, i);
+ if (ret)
goto fail;
}
@@ -3128,7 +3150,7 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
goto out_unlock;
ret = -ENOMEM;
- ub = kzalloc(sizeof(*ub), GFP_KERNEL);
+ ub = kzalloc(struct_size(ub, queues, info.nr_hw_queues), GFP_KERNEL);
if (!ub)
goto out_unlock;
mutex_init(&ub->mutex);
--
2.47.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios
2025-10-28 8:56 [PATCH V2 0/5] ublk: NUMA-aware memory allocation Ming Lei
2025-10-28 8:56 ` [PATCH V2 1/5] ublk: reorder tag_set initialization before queue allocation Ming Lei
2025-10-28 8:56 ` [PATCH V2 2/5] ublk: implement NUMA-aware memory allocation Ming Lei
@ 2025-10-28 8:56 ` Ming Lei
2025-10-28 21:52 ` Caleb Sander Mateos
2025-10-28 8:56 ` [PATCH V2 4/5] selftests: ublk: set CPU affinity before thread initialization Ming Lei
2025-10-28 8:56 ` [PATCH V2 5/5] selftests: ublk: make ublk_thread thread-local variable Ming Lei
4 siblings, 1 reply; 8+ messages in thread
From: Ming Lei @ 2025-10-28 8:56 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Convert ublk_queue to use DECLARE_FLEX_ARRAY for the ios field and
use struct_size() for allocation, following kernel best practices.
Changes in this commit:
1. Convert ios field from "struct ublk_io ios[]" to use
DECLARE_FLEX_ARRAY(struct ublk_io, ios) for consistency with
modern kernel style.
2. Update ublk_init_queue() to use struct_size(ubq, ios, depth)
instead of manual size calculation (sizeof(struct ublk_queue) +
depth * sizeof(struct ublk_io)).
This provides better type safety and makes the code more maintainable
by using standard kernel macros for flexible array handling.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 394e9b5f512f..cef9cfa94feb 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -203,7 +203,8 @@ struct ublk_queue {
bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
spinlock_t cancel_lock;
struct ublk_device *dev;
- struct ublk_io ios[];
+
+ DECLARE_FLEX_ARRAY(struct ublk_io, ios);
};
struct ublk_device {
@@ -2700,7 +2701,6 @@ static int ublk_get_queue_numa_node(struct ublk_device *ub, int q_id)
static int ublk_init_queue(struct ublk_device *ub, int q_id)
{
int depth = ub->dev_info.queue_depth;
- int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
struct ublk_queue *ubq;
struct page *page;
@@ -2711,7 +2711,8 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
numa_node = ublk_get_queue_numa_node(ub, q_id);
/* Allocate queue structure on local NUMA node */
- ubq = kvzalloc_node(ubq_size, GFP_KERNEL, numa_node);
+ ubq = kvzalloc_node(struct_size(ubq, ios, depth), GFP_KERNEL,
+ numa_node);
if (!ubq)
return -ENOMEM;
--
2.47.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios
2025-10-28 8:56 ` [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios Ming Lei
@ 2025-10-28 21:52 ` Caleb Sander Mateos
2025-10-29 2:51 ` Ming Lei
0 siblings, 1 reply; 8+ messages in thread
From: Caleb Sander Mateos @ 2025-10-28 21:52 UTC (permalink / raw)
To: Ming Lei; +Cc: Jens Axboe, linux-block, Uday Shankar
On Tue, Oct 28, 2025 at 1:57 AM Ming Lei <ming.lei@redhat.com> wrote:
>
> Convert ublk_queue to use DECLARE_FLEX_ARRAY for the ios field and
> use struct_size() for allocation, following kernel best practices.
>
> Changes in this commit:
>
> 1. Convert ios field from "struct ublk_io ios[]" to use
> DECLARE_FLEX_ARRAY(struct ublk_io, ios) for consistency with
> modern kernel style.
Documentation/process/deprecated.rst suggests that
DECLARE_FLEX_ARRAY() is discouraged except in the niche cases when
it's necessary (which don't apply here). Or am I misunderstanding
something? However, struct ublk_io ios[] does seem like a good use
case for __counted_by().
>
> 2. Update ublk_init_queue() to use struct_size(ubq, ios, depth)
> instead of manual size calculation (sizeof(struct ublk_queue) +
> depth * sizeof(struct ublk_io)).
Sure, this looks like an improvement.
Best,
Caleb
>
> This provides better type safety and makes the code more maintainable
> by using standard kernel macros for flexible array handling.
>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> drivers/block/ublk_drv.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 394e9b5f512f..cef9cfa94feb 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -203,7 +203,8 @@ struct ublk_queue {
> bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
> spinlock_t cancel_lock;
> struct ublk_device *dev;
> - struct ublk_io ios[];
> +
> + DECLARE_FLEX_ARRAY(struct ublk_io, ios);
> };
>
> struct ublk_device {
> @@ -2700,7 +2701,6 @@ static int ublk_get_queue_numa_node(struct ublk_device *ub, int q_id)
> static int ublk_init_queue(struct ublk_device *ub, int q_id)
> {
> int depth = ub->dev_info.queue_depth;
> - int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io);
> gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
> struct ublk_queue *ubq;
> struct page *page;
> @@ -2711,7 +2711,8 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
> numa_node = ublk_get_queue_numa_node(ub, q_id);
>
> /* Allocate queue structure on local NUMA node */
> - ubq = kvzalloc_node(ubq_size, GFP_KERNEL, numa_node);
> + ubq = kvzalloc_node(struct_size(ubq, ios, depth), GFP_KERNEL,
> + numa_node);
> if (!ubq)
> return -ENOMEM;
>
> --
> 2.47.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios
2025-10-28 21:52 ` Caleb Sander Mateos
@ 2025-10-29 2:51 ` Ming Lei
0 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-10-29 2:51 UTC (permalink / raw)
To: Caleb Sander Mateos; +Cc: Jens Axboe, linux-block, Uday Shankar
On Tue, Oct 28, 2025 at 02:52:25PM -0700, Caleb Sander Mateos wrote:
> On Tue, Oct 28, 2025 at 1:57 AM Ming Lei <ming.lei@redhat.com> wrote:
> >
> > Convert ublk_queue to use DECLARE_FLEX_ARRAY for the ios field and
> > use struct_size() for allocation, following kernel best practices.
> >
> > Changes in this commit:
> >
> > 1. Convert ios field from "struct ublk_io ios[]" to use
> > DECLARE_FLEX_ARRAY(struct ublk_io, ios) for consistency with
> > modern kernel style.
>
> Documentation/process/deprecated.rst suggests that
> DECLARE_FLEX_ARRAY() is discouraged except in the niche cases when
> it's necessary (which don't apply here). Or am I misunderstanding
> something?
You are right, DECLARE_FLEX_ARRAY is only needed:
```
when the flexible array is either alone in a struct or is part of a union.
```
> However, struct ublk_io ios[] does seem like a good use
> case for __counted_by().
Good point!
Thanks,
Ming
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH V2 4/5] selftests: ublk: set CPU affinity before thread initialization
2025-10-28 8:56 [PATCH V2 0/5] ublk: NUMA-aware memory allocation Ming Lei
` (2 preceding siblings ...)
2025-10-28 8:56 ` [PATCH V2 3/5] ublk: use flexible array for ublk_queue.ios Ming Lei
@ 2025-10-28 8:56 ` Ming Lei
2025-10-28 8:56 ` [PATCH V2 5/5] selftests: ublk: make ublk_thread thread-local variable Ming Lei
4 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-10-28 8:56 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Move ublk_thread_set_sched_affinity() call before ublk_thread_init()
to ensure memory allocations during thread initialization occur on
the correct NUMA node. This leverages Linux's first-touch memory
policy for better NUMA locality.
Also convert ublk_thread_set_sched_affinity() to use
pthread_setaffinity_np() instead of sched_setaffinity(), as the
pthread API is the proper interface for setting thread affinity in
multithreaded programs.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
tools/testing/selftests/ublk/kublk.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 6b8123c12a7a..062537ab8976 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -839,7 +839,7 @@ static int ublk_process_io(struct ublk_thread *t)
static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
cpu_set_t *cpuset)
{
- if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
+ if (pthread_setaffinity_np(pthread_self(), sizeof(*cpuset), cpuset) < 0)
ublk_err("ublk dev %u thread %u set affinity failed",
t->dev->dev_info.dev_id, t->idx);
}
@@ -862,15 +862,21 @@ static void *ublk_io_handler_fn(void *data)
t->dev = info->dev;
t->idx = info->idx;
+ /*
+ * IO perf is sensitive with queue pthread affinity on NUMA machine
+ *
+ * Set sched_affinity at beginning, so following allocated memory/pages
+ * could be CPU/NUMA aware.
+ */
+ if (info->affinity)
+ ublk_thread_set_sched_affinity(t, info->affinity);
+
ret = ublk_thread_init(t, info->extra_flags);
if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
dev_id, t->idx);
return NULL;
}
- /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
- if (info->affinity)
- ublk_thread_set_sched_affinity(t, info->affinity);
sem_post(info->ready);
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
--
2.47.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH V2 5/5] selftests: ublk: make ublk_thread thread-local variable
2025-10-28 8:56 [PATCH V2 0/5] ublk: NUMA-aware memory allocation Ming Lei
` (3 preceding siblings ...)
2025-10-28 8:56 ` [PATCH V2 4/5] selftests: ublk: set CPU affinity before thread initialization Ming Lei
@ 2025-10-28 8:56 ` Ming Lei
4 siblings, 0 replies; 8+ messages in thread
From: Ming Lei @ 2025-10-28 8:56 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Uday Shankar, Caleb Sander Mateos, Ming Lei
Refactor ublk_thread to be a per-thread local variable (an automatic
variable on each IO thread's own stack, not TLS storage) instead of
storing it in ublk_dev:
- Remove pthread_t thread field from struct ublk_thread and move it to
struct ublk_thread_info
- Remove struct ublk_thread array from struct ublk_dev, reducing memory
footprint
- Define struct ublk_thread as a local variable in __ublk_io_handler_fn()
instead of accessing it from dev->threads[]
- Extract main IO handling logic into __ublk_io_handler_fn() which is
marked as noinline
- Move CPU affinity setup to ublk_io_handler_fn() before calling
__ublk_io_handler_fn()
- Update ublk_thread_set_sched_affinity() to take struct ublk_thread_info *
instead of struct ublk_thread *; it keeps using pthread_setaffinity_np(),
which replaced sched_setaffinity() in the previous patch
- Reorder struct ublk_thread fields to group related state together
This change makes each thread's ublk_thread structure truly local to
the thread, improving cache locality and reducing memory usage.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
tools/testing/selftests/ublk/kublk.c | 76 +++++++++++++++-------------
tools/testing/selftests/ublk/kublk.h | 9 ++--
2 files changed, 45 insertions(+), 40 deletions(-)
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 062537ab8976..f8fa102a627f 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -836,62 +836,70 @@ static int ublk_process_io(struct ublk_thread *t)
return reapped;
}
-static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
- cpu_set_t *cpuset)
-{
- if (pthread_setaffinity_np(pthread_self(), sizeof(*cpuset), cpuset) < 0)
- ublk_err("ublk dev %u thread %u set affinity failed",
- t->dev->dev_info.dev_id, t->idx);
-}
-
struct ublk_thread_info {
struct ublk_dev *dev;
+ pthread_t thread;
unsigned idx;
sem_t *ready;
cpu_set_t *affinity;
unsigned long long extra_flags;
};
-static void *ublk_io_handler_fn(void *data)
+static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
{
- struct ublk_thread_info *info = data;
- struct ublk_thread *t = &info->dev->threads[info->idx];
+ if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0)
+ ublk_err("ublk dev %u thread %u set affinity failed",
+ info->dev->dev_info.dev_id, info->idx);
+}
+
+static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info)
+{
+ struct ublk_thread t = {
+ .dev = info->dev,
+ .idx = info->idx,
+ };
int dev_id = info->dev->dev_info.dev_id;
int ret;
- t->dev = info->dev;
- t->idx = info->idx;
-
- /*
- * IO perf is sensitive with queue pthread affinity on NUMA machine
- *
- * Set sched_affinity at beginning, so following allocated memory/pages
- * could be CPU/NUMA aware.
- */
- if (info->affinity)
- ublk_thread_set_sched_affinity(t, info->affinity);
-
- ret = ublk_thread_init(t, info->extra_flags);
+ ret = ublk_thread_init(&t, info->extra_flags);
if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
- dev_id, t->idx);
- return NULL;
+ dev_id, t.idx);
+ return ret;
}
sem_post(info->ready);
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
- gettid(), dev_id, t->idx);
+ gettid(), dev_id, t.idx);
/* submit all io commands to ublk driver */
- ublk_submit_fetch_commands(t);
+ ublk_submit_fetch_commands(&t);
do {
- if (ublk_process_io(t) < 0)
+ if (ublk_process_io(&t) < 0)
break;
} while (1);
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
- gettid(), dev_id, t->idx);
- ublk_thread_deinit(t);
+ gettid(), dev_id, t.idx);
+ ublk_thread_deinit(&t);
+ return 0;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+ struct ublk_thread_info *info = data;
+
+ /*
+ * IO perf is sensitive with queue pthread affinity on NUMA machine
+ *
+ * Set sched_affinity at beginning, so following allocated memory/pages
+ * could be CPU/NUMA aware.
+ */
+ if (info->affinity)
+ ublk_thread_set_sched_affinity(info);
+
+ __ublk_io_handler_fn(info);
+
return NULL;
}
@@ -989,14 +997,13 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
*/
if (dev->nthreads == dinfo->nr_hw_queues)
tinfo[i].affinity = &affinity_buf[i];
- pthread_create(&dev->threads[i].thread, NULL,
+ pthread_create(&tinfo[i].thread, NULL,
ublk_io_handler_fn,
&tinfo[i]);
}
for (i = 0; i < dev->nthreads; i++)
sem_wait(&ready);
- free(tinfo);
free(affinity_buf);
/* everything is fine now, start us */
@@ -1019,7 +1026,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
/* wait until we are terminated */
for (i = 0; i < dev->nthreads; i++)
- pthread_join(dev->threads[i].thread, &thread_ret);
+ pthread_join(tinfo[i].thread, &thread_ret);
+ free(tinfo);
fail:
for (i = 0; i < dinfo->nr_hw_queues; i++)
ublk_queue_deinit(&dev->q[i]);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 5e55484fb0aa..fe42705c6d42 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -175,23 +175,20 @@ struct ublk_queue {
struct ublk_thread {
struct ublk_dev *dev;
- struct io_uring ring;
- unsigned int cmd_inflight;
- unsigned int io_inflight;
-
- pthread_t thread;
unsigned idx;
#define UBLKS_T_STOPPING (1U << 0)
#define UBLKS_T_IDLE (1U << 1)
unsigned state;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+ struct io_uring ring;
};
struct ublk_dev {
struct ublk_tgt tgt;
struct ublksrv_ctrl_dev_info dev_info;
struct ublk_queue q[UBLK_MAX_QUEUES];
- struct ublk_thread threads[UBLK_MAX_THREADS];
unsigned nthreads;
unsigned per_io_tasks;
--
2.47.0
^ permalink raw reply related [flat|nested] 8+ messages in thread