* [PATCH v3 0/2] blk-mq: use array manage hctx map instead of xarray
@ 2025-11-28 8:53 Fengnan Chang
From: Fengnan Chang @ 2025-11-28 8:53 UTC (permalink / raw)
To: axboe, linux-block, ming.lei, hare, hch, yukuai; +Cc: Fengnan Chang
After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
an xarray instead of an array to store the hctx pointers, but in poll mode
every call to blk_mq_poll() needs an xa_load() to find the corresponding
hctx, which introduces some cost. In my test, xa_load() can account for
3.8% of CPU time.
Reverting the previous change eliminates the xa_load() overhead and
yields a 3% performance improvement.
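For context, the hot-path difference is a radix-tree walk versus a single
dependent load; a simplified sketch of the two forms of the lookup in
blk_mq_poll() (see patch 1 for the real change):

	/* before: xarray (radix tree) walk on every poll */
	struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);

	/* after: one O(1) array index */
	struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[cookie];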
The potential use-after-free on q->queue_hw_ctx is avoided with RCU, the
same approach Yu Kuai used in [1].
[1] https://lore.kernel.org/all/20220225072053.2472431-1-yukuai3@huawei.com/
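The core of that fix is the standard RCU publish-then-reclaim pattern;
condensed from patch 2 below (the patch itself is authoritative):

	/* writer (resize path, queue frozen): publish new array, then free old */
	rcu_assign_pointer(q->queue_hw_ctx, new_hctxs);
	synchronize_rcu_expedited();	/* wait out readers of the old array */
	kfree(hctxs);

	/* reader (may run without 'q_usage_counter'): dereference under RCU */
	rcu_read_lock();
	hctx = rcu_dereference(q->queue_hw_ctx)[id];
	rcu_read_unlock();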
v3:
fix a build error and some of the sparse warnings (not all of them):
because the queue is frozen in __blk_mq_update_nr_hw_queues(),
'queue_hw_ctx' only needs RCU protection where it can be accessed
without grabbing 'q_usage_counter'.
v2:
1. change synchronize_rcu() to synchronize_rcu_expedited()
2. use rcu_dereference(q->queue_hw_ctx)[id] in queue_hctx() for readability.
Fengnan Chang (2):
blk-mq: use array manage hctx map instead of xarray
blk-mq: fix potential uaf for 'queue_hw_ctx'
block/blk-mq-tag.c | 2 +-
block/blk-mq.c | 63 ++++++++++++++++++++++++++++--------------
block/blk-mq.h | 2 +-
include/linux/blk-mq.h | 14 +++++++++-
include/linux/blkdev.h | 2 +-
5 files changed, 58 insertions(+), 25 deletions(-)
base-commit: 4941a17751c99e17422be743c02c923ad706f888
--
2.39.5 (Apple Git-154)
* [PATCH v3 1/2] blk-mq: use array manage hctx map instead of xarray
From: Fengnan Chang @ 2025-11-28 8:53 UTC (permalink / raw)
To: axboe, linux-block, ming.lei, hare, hch, yukuai; +Cc: Fengnan Chang
After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
an xarray instead of an array to store the hctx pointers, but in poll mode
every call to blk_mq_poll() needs an xa_load() to find the corresponding
hctx, which introduces some cost. In my test, xa_load() can account for
3.8% of CPU time.
This patch reverts the previous change, eliminating the overhead of
xa_load() and yielding a 3% performance improvement.
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
block/blk-mq-tag.c | 2 +-
block/blk-mq.c | 58 +++++++++++++++++++++++++++---------------
block/blk-mq.h | 2 +-
include/linux/blk-mq.h | 3 ++-
include/linux/blkdev.h | 2 +-
5 files changed, 42 insertions(+), 25 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5b664dbdf655..33946cdb5716 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -499,7 +499,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
int srcu_idx;
/*
- * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
+ * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid
* racing with it.
*/
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d626d32f6e57..eed12fab3484 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -723,7 +723,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
* If not tell the caller that it should skip this queue.
*/
ret = -EXDEV;
- data.hctx = xa_load(&q->hctx_table, hctx_idx);
+ data.hctx = q->queue_hw_ctx[hctx_idx];
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
@@ -3935,8 +3935,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
blk_free_flush_queue_callback);
hctx->fq = NULL;
- xa_erase(&q->hctx_table, hctx_idx);
-
spin_lock(&q->unused_hctx_lock);
list_add(&hctx->hctx_list, &q->unused_hctx_list);
spin_unlock(&q->unused_hctx_lock);
@@ -3978,14 +3976,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
hctx->numa_node))
goto exit_hctx;
- if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
- goto exit_flush_rq;
-
return 0;
- exit_flush_rq:
- if (set->ops->exit_request)
- set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
@@ -4374,7 +4366,7 @@ void blk_mq_release(struct request_queue *q)
kobject_put(&hctx->kobj);
}
- xa_destroy(&q->hctx_table);
+ kfree(q->queue_hw_ctx);
/*
* release .mq_kobj and sw queue's kobject now because
@@ -4518,26 +4510,44 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
- unsigned long i, j;
+ int i, j, end;
+ struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
+
+ if (q->nr_hw_queues < set->nr_hw_queues) {
+ struct blk_mq_hw_ctx **new_hctxs;
+
+ new_hctxs = kcalloc_node(set->nr_hw_queues,
+ sizeof(*new_hctxs), GFP_KERNEL,
+ set->numa_node);
+ if (!new_hctxs)
+ return;
+ if (hctxs)
+ memcpy(new_hctxs, hctxs, q->nr_hw_queues *
+ sizeof(*hctxs));
+ q->queue_hw_ctx = new_hctxs;
+ kfree(hctxs);
+ hctxs = new_hctxs;
+ }
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
- struct blk_mq_hw_ctx *old_hctx = xa_load(&q->hctx_table, i);
+ struct blk_mq_hw_ctx *old_hctx = hctxs[i];
if (old_hctx) {
old_node = old_hctx->numa_node;
blk_mq_exit_hctx(q, set, old_hctx, i);
}
- if (!blk_mq_alloc_and_init_hctx(set, q, i, node)) {
+ hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i, node);
+ if (!hctxs[i]) {
if (!old_hctx)
break;
pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n",
node, old_node);
- hctx = blk_mq_alloc_and_init_hctx(set, q, i, old_node);
- WARN_ON_ONCE(!hctx);
+ hctxs[i] = blk_mq_alloc_and_init_hctx(set, q, i,
+ old_node);
+ WARN_ON_ONCE(!hctxs[i]);
}
}
/*
@@ -4546,13 +4556,21 @@ static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
*/
if (i != set->nr_hw_queues) {
j = q->nr_hw_queues;
+ end = i;
} else {
j = i;
+ end = q->nr_hw_queues;
q->nr_hw_queues = set->nr_hw_queues;
}
- xa_for_each_start(&q->hctx_table, j, hctx, j)
- blk_mq_exit_hctx(q, set, hctx, j);
+ for (; j < end; j++) {
+ struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+ if (hctx) {
+ blk_mq_exit_hctx(q, set, hctx, j);
+ hctxs[j] = NULL;
+ }
+ }
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
@@ -4588,8 +4606,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);
- xa_init(&q->hctx_table);
-
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
goto err_hctxs;
@@ -5168,7 +5184,7 @@ int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
{
if (!blk_mq_can_poll(q))
return 0;
- return blk_hctx_poll(q, xa_load(&q->hctx_table, cookie), iob, flags);
+ return blk_hctx_poll(q, q->queue_hw_ctx[cookie], iob, flags);
}
int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
diff --git a/block/blk-mq.h b/block/blk-mq.h
index c4fccdeb5441..80a3f0c2bce7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -84,7 +84,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
enum hctx_type type,
unsigned int cpu)
{
- return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
+ return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
}
static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b25d12545f46..0795f29dd65d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1000,7 +1000,8 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
}
#define queue_for_each_hw_ctx(q, hctx, i) \
- xa_for_each(&(q)->hctx_table, (i), (hctx))
+ for ((i) = 0; (i) < (q)->nr_hw_queues && \
+ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
#define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 70b671a9a7f7..56328080ca09 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -493,7 +493,7 @@ struct request_queue {
/* hw dispatch queues */
unsigned int nr_hw_queues;
- struct xarray hctx_table;
+ struct blk_mq_hw_ctx **queue_hw_ctx;
struct percpu_ref q_usage_counter;
struct lock_class_key io_lock_cls_key;
--
2.39.5 (Apple Git-154)
* [PATCH v3 2/2] blk-mq: fix potential uaf for 'queue_hw_ctx'
From: Fengnan Chang @ 2025-11-28 8:53 UTC (permalink / raw)
To: axboe, linux-block, ming.lei, hare, hch, yukuai; +Cc: Fengnan Chang, Yu Kuai
This simply applies Kuai's patch from [1] with minor changes.
blk_mq_realloc_hw_ctxs() can free 'queue_hw_ctx' (e.g. when updating
submit_queues through configfs for null_blk) while it might still be in
use from another context (e.g. switching the elevator to none):
t1:                                     t2:
elevator_switch
 blk_mq_unquiesce_queue
  blk_mq_run_hw_queues
   queue_for_each_hw_ctx
    // assembly code for hctx = (q)->queue_hw_ctx[i]
    mov 0x48(%rbp),%rdx  -> read old queue_hw_ctx
                                        __blk_mq_update_nr_hw_queues
                                         blk_mq_realloc_hw_ctxs
                                          hctxs = q->queue_hw_ctx
                                          q->queue_hw_ctx = new_hctxs
                                          kfree(hctxs)
    movslq %ebx,%rax
    mov (%rdx,%rax,8),%rdi  -> uaf
This problem was found by code review, and I confirmed that the
concurrent scenario does exist (specifically, 'q->queue_hw_ctx' can
change during blk_mq_run_hw_queues()); however, the UAF has not been
reproduced yet without hacking the kernel.
Since the queue is frozen in __blk_mq_update_nr_hw_queues(), fix the
problem by protecting 'queue_hw_ctx' with RCU where it can be accessed
without grabbing 'q_usage_counter'.
[1] https://lore.kernel.org/all/20220225072053.2472431-1-yukuai3@huawei.com/
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
block/blk-mq.c | 7 ++++++-
include/linux/blk-mq.h | 13 ++++++++++++-
include/linux/blkdev.h | 2 +-
3 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index eed12fab3484..0b8b72194003 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4524,7 +4524,12 @@ static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
if (hctxs)
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
- q->queue_hw_ctx = new_hctxs;
+ rcu_assign_pointer(q->queue_hw_ctx, new_hctxs);
+ /*
+ * Make sure reading the old queue_hw_ctx from other
+ * context concurrently won't trigger uaf.
+ */
+ synchronize_rcu_expedited();
kfree(hctxs);
hctxs = new_hctxs;
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0795f29dd65d..c16875b35521 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -999,9 +999,20 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
return rq + 1;
}
+static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ rcu_read_lock();
+ hctx = rcu_dereference(q->queue_hw_ctx)[id];
+ rcu_read_unlock();
+
+ return hctx;
+}
+
#define queue_for_each_hw_ctx(q, hctx, i) \
for ((i) = 0; (i) < (q)->nr_hw_queues && \
- ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
+ ({ hctx = queue_hctx((q), i); 1; }); (i)++)
#define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 56328080ca09..e25d9802e08b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -493,7 +493,7 @@ struct request_queue {
/* hw dispatch queues */
unsigned int nr_hw_queues;
- struct blk_mq_hw_ctx **queue_hw_ctx;
+ struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
struct percpu_ref q_usage_counter;
struct lock_class_key io_lock_cls_key;
--
2.39.5 (Apple Git-154)
* Re: [PATCH v3 0/2] blk-mq: use array manage hctx map instead of xarray
From: Yu Kuai @ 2025-11-28 9:54 UTC (permalink / raw)
To: Fengnan Chang, axboe, linux-block, ming.lei, hare, hch, Yu Kuai
Cc: Fengnan Chang
Hi,
On 2025/11/28 16:53, Fengnan Chang wrote:
> After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
> an xarray instead of an array to store the hctx pointers, but in poll mode
> every call to blk_mq_poll() needs an xa_load() to find the corresponding
> hctx, which introduces some cost. In my test, xa_load() can account for
> 3.8% of CPU time.
>
> Reverting the previous change eliminates the xa_load() overhead and
> yields a 3% performance improvement.
>
> The potential use-after-free on q->queue_hw_ctx is avoided with RCU, the
> same approach Yu Kuai used in [1].
Hope I'm not too late to the party. I'm not against this set; I just
wonder whether we have considered storing the hctx directly in the bio
for blk-mq devices. Are we strongly against increasing the bio size?
>
> [1] https://lore.kernel.org/all/20220225072053.2472431-1-yukuai3@huawei.com/
>
> v3:
> fix a build error and some of the sparse warnings (not all of them):
> because the queue is frozen in __blk_mq_update_nr_hw_queues(),
> 'queue_hw_ctx' only needs RCU protection where it can be accessed
> without grabbing 'q_usage_counter'.
>
> v2:
> 1. change synchronize_rcu() to synchronize_rcu_expedited()
> 2. use rcu_dereference(q->queue_hw_ctx)[id] in queue_hctx() for readability.
>
> Fengnan Chang (2):
> blk-mq: use array manage hctx map instead of xarray
> blk-mq: fix potential uaf for 'queue_hw_ctx'
>
> block/blk-mq-tag.c | 2 +-
> block/blk-mq.c | 63 ++++++++++++++++++++++++++++--------------
> block/blk-mq.h | 2 +-
> include/linux/blk-mq.h | 14 +++++++++-
> include/linux/blkdev.h | 2 +-
> 5 files changed, 58 insertions(+), 25 deletions(-)
>
>
> base-commit: 4941a17751c99e17422be743c02c923ad706f888
--
Thanks,
Kuai
* Re: [PATCH v3 0/2] blk-mq: use array manage hctx map instead of xarray
From: fengnan chang @ 2025-11-28 10:00 UTC (permalink / raw)
To: yukuai; +Cc: axboe, linux-block, ming.lei, hare, hch, Fengnan Chang
On Fri, Nov 28, 2025 at 5:55 PM Yu Kuai <yukuai@fnnas.com> wrote:
>
> Hi,
>
> On 2025/11/28 16:53, Fengnan Chang wrote:
> > After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
> > an xarray instead of an array to store the hctx pointers, but in poll mode
> > every call to blk_mq_poll() needs an xa_load() to find the corresponding
> > hctx, which introduces some cost. In my test, xa_load() can account for
> > 3.8% of CPU time.
> >
> > Reverting the previous change eliminates the xa_load() overhead and
> > yields a 3% performance improvement.
> >
> > The potential use-after-free on q->queue_hw_ctx is avoided with RCU, the
> > same approach Yu Kuai used in [1].
>
> Hope I'm not too late to the party. I'm not against this set; I just
> wonder whether we have considered storing the hctx directly in the bio
> for blk-mq devices. Are we strongly against increasing the bio size?
I've thought about putting the hctx in the bio; it would be a better
way, but it increases the size of the bio, which is currently exactly
128 bytes.
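If we ever did grow it, a compile-time guard could catch the regression;
a hypothetical sketch, not something in this series:

	/* hypothetical: fail the build if struct bio outgrows two 64-byte cachelines */
	static_assert(sizeof(struct bio) <= 128);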
>
> >
> > [1] https://lore.kernel.org/all/20220225072053.2472431-1-yukuai3@huawei.com/
> >
> > v3:
> > fix a build error and some of the sparse warnings (not all of them):
> > because the queue is frozen in __blk_mq_update_nr_hw_queues(),
> > 'queue_hw_ctx' only needs RCU protection where it can be accessed
> > without grabbing 'q_usage_counter'.
> >
> > v2:
> > 1. change synchronize_rcu() to synchronize_rcu_expedited()
> > 2. use rcu_dereference(q->queue_hw_ctx)[id] in queue_hctx() for readability.
> >
> > Fengnan Chang (2):
> > blk-mq: use array manage hctx map instead of xarray
> > blk-mq: fix potential uaf for 'queue_hw_ctx'
> >
> > block/blk-mq-tag.c | 2 +-
> > block/blk-mq.c | 63 ++++++++++++++++++++++++++++--------------
> > block/blk-mq.h | 2 +-
> > include/linux/blk-mq.h | 14 +++++++++-
> > include/linux/blkdev.h | 2 +-
> > 5 files changed, 58 insertions(+), 25 deletions(-)
> >
> >
> > base-commit: 4941a17751c99e17422be743c02c923ad706f888
>
> --
> Thanks,
> Kuai
* Re: [PATCH v3 0/2] blk-mq: use array manage hctx map instead of xarray
From: Jens Axboe @ 2025-11-28 16:09 UTC (permalink / raw)
To: fengnan chang, yukuai; +Cc: linux-block, ming.lei, hare, hch, Fengnan Chang
On 11/28/25 3:00 AM, fengnan chang wrote:
> On Fri, Nov 28, 2025 at 5:55 PM Yu Kuai <yukuai@fnnas.com> wrote:
>>
>> Hi,
>>
>> On 2025/11/28 16:53, Fengnan Chang wrote:
>>> After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
>>> an xarray instead of an array to store the hctx pointers, but in poll mode
>>> every call to blk_mq_poll() needs an xa_load() to find the corresponding
>>> hctx, which introduces some cost. In my test, xa_load() can account for
>>> 3.8% of CPU time.
>>>
>>> Reverting the previous change eliminates the xa_load() overhead and
>>> yields a 3% performance improvement.
>>>
>>> The potential use-after-free on q->queue_hw_ctx is avoided with RCU, the
>>> same approach Yu Kuai used in [1].
>>
>> Hope I'm not too late to the party. I'm not against this set; I just
>> wonder whether we have considered storing the hctx directly in the bio
>> for blk-mq devices. Are we strongly against increasing the bio size?
>
> I've thought about putting the hctx in the bio; it would be a better
> way, but it increases the size of the bio, which is currently exactly
> 128 bytes.
Don't think it's worth it for polled IO.
--
Jens Axboe
* Re: [PATCH v3 2/2] blk-mq: fix potential uaf for 'queue_hw_ctx'
From: Jens Axboe @ 2025-11-28 16:20 UTC (permalink / raw)
To: Fengnan Chang, linux-block, ming.lei, hare, hch, yukuai
Cc: Fengnan Chang, Yu Kuai
On 11/28/25 1:53 AM, Fengnan Chang wrote:
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 0795f29dd65d..c16875b35521 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -999,9 +999,20 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
> return rq + 1;
> }
>
> +static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
> +{
> + struct blk_mq_hw_ctx *hctx;
> +
> + rcu_read_lock();
> + hctx = rcu_dereference(q->queue_hw_ctx)[id];
> + rcu_read_unlock();
> +
> + return hctx;
> +}
Should e.g. blk_mq_map_queue_type() use this helper now too?
Note: I've applied this, so anything beyond this v3 should be an
incremental against the current tree.
--
Jens Axboe
* Re: [PATCH v3 0/2] blk-mq: use array manage hctx map instead of xarray
From: Jens Axboe @ 2025-11-28 16:22 UTC (permalink / raw)
To: linux-block, ming.lei, hare, hch, yukuai, Fengnan Chang; +Cc: Fengnan Chang
On Fri, 28 Nov 2025 16:53:12 +0800, Fengnan Chang wrote:
> After commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray"), we use
> an xarray instead of an array to store the hctx pointers, but in poll mode
> every call to blk_mq_poll() needs an xa_load() to find the corresponding
> hctx, which introduces some cost. In my test, xa_load() can account for
> 3.8% of CPU time.
>
> Reverting the previous change eliminates the xa_load() overhead and
> yields a 3% performance improvement.
>
> [...]
Applied, thanks!
[1/2] blk-mq: use array manage hctx map instead of xarray
commit: d0c98769ee7d5db8d699a270690639cde1766cd4
[2/2] blk-mq: fix potential uaf for 'queue_hw_ctx'
commit: 89e1fb7ceffd898505ad7fa57acec0585bfaa2cc
Best regards,
--
Jens Axboe
* Re: [PATCH v3 2/2] blk-mq: fix potential uaf for 'queue_hw_ctx'
From: fengnan chang @ 2025-12-01 12:26 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, ming.lei, hare, hch, yukuai, Fengnan Chang, Yu Kuai
On Sat, Nov 29, 2025 at 12:20 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 11/28/25 1:53 AM, Fengnan Chang wrote:
> > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> > index 0795f29dd65d..c16875b35521 100644
> > --- a/include/linux/blk-mq.h
> > +++ b/include/linux/blk-mq.h
> > @@ -999,9 +999,20 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
> > return rq + 1;
> > }
> >
> > +static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
> > +{
> > + struct blk_mq_hw_ctx *hctx;
> > +
> > + rcu_read_lock();
> > + hctx = rcu_dereference(q->queue_hw_ctx)[id];
> > + rcu_read_unlock();
> > +
> > + return hctx;
> > +}
>
> Should eg blk_mq_map_queue_type() use this helper now too?
You are right; some callers of blk_mq_map_queue_type() currently don't
grab 'q_usage_counter', such as blk_mq_cpu_mapped_to_hctx(). I also
checked all the other functions; there are no more missed cases.
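The conversion is mechanical; presumably something like this sketch
(the posted patch below is authoritative):

	static inline struct blk_mq_hw_ctx *
	blk_mq_map_queue_type(struct request_queue *q, enum hctx_type type,
			      unsigned int cpu)
	{
		return queue_hctx(q, q->tag_set->map[type].mq_map[cpu]);
	}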
New patch:
https://lore.kernel.org/linux-block/20251201122504.64439-1-changfengnan@bytedance.com/T/#u
Thanks.
>
> Note: I've applied this, so anything beyond this v3 should be an
> incremental against the current tree.
>
> --
> Jens Axboe