From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>,
"jianchao.wang" <jianchao.w.wang@oracle.com>,
"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>
Subject: Re: v4.20-rc6: Sporadic use-after-free in bt_iter()
Date: Thu, 20 Dec 2018 12:56:02 -0800 [thread overview]
Message-ID: <1545339362.185366.511.camel@acm.org> (raw)
In-Reply-To: <b9c3bd7c-9e82-7300-d525-0bd72ed66f66@kernel.dk>
On Thu, 2018-12-20 at 11:33 -0700, Jens Axboe wrote:
> [ ... ]
> Something like this, totally untested. If the queue matches, we know it's
> safe to dereference it.
>
> A safer API to export as well...
>
>
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 2089c6c62f44..78192b544fa2 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
>
> if (!reserved)
> bitnr += tags->nr_reserved_tags;
> - rq = tags->rqs[bitnr];
> + if (tags->rqs[bitnr].queue != hctx->queue)
> + return true;
>
> /*
> * We can hit rq == NULL here, because the tagging functions
> * test and set the bit before assigning ->rqs[].
> */
> - if (rq && rq->q == hctx->queue)
> + rq = tags->rqs[bitnr].rq;
> + if (rq)
> return iter_data->fn(hctx, rq, iter_data->data, reserved);
> return true;
> }
> @@ -268,6 +270,7 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
>
> struct bt_tags_iter_data {
> struct blk_mq_tags *tags;
> + struct blk_mq_hw_ctx *hctx;
> busy_tag_iter_fn *fn;
> void *data;
> bool reserved;
> @@ -287,7 +290,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
> * We can hit rq == NULL here, because the tagging functions
> * test and set the bit before assining ->rqs[].
> */
> - rq = tags->rqs[bitnr];
> + rq = tags->rqs[bitnr].rq;
> if (rq && blk_mq_request_started(rq))
> return iter_data->fn(rq, iter_data->data, reserved);
>
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index 61deab0b5a5a..bb84d1f099c7 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -4,6 +4,11 @@
>
> #include "blk-mq.h"
>
> +struct rq_tag_entry {
> + struct request_queue *queue;
> + struct request *rq;
> +};
> +
> /*
> * Tag address space map.
> */
> @@ -16,7 +21,7 @@ struct blk_mq_tags {
> struct sbitmap_queue bitmap_tags;
> struct sbitmap_queue breserved_tags;
>
> - struct request **rqs;
> + struct rq_tag_entry *rqs;
> struct request **static_rqs;
> struct list_head page_list;
> };
> @@ -78,7 +83,8 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
> static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
> unsigned int tag, struct request *rq)
> {
> - hctx->tags->rqs[tag] = rq;
> + hctx->tags->rqs[tag].queue = hctx->queue;
> + hctx->tags->rqs[tag].rq = rq;
> }
>
> static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ba37b9e15e9..4f194946dbd9 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -298,13 +298,16 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
> rq->tag = -1;
> rq->internal_tag = tag;
> } else {
> - if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
> + struct blk_mq_hw_ctx *hctx = data->hctx;
> +
> + if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
> rq_flags = RQF_MQ_INFLIGHT;
> - atomic_inc(&data->hctx->nr_active);
> + atomic_inc(&hctx->nr_active);
> }
> rq->tag = tag;
> rq->internal_tag = -1;
> - data->hctx->tags->rqs[rq->tag] = rq;
> + hctx->tags->rqs[rq->tag].queue = hctx->queue;
> + hctx->tags->rqs[rq->tag].rq = rq;
> }
>
> /* csd/requeue_work/fifo_time is initialized before use */
> @@ -797,8 +800,8 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
> struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
> {
> if (tag < tags->nr_tags) {
> - prefetch(tags->rqs[tag]);
> - return tags->rqs[tag];
> + prefetch(tags->rqs[tag].rq);
> + return tags->rqs[tag].rq;
> }
>
> return NULL;
> @@ -809,10 +812,11 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
> void *priv, bool reserved)
> {
> /*
> - * If we find a request that is inflight and the queue matches,
> - * we know the queue is busy. Return false to stop the iteration.
> + * We're only called here if the queue matches. If the rq state is
> + * inflight, we know the queue is busy. Return false to stop the
> + * iteration.
> */
> - if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
> + if (rq->state == MQ_RQ_IN_FLIGHT) {
> bool *busy = priv;
>
> *busy = true;
> @@ -1049,11 +1053,14 @@ bool blk_mq_get_driver_tag(struct request *rq)
> shared = blk_mq_tag_busy(data.hctx);
> rq->tag = blk_mq_get_tag(&data);
> if (rq->tag >= 0) {
> + struct blk_mq_hw_ctx *hctx = data.hctx;
> +
> if (shared) {
> rq->rq_flags |= RQF_MQ_INFLIGHT;
> atomic_inc(&data.hctx->nr_active);
> }
> - data.hctx->tags->rqs[rq->tag] = rq;
> + hctx->tags->rqs[rq->tag].queue = hctx->queue;
> + hctx->tags->rqs[rq->tag].rq = rq;
> }
>
> done:
> @@ -2069,7 +2076,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
> if (!tags)
> return NULL;
>
> - tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
> + tags->rqs = kcalloc_node(nr_tags, sizeof(struct rq_tag_entry),
> GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
> node);
> if (!tags->rqs) {
Hi Jens,
How about the patch below? Its behavior should be similar to your patch but I think
this patch is simpler.
Thanks,
Bart.
---
block/blk-mq.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6a7566244de3..c57611b1f2a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -86,6 +86,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
}
struct mq_inflight {
+ struct request_queue *q;
struct hd_struct *part;
unsigned int *inflight;
};
@@ -96,6 +97,9 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
+ if (rq->q != mi->q)
+ return;
+
/*
* index[0] counts the specific partition that was asked for. index[1]
* counts the ones that are active on the whole device, so increment
@@ -110,7 +114,11 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = {
+ .q = q,
+ .part = part,
+ .inflight = inflight,
+ };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
@@ -129,7 +137,11 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = {
+ .q = q,
+ .part = part,
+ .inflight = inflight,
+ };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
@@ -477,6 +489,8 @@ static void __blk_mq_free_request(struct request *rq)
const int sched_tag = rq->internal_tag;
blk_pm_mark_last_busy(rq);
+ /* Avoid that blk_mq_queue_tag_busy_iter() picks up this request. */
+ rq->q = NULL;
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
next prev parent reply other threads:[~2018-12-20 20:56 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-19 23:24 v4.20-rc6: Sporadic use-after-free in bt_iter() Bart Van Assche
2018-12-19 23:27 ` Jens Axboe
2018-12-20 0:16 ` Bart Van Assche
2018-12-20 3:17 ` Jens Axboe
2018-12-20 3:24 ` jianchao.wang
2018-12-20 4:19 ` Jens Axboe
2018-12-20 4:32 ` jianchao.wang
2018-12-20 4:48 ` Jens Axboe
2018-12-20 5:03 ` jianchao.wang
2018-12-20 13:02 ` Jens Axboe
2018-12-20 13:07 ` Jens Axboe
2018-12-20 18:01 ` Bart Van Assche
2018-12-20 18:21 ` Jens Axboe
2018-12-20 18:33 ` Jens Axboe
2018-12-20 20:56 ` Bart Van Assche [this message]
2018-12-20 21:00 ` Jens Axboe
2018-12-20 21:23 ` Bart Van Assche
2018-12-20 21:26 ` Jens Axboe
2018-12-20 21:31 ` Bart Van Assche
2018-12-20 21:34 ` Jens Axboe
2018-12-20 21:40 ` Bart Van Assche
2018-12-20 21:44 ` Jens Axboe
2018-12-20 21:48 ` Jens Axboe
2018-12-20 22:19 ` Bart Van Assche
2018-12-20 22:23 ` Jens Axboe
2018-12-20 22:33 ` Jens Axboe
2018-12-20 22:47 ` Jens Axboe
2018-12-20 22:50 ` Jens Axboe
2019-02-14 23:36 ` Bart Van Assche
2019-02-15 18:29 ` Evan Green
2019-02-19 16:48 ` Bart Van Assche
2019-02-21 20:54 ` Evan Green
2019-02-15 2:57 ` jianchao.wang
2018-12-20 4:06 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1545339362.185366.511.camel@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=jianchao.w.wang@oracle.com \
--cc=linux-block@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.