* [PATCH 1/2] blk-mq: rename blk_mq_can_use_cached_rq
2024-01-11 13:57 ensure q_usage_counter is held over bio splits Christoph Hellwig
@ 2024-01-11 13:57 ` Christoph Hellwig
2024-01-11 13:57 ` [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios Christoph Hellwig
` (2 subsequent siblings)
3 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 13:57 UTC (permalink / raw)
To: Jens Axboe; +Cc: Ming Lei, linux-block
blk_mq_can_use_cached_rq doesn't just check if we can use the request,
but also performs the work to actually use it. Remove the _can in the
naming, and improve the comment describing the function.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-mq.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa9a05fdd02377..a6731cacd0132c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2884,8 +2884,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
return NULL;
}
-/* return true if this @rq can be used for @bio */
-static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+/*
+ * Check if we can use the passed on request for submitting the passed in bio,
+ * and remove it from the request list if it can be used.
+ */
+static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
struct bio *bio)
{
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
@@ -2963,7 +2966,7 @@ void blk_mq_submit_bio(struct bio *bio)
return;
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
- if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ if (blk_mq_use_cached_rq(rq, plug, bio))
goto done;
percpu_ref_get(&q->q_usage_counter);
} else {
--
2.39.2
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 13:57 ensure q_usage_counter is held over bio splits Christoph Hellwig
2024-01-11 13:57 ` [PATCH 1/2] blk-mq: rename blk_mq_can_use_cached_rq Christoph Hellwig
@ 2024-01-11 13:57 ` Christoph Hellwig
2024-01-11 16:12 ` Jens Axboe
2024-01-11 22:22 ` Ming Lei
2024-01-11 16:03 ` ensure q_usage_counter is held over bio splits Jens Axboe
2024-01-14 14:38 ` (subset) " Jens Axboe
3 siblings, 2 replies; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 13:57 UTC (permalink / raw)
To: Jens Axboe; +Cc: Ming Lei, linux-block
q_usage_counter is the only thing preventing us from the limits changing
under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
Change __submit_bio to always acquire the q_usage_counter counter before
branching out into bio vs request based helper, and let blk_mq_submit_bio
tell it if it consumed the reference by handing it off to the request.
Fixes: 9d497e2941c3 ("block: don't protect submit_bio_checks by q_usage_counter")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-core.c | 14 ++++++++-----
block/blk-mq.c | 52 +++++++++++++++++++++---------------------------
block/blk-mq.h | 2 +-
3 files changed, 33 insertions(+), 35 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 9520ccab305007..885ba6bb58556f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -592,17 +592,21 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
static void __submit_bio(struct bio *bio)
{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
+ if (unlikely(bio_queue_enter(bio)))
+ return;
if (!bio->bi_bdev->bd_has_submit_bio) {
- blk_mq_submit_bio(bio);
- } else if (likely(bio_queue_enter(bio) == 0)) {
- struct gendisk *disk = bio->bi_bdev->bd_disk;
-
+ if (blk_mq_submit_bio(bio))
+ return;
+ } else {
disk->fops->submit_bio(bio);
- blk_queue_exit(disk->queue);
}
+
+ blk_queue_exit(disk->queue);
}
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a6731cacd0132c..421db29535ba50 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2936,14 +2936,17 @@ static void bio_set_ioprio(struct bio *bio)
*
* It will not queue the request if there is an error with the bio, or at the
* request creation.
+ *
+ * Returns %true if the q_usage_counter usage is consumed, or %false if it
+ * isn't.
*/
-void blk_mq_submit_bio(struct bio *bio)
+bool blk_mq_submit_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- struct request *rq = NULL;
+ struct request *rq;
unsigned int nr_segs = 1;
blk_status_t ret;
@@ -2951,39 +2954,28 @@ void blk_mq_submit_bio(struct bio *bio)
if (bio_may_exceed_limits(bio, &q->limits)) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
- return;
+ return false;
}
bio_set_ioprio(bio);
+ if (!bio_integrity_prep(bio))
+ return false;
+
if (plug) {
rq = rq_list_peek(&plug->cached_rq);
- if (rq && rq->q != q)
- rq = NULL;
- }
- if (rq) {
- if (!bio_integrity_prep(bio))
- return;
- if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
- return;
- if (blk_mq_use_cached_rq(rq, plug, bio))
- goto done;
- percpu_ref_get(&q->q_usage_counter);
- } else {
- if (unlikely(bio_queue_enter(bio)))
- return;
- if (!bio_integrity_prep(bio))
- goto fail;
+ if (rq && rq->q == q) {
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+ return false;
+ if (blk_mq_use_cached_rq(rq, plug, bio))
+ goto has_rq;
+ }
}
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq)) {
-fail:
- blk_queue_exit(q);
- return;
- }
-
-done:
+ if (unlikely(!rq))
+ return false;
+has_rq:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);
@@ -2995,15 +2987,15 @@ void blk_mq_submit_bio(struct bio *bio)
bio->bi_status = ret;
bio_endio(bio);
blk_mq_free_request(rq);
- return;
+ return true;
}
if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
- return;
+ return true;
if (plug) {
blk_add_rq_to_plug(plug, rq);
- return;
+ return true;
}
hctx = rq->mq_hctx;
@@ -3014,6 +3006,8 @@ void blk_mq_submit_bio(struct bio *bio)
} else {
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
}
+
+ return true;
}
#ifdef CONFIG_BLK_MQ_STACKING
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f75a9ecfebde1b..d45f222f117748 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -39,7 +39,7 @@ enum {
typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)
-void blk_mq_submit_bio(struct bio *bio);
+bool blk_mq_submit_bio(struct bio *bio);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
--
2.39.2
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 13:57 ` [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios Christoph Hellwig
@ 2024-01-11 16:12 ` Jens Axboe
2024-01-11 16:14 ` Christoph Hellwig
2024-01-11 22:22 ` Ming Lei
1 sibling, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2024-01-11 16:12 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On 1/11/24 6:57 AM, Christoph Hellwig wrote:
> q_usage_counter is the only thing preventing us from the limits changing
> under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
>
> Change __submit_bio to always acquire the q_usage_counter counter before
> branching out into bio vs request based helper, and let blk_mq_submit_bio
> tell it if it consumed the reference by handing it off to the request.
This causes hangs for me on shutdown/reset:
[ 56.146054] sd 6:0:0:0: [sdb] Synchronizing SCSI cache
[ 56.147739] sd 6:0:0:0: [sdb] Stopping disk
[ 56.148976] sd 0:0:0:0: [sda] Synchronizing SCSI cache
[ 56.150803] sd 0:0:0:0: [sda] Stopping disk
[ 75.549201] INFO: task systemd-shutdow:1 blocked for more than 15 seconds.
[ 75.549636] Not tainted 6.7.0-rc5-00173-g34d71db9cce2 #4540
[ 75.549977] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 75.550401] task:systemd-shutdow state:D stack:0 pid:1 tgid:1 ppid:0 flags:0x00000004
[ 75.550900] Call trace:
[ 75.551042] __switch_to+0x114/0x150
[ 75.551253] __schedule+0x510/0x10d4
[ 75.551451] schedule+0x7c/0x1ac
[ 75.551635] schedule_timeout+0xe8/0x1a4
[ 75.551857] blk_mq_freeze_queue_wait_timeout+0xf4/0x18c
[ 75.552157] nvme_wait_freeze_timeout+0x68/0xa4
[ 75.552503] nvme_dev_disable+0x35c/0x374
[ 75.552734] nvme_shutdown+0x34/0x40
[ 75.552956] pci_device_shutdown+0x48/0x54
[ 75.553184] device_shutdown+0x1c4/0x314
[ 75.553403] kernel_power_off+0x40/0x88
which seems to indicate that a reference is being leaked. Haven't poked
any further at it, I'll drop these two for now.
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 16:12 ` Jens Axboe
@ 2024-01-11 16:14 ` Christoph Hellwig
2024-01-11 16:17 ` Jens Axboe
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 16:14 UTC (permalink / raw)
To: Jens Axboe; +Cc: Christoph Hellwig, Ming Lei, linux-block
On Thu, Jan 11, 2024 at 09:12:23AM -0700, Jens Axboe wrote:
> On 1/11/24 6:57 AM, Christoph Hellwig wrote:
> > q_usage_counter is the only thing preventing us from the limits changing
> > under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
> >
> > Change __submit_bio to always acquire the q_usage_counter counter before
> > branching out into bio vs request based helper, and let blk_mq_submit_bio
> > tell it if it consumed the reference by handing it off to the request.
>
> This causes hangs for me on shutdown/reset:
> which seems to indicate that a reference is being leaked. Haven't poked
> any further at it, I'll drop these two for now.
Can you send me your .config?
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 16:14 ` Christoph Hellwig
@ 2024-01-11 16:17 ` Jens Axboe
2024-01-11 16:18 ` Christoph Hellwig
2024-01-11 17:10 ` Christoph Hellwig
0 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2024-01-11 16:17 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On 1/11/24 9:14 AM, Christoph Hellwig wrote:
> On Thu, Jan 11, 2024 at 09:12:23AM -0700, Jens Axboe wrote:
>> On 1/11/24 6:57 AM, Christoph Hellwig wrote:
>>> q_usage_counter is the only thing preventing us from the limits changing
>>> under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
>>>
>>> Change __submit_bio to always acquire the q_usage_counter counter before
>>> branching out into bio vs request based helper, and let blk_mq_submit_bio
>>> tell it if it consumed the reference by handing it off to the request.
>>
>> This causes hangs for me on shutdown/reset:
>
>> which seems to indicate that a reference is being leaked. Haven't poked
>> any further at it, I'll drop these two for now.
>
> Can you send me your .config?
Don't think it's .config related, hit it on my test box and then in my
vm as well. It's likely due to batched allocations, would be my guess.
I can start/halt the vm fine with just a boot, but if I do:
$ ~/git/fio/t/io_uring -p1 -d128 -b512 -s32 -c32 -F1 -B1 -R0 -X1 -P1 /dev/nvme0n1
for just a brief moment, nvme0 will hang on shutdown.
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 16:17 ` Jens Axboe
@ 2024-01-11 16:18 ` Christoph Hellwig
2024-01-11 17:10 ` Christoph Hellwig
1 sibling, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 16:18 UTC (permalink / raw)
To: Jens Axboe; +Cc: Christoph Hellwig, Ming Lei, linux-block
On Thu, Jan 11, 2024 at 09:17:41AM -0700, Jens Axboe wrote:
> Don't think it's .config related, hit it on my test box and then in my
> vm as well. It's likely due to batched allocations, would be my guess.
> I can start/halt the vm fine with just a boot, but if I do:
>
> $ ~/git/fio/t/io_uring -p1 -d128 -b512 -s32 -c32 -F1 -B1 -R0 -X1 -P1 /dev/nvme0n1
>
> for just a brief moment, nvme0 will hang on shutdown.
Thanks, I'll look into it.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 16:17 ` Jens Axboe
2024-01-11 16:18 ` Christoph Hellwig
@ 2024-01-11 17:10 ` Christoph Hellwig
2024-01-11 17:18 ` Jens Axboe
1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 17:10 UTC (permalink / raw)
To: Jens Axboe; +Cc: Christoph Hellwig, Ming Lei, linux-block
On Thu, Jan 11, 2024 at 09:17:41AM -0700, Jens Axboe wrote:
> On 1/11/24 9:14 AM, Christoph Hellwig wrote:
> > On Thu, Jan 11, 2024 at 09:12:23AM -0700, Jens Axboe wrote:
> >> On 1/11/24 6:57 AM, Christoph Hellwig wrote:
> >>> q_usage_counter is the only thing preventing us from the limits changing
> >>> under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
> >>>
> >>> Change __submit_bio to always acquire the q_usage_counter counter before
> >>> branching out into bio vs request based helper, and let blk_mq_submit_bio
> >>> tell it if it consumed the reference by handing it off to the request.
> >>
> >> This causes hangs for me on shutdown/reset:
> >
> >> which seems to indicate that a reference is being leaked. Haven't poked
> >> any further at it, I'll drop these two for now.
> >
> > Can you send me your .config?
>
> Don't think it's .config related, hit it on my test box and then in my
> vm as well. It's likely due to batched allocations, would be my guess.
> I can start/halt the vm fine with just a boot, but if I do:
Yupp, cached rqs it was. The incremental patch below fixes it.
Can you add some cached request testing to blktests so that this gets
covered by default?
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 421db29535ba50..39eb4b99320c11 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2967,8 +2967,14 @@ bool blk_mq_submit_bio(struct bio *bio)
if (rq && rq->q == q) {
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return false;
- if (blk_mq_use_cached_rq(rq, plug, bio))
+ if (blk_mq_use_cached_rq(rq, plug, bio)) {
+ /*
+ * We're using the reference already owned by
+ * rq from here on.
+ */
+ blk_queue_exit(q);
goto has_rq;
+ }
}
}
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 17:10 ` Christoph Hellwig
@ 2024-01-11 17:18 ` Jens Axboe
2024-01-11 17:24 ` Christoph Hellwig
0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2024-01-11 17:18 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On 1/11/24 10:10 AM, Christoph Hellwig wrote:
> On Thu, Jan 11, 2024 at 09:17:41AM -0700, Jens Axboe wrote:
>> On 1/11/24 9:14 AM, Christoph Hellwig wrote:
>>> On Thu, Jan 11, 2024 at 09:12:23AM -0700, Jens Axboe wrote:
>>>> On 1/11/24 6:57 AM, Christoph Hellwig wrote:
>>>>> q_usage_counter is the only thing preventing us from the limits changing
>>>>> under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
>>>>>
>>>>> Change __submit_bio to always acquire the q_usage_counter counter before
>>>>> branching out into bio vs request based helper, and let blk_mq_submit_bio
>>>>> tell it if it consumed the reference by handing it off to the request.
>>>>
>>>> This causes hangs for me on shutdown/reset:
>>>
>>>> which seems to indicate that a reference is being leaked. Haven't poked
>>>> any further at it, I'll drop these two for now.
>>>
>>> Can you send me your .config?
>>
>> Don't think it's .config related, hit it on my test box and then in my
>> vm as well. It's likely due to batched allocations, would be my guess.
>> I can start/halt the vm fine with just a boot, but if I do:
>
> Yupp, cached rqs it was. The incremental patch below fixes it.
> Can you add some cached request testing to blktests so that this gets
> covered by default?
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 421db29535ba50..39eb4b99320c11 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2967,8 +2967,14 @@ bool blk_mq_submit_bio(struct bio *bio)
> if (rq && rq->q == q) {
> if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
> return false;
> - if (blk_mq_use_cached_rq(rq, plug, bio))
> + if (blk_mq_use_cached_rq(rq, plug, bio)) {
> + /*
> + * We're using the reference already owned by
> + * rq from here on.
> + */
> + blk_queue_exit(q);
> goto has_rq;
> + }
> }
> }
This also highlights a potential inefficiency in the patch, as now we're
grabbing+dropping references when we don't need to. May not be a big
deal, but it's one of the things that cached requests got rid of. Though
I'm not quite sure how to refactor to get rid of that, as we'd need to
shuffle the splitting and request get for that.
Could you take another look at the series with that in mind?
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 17:18 ` Jens Axboe
@ 2024-01-11 17:24 ` Christoph Hellwig
2024-01-11 20:06 ` Jens Axboe
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-11 17:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: Christoph Hellwig, Ming Lei, linux-block
On Thu, Jan 11, 2024 at 10:18:31AM -0700, Jens Axboe wrote:
> This also highlights a potential inefficiency in the patch, as now we're
> grabbing+dropping references when we don't need to. May not be a big
> deal, but it's one of the things that cached requests got rid of. Though
> I'm not quite sure how to refactor to get rid of that, as we'd need to
> shuffle the splitting and request get for that.
>
> Could you take another look at the series with that in mind?
I thought about it, but it gets pretty ugly quickly. bio_queue_enter
needs to move back into blk_mq_submit_bio, and then we'd skip it
initially if bio_may_exceed_limits is false, and then we later need
to add it back. (we'll probably also need to special case
blk_queue_bounce as that setting could change to. I wish we could
finally kill that)
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 17:24 ` Christoph Hellwig
@ 2024-01-11 20:06 ` Jens Axboe
2024-01-12 5:44 ` Christoph Hellwig
0 siblings, 1 reply; 33+ messages in thread
From: Jens Axboe @ 2024-01-11 20:06 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On 1/11/24 10:24 AM, Christoph Hellwig wrote:
> On Thu, Jan 11, 2024 at 10:18:31AM -0700, Jens Axboe wrote:
>> This also highlights a potential inefficiency in the patch, as now we're
>> grabbing+dropping references when we don't need to. May not be a big
>> deal, but it's one of the things that cached requests got rid of. Though
>> I'm not quite sure how to refactor to get rid of that, as we'd need to
>> shuffle the splitting and request get for that.
>>
>> Could you take another look at the series with that in mind?
>
> I thought about it, but it gets pretty ugly quickly. bio_queue_enter
> needs to move back into blk_mq_submit_bio, and then we'd skip it
> initially if bio_may_exceed_limits is false, and then we later need
> to add it back. (we'll probably also need to special case
> blk_queue_bounce as that setting could change to. I wish we could
> finally kill that)
Something like this? Not super pretty with the duplication, but...
Should suffice for a fix, and then we can refactor it on top of that.
ioprio is inherited when cloning, so we don't need to do that post the
split.
For the bounce side, how would these settings change at runtime? Should
be set at init time and then never change. And agree would be so nice to
kill that code...
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa9a05fdd023..01134e845d8d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2945,12 +2945,6 @@ void blk_mq_submit_bio(struct bio *bio)
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
- if (bio_may_exceed_limits(bio, &q->limits)) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- return;
- }
-
bio_set_ioprio(bio);
if (plug) {
@@ -2959,6 +2953,11 @@ void blk_mq_submit_bio(struct bio *bio)
rq = NULL;
}
if (rq) {
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ return;
+ }
if (!bio_integrity_prep(bio))
return;
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
@@ -2969,6 +2968,11 @@ void blk_mq_submit_bio(struct bio *bio)
} else {
if (unlikely(bio_queue_enter(bio)))
return;
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ goto fail;
+ }
if (!bio_integrity_prep(bio))
goto fail;
}
--
Jens Axboe
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 20:06 ` Jens Axboe
@ 2024-01-12 5:44 ` Christoph Hellwig
2024-01-12 14:22 ` Jens Axboe
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-12 5:44 UTC (permalink / raw)
To: Jens Axboe; +Cc: Ming Lei, linux-block, Ulf Hansson, linux-mmc
On Thu, Jan 11, 2024 at 01:06:43PM -0700, Jens Axboe wrote:
> Something like this? Not super pretty with the duplication, but...
> Should suffice for a fix, and then we can refactor it on top of that.
> ioprio is inherited when cloning, so we don't need to do that post the
> split.
Yes, this could work. It'll get worse with anything we need to do under
q_usage_counter counter, though. I mean, it is a perpcu_counter, which
should be really light-weight compared to all the other stuff you do.
I'd really love to see numbers that show it matters.
> For the bounce side, how would these settings change at runtime?
Well, we don't really prevent any setting from changing at runtime.
But yes, neither mmc nor the few scsi drivers using it seems to do
any runtime re-configuration.
> Should
> be set at init time and then never change. And agree would be so nice to
> kill that code...
I wish we could see some more folks from the mmc maintainers to do
proper scatterlist (or bio/request) kmap helpers. The scsi drivers
could easily piggy back on that or just be disabled for HIGHMEM configs.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-12 5:44 ` Christoph Hellwig
@ 2024-01-12 14:22 ` Jens Axboe
2024-01-12 14:25 ` Christoph Hellwig
2024-01-15 11:20 ` Ulf Hansson
0 siblings, 2 replies; 33+ messages in thread
From: Jens Axboe @ 2024-01-12 14:22 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block, Ulf Hansson, linux-mmc
On 1/11/24 10:44 PM, Christoph Hellwig wrote:
> On Thu, Jan 11, 2024 at 01:06:43PM -0700, Jens Axboe wrote:
>> Something like this? Not super pretty with the duplication, but...
>> Should suffice for a fix, and then we can refactor it on top of that.
>> ioprio is inherited when cloning, so we don't need to do that post the
>> split.
>
> Yes, this could work. It'll get worse with anything we need to do under
> q_usage_counter counter, though. I mean, it is a perpcu_counter, which
> should be really light-weight compared to all the other stuff you do.
> I'd really love to see numbers that show it matters.
Yep it is pretty cheap, but it's not free. Here's a test where we just
grab a ref and drop it, which should arguably be cheaper than doing a
ref at the top and dropping it at the bottom due to temporal locality:
5.01% +0.86% [kernel.vmlinux] [k] blk_mq_submit_bio
>> Should
>> be set at init time and then never change. And agree would be so nice to
>> kill that code...
>
> I wish we could see some more folks from the mmc maintainers to do
> proper scatterlist (or bio/request) kmap helpers. The scsi drivers
> could easily piggy back on that or just be disabled for HIGHMEM configs.
Maybe we just nudge them that way by making them depend on !HIGHMEM...
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-12 14:22 ` Jens Axboe
@ 2024-01-12 14:25 ` Christoph Hellwig
2024-01-12 16:10 ` Jens Axboe
2024-01-15 11:20 ` Ulf Hansson
1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-12 14:25 UTC (permalink / raw)
To: Jens Axboe
Cc: Christoph Hellwig, Ming Lei, linux-block, Ulf Hansson, linux-mmc
On Fri, Jan 12, 2024 at 07:22:18AM -0700, Jens Axboe wrote:
> Yep it is pretty cheap, but it's not free. Here's a test where we just
> grab a ref and drop it, which should arguably be cheaper than doing a
> ref at the top and dropping it at the bottom due to temporal locality:
>
> 5.01% +0.86% [kernel.vmlinux] [k] blk_mq_submit_bio
Ok. Do you want to send out your version formally?
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-12 14:25 ` Christoph Hellwig
@ 2024-01-12 16:10 ` Jens Axboe
0 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2024-01-12 16:10 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block, Ulf Hansson, linux-mmc
On 1/12/24 7:25 AM, Christoph Hellwig wrote:
> On Fri, Jan 12, 2024 at 07:22:18AM -0700, Jens Axboe wrote:
>> Yep it is pretty cheap, but it's not free. Here's a test where we just
>> grab a ref and drop it, which should arguably be cheaper than doing a
>> ref at the top and dropping it at the bottom due to temporal locality:
>>
>> 5.01% +0.86% [kernel.vmlinux] [k] blk_mq_submit_bio
>
> Ok. Do you want to send out your version formally?
Sure, can do. I'll pick your first patch here as that one makes
sense separately.
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-12 14:22 ` Jens Axboe
2024-01-12 14:25 ` Christoph Hellwig
@ 2024-01-15 11:20 ` Ulf Hansson
2024-01-22 7:34 ` mmc vs highmem, was: " Christoph Hellwig
1 sibling, 1 reply; 33+ messages in thread
From: Ulf Hansson @ 2024-01-15 11:20 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: Ming Lei, linux-block, linux-mmc, Arnd Bergmann, Linus Walleij
+ Arnd, Linus
On Fri, 12 Jan 2024 at 15:22, Jens Axboe <axboe@kernel.dk> wrote:
>
> On 1/11/24 10:44 PM, Christoph Hellwig wrote:
> > On Thu, Jan 11, 2024 at 01:06:43PM -0700, Jens Axboe wrote:
> >> Something like this? Not super pretty with the duplication, but...
> >> Should suffice for a fix, and then we can refactor it on top of that.
> >> ioprio is inherited when cloning, so we don't need to do that post the
> >> split.
> >
> > Yes, this could work. It'll get worse with anything we need to do under
> > q_usage_counter counter, though. I mean, it is a perpcu_counter, which
> > should be really light-weight compared to all the other stuff you do.
> > I'd really love to see numbers that show it matters.
>
> Yep it is pretty cheap, but it's not free. Here's a test where we just
> grab a ref and drop it, which should arguably be cheaper than doing a
> ref at the top and dropping it at the bottom due to temporal locality:
>
> 5.01% +0.86% [kernel.vmlinux] [k] blk_mq_submit_bio
>
> >> Should
> >> be set at init time and then never change. And agree would be so nice to
> >> kill that code...
> >
> > I wish we could see some more folks from the mmc maintainers to do
> > proper scatterlist (or bio/request) kmap helpers. The scsi drivers
> > could easily piggy back on that or just be disabled for HIGHMEM configs.
>
> Maybe we just nudge them that way by making them depend on !HIGHMEM...
>
> --
> Jens Axboe
>
Not sure exactly what problem you are trying to solve here, but I am
certainly happy to help, if I can.
Can you perhaps point me to a couple of drivers that need to be converted?
Kind regards
Uffe
^ permalink raw reply [flat|nested] 33+ messages in thread
* mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-15 11:20 ` Ulf Hansson
@ 2024-01-22 7:34 ` Christoph Hellwig
2024-01-22 9:26 ` Arnd Bergmann
0 siblings, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-22 7:34 UTC (permalink / raw)
To: Ulf Hansson
Cc: Christoph Hellwig, Jens Axboe, Ming Lei, linux-block, linux-mmc,
Arnd Bergmann, Linus Walleij
On Mon, Jan 15, 2024 at 12:20:50PM +0100, Ulf Hansson wrote:
> Not sure exactly what problem you are trying to solve here, but I am
> certainly happy to help, if I can.
>
> Can you perhaps point me to a couple of drivers that need to be converted?
Sure.
mmc_alloc_disk sets BLK_BOUNCE_HIGH for any mmc host that doesn't have a
DMA mask set, which is a bit odd as all proper devices should have a
valid DMA mask. I suspect platform devices might sometimes not have
one, which historically was the wild west.
A better indicator might be the use of page_address in the I/O path,
which usually comes in the form of using the sg_virt() helper.
For drivers/mmc/ that seems to be: davinci_mmc, moxart-mmc, mvsdio,
mxcmmc, omap, sdhci-esdhc-mcf and sh_mmcif.
tmio_mmc_core on the other hand seems to have code properly kmapping
a scatterlist, which might be worth lifting into a common helper to
help the above drivers.
>
> Kind regards
> Uffe
---end quoted text---
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 7:34 ` mmc vs highmem, was: " Christoph Hellwig
@ 2024-01-22 9:26 ` Arnd Bergmann
2024-01-22 13:39 ` Christoph Hellwig
2024-01-24 13:49 ` Linus Walleij
0 siblings, 2 replies; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-22 9:26 UTC (permalink / raw)
To: Christoph Hellwig, Ulf Hansson
Cc: Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij
On Mon, Jan 22, 2024, at 08:34, Christoph Hellwig wrote:
> On Mon, Jan 15, 2024 at 12:20:50PM +0100, Ulf Hansson wrote:
>> Not sure exactly what problem you are trying to solve here, but I am
>> certainly happy to help, if I can.
>>
>> Can you perhaps point me to a couple of drivers that need to be converted?
>
> Sure.
>
> mmc_alloc_disk sets BLK_BOUNCE_HIGH for any mmc host that doesn't have a
> DMA mask set, which is a bit odd as all proper devices should have a
> valid DMA mask. I suspect platform devices might sometimes not have
> one, which historically was the wild west.
I found five drivers that have a legacy platform device
definition without a DMA mask:
arch/m68k/coldfire/device.c: "sdhci-esdhc-mcf"
arch/arm/mach-omap1/devices.c: "mmci-omap" (slave DMA)
arch/sh/boards/board-sh7757lcr.c: "sh_mmcif" (slave DMA)
arch/sh/boards/mach-ecovec24/setup.c: sh_mmcif" (slave DMA)
arch/sh/boards/mach-*/setup.c: "sh_mobile_sdhi" (slave DMA)
drivers/misc/cb710/core.c: "cb710-mmc" (pio-only)
None of these embedded platforms actually have highmem,
though the omap1 machine may run a kernel that has highmem
support enabled.
Most of the others only support DT based probing after we
removed a lot of old board files a year ago, so they will
always have a 32-bit mask set at probe time.
The slave DMA case is interesting,
> A better indicator might be the use of page_address in the I/O path,
> which usually comes in the form of using the sg_virt() helper.
> For drivers/mmc/ that seems to be: davinci_mmc, moxart-mmc, mvsdio,
> mxcmmc, omap, sdhci-esdhc-mcf and sh_mmcif.
Out of these, I think only the mvsdio one is actually use
on boards with highmem: davinci, moxart, mcx (imx3) and omap2
are old enough to never have had more than 256MB or so of RAM,
and mcf (m68k) and sh can't even be built with CONFIG_HIGHMEM.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 9:26 ` Arnd Bergmann
@ 2024-01-22 13:39 ` Christoph Hellwig
2024-01-22 14:57 ` Arnd Bergmann
2024-01-24 13:49 ` Linus Walleij
1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-22 13:39 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij
On Mon, Jan 22, 2024 at 10:26:30AM +0100, Arnd Bergmann wrote:
> > A better indicator might be the use of page_address in the I/O path,
> > which usually comes in the form of using the sg_virt() helper.
> > For drivers/mmc/ that seems to be: davinci_mmc, moxart-mmc, mvsdio,
> > mxcmmc, omap, sdhci-esdhc-mcf and sh_mmcif.
>
> Out of these, I think only the mvsdio one is actually use
> on boards with highmem: davinci, moxart, mcx (imx3) and omap2
> are old enough to never have had more than 256MB or so of RAM,
> and mcf (m68k) and sh can't even be built with CONFIG_HIGHMEM.
It would be good to fix the one or two that could use highmem and add a
depends on !HIGHMEM for the others and then kill the bounce setup in
mmc. It turn out in addition to the one legacy ISA SCSI driver and the
two parport SCSI driver usb-storage actually also sets this flag,
which might be a road blocker, but at this point I'm getting ready
to just pull the plug if it doesn't break using embedded platforms
using mmc entirely.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 13:39 ` Christoph Hellwig
@ 2024-01-22 14:57 ` Arnd Bergmann
2024-01-23 9:11 ` Christoph Hellwig
2024-01-24 12:45 ` Arnd Bergmann
0 siblings, 2 replies; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-22 14:57 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij, Andrew Lunn,
Gregory Clement, Sebastian Hesselbarth
On Mon, Jan 22, 2024, at 14:39, Christoph Hellwig wrote:
> On Mon, Jan 22, 2024 at 10:26:30AM +0100, Arnd Bergmann wrote:
>> > A better indicator might be the use of page_address in the I/O path,
>> > which usually comes in the form of using the sg_virt() helper.
>> > For drivers/mmc/ that seems to be: davinci_mmc, moxart-mmc, mvsdio,
>> > mxcmmc, omap, sdhci-esdhc-mcf and sh_mmcif.
>>
>> Out of these, I think only the mvsdio one is actually use
>> on boards with highmem: davinci, moxart, mxc (imx3) and omap2
>> are old enough to never have had more than 256MB or so of RAM,
>> and mcf (m68k) and sh can't even be built with CONFIG_HIGHMEM.
>
> It would be good to fix the one or two that could use highmem and add a
> depends on !HIGHMEM for the others
I would prefer a runtime check here, as one might still have a
multiplatform kernel where one machine can use highmem and
another machine can use one of these drivers, e.g. in
imx_v6_v7_defconfig.
> and then kill the bounce setup in
> mmc. It turn out in addition to the one legacy ISA SCSI driver and the
> two parport SCSI driver usb-storage actually also sets this flag,
> which might be a road blocker, but at this point I'm getting ready
> to just pull the plug if it doesn't break using embedded platforms
> using mmc entirely.
Agreed. I've added the maintainers for the Marvell Armada
platform to Cc, maybe one of them can look into this, see
the thread at [1] for this. The problem in the mvsdio.c
is the sg_virt(data->sg) in mvsd_setup_data(), which will
stop working when the MMC layer stops using bounce buffers
for highmem pages.
Arnd
[1] https://lore.kernel.org/all/20240122073423.GA25859@lst.de/
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 14:57 ` Arnd Bergmann
@ 2024-01-23 9:11 ` Christoph Hellwig
2024-01-24 11:59 ` Arnd Bergmann
2024-01-24 12:45 ` Arnd Bergmann
1 sibling, 1 reply; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-23 9:11 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij, Andrew Lunn,
Gregory Clement, Sebastian Hesselbarth
On Mon, Jan 22, 2024 at 03:57:16PM +0100, Arnd Bergmann wrote:
> > It would be good to fix the one or two that could use highmem and add a
> > depends on !HIGHMEM for the others
>
> I would prefer a runtime check here, as one might still have a
> multiplatform kernel where one machine can use highmem and
> another machine can use one of these drivers, e.g. in
> imx_v6_v7_defconfig.
Well, if someone can come up with a good runtime check that's fine with me.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-23 9:11 ` Christoph Hellwig
@ 2024-01-24 11:59 ` Arnd Bergmann
2024-01-24 12:33 ` Linus Walleij
0 siblings, 1 reply; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-24 11:59 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij, Andrew Lunn,
Gregory Clement, Sebastian Hesselbarth
On Tue, Jan 23, 2024, at 10:11, Christoph Hellwig wrote:
> On Mon, Jan 22, 2024 at 03:57:16PM +0100, Arnd Bergmann wrote:
>> > It would be good to fix the one or two that could use highmem and add a
>> > depends on !HIGHMEM for the others
>>
>> I would prefer a runtime check here, as one might still have a
>> multiplatform kernel where one machine can use highmem and
>> another machine can use one of these drivers, e.g. in
>> imx_v6_v7_defconfig.
>
> Well, if someone can come up with a good runtime check that's fine with me.
I assumed there was a generic way already, but it looks like
there is not, and adding one across nine architectures would be
nontrivial.
Let's use your initial suggestion then and use a Kconfig
dependency. I still don't like how this may impact users that
currently enable highmem and use one of these drivers, but
on a more positive note this might help us kill off HIGHMEM
in the future if users instead choose to go with
CONFIG_VMSPLIT_3G_OPT or similar.
The Marvell driver still needs some other solution of course.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-24 11:59 ` Arnd Bergmann
@ 2024-01-24 12:33 ` Linus Walleij
2024-01-24 12:54 ` Arnd Bergmann
0 siblings, 1 reply; 33+ messages in thread
From: Linus Walleij @ 2024-01-24 12:33 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Andrew Lunn, Gregory Clement,
Sebastian Hesselbarth
On Wed, Jan 24, 2024 at 12:59 PM Arnd Bergmann <arnd@arndb.de> wrote:
> Let's use your initial suggestion then and use a Kconfig
> dependency.
I'm looking into this.
> I still don't like how this may impact users that
> currently enable highmem and use one of these drivers,
I'm taking it on a machine-by-machine basis.
For example it
turns out all OMAP2 that use mmci-omap are Nokia n800, n810
and the H4 reference design.
So I am seeing if these can be excluded from the "most omap2plus
systems" list.
Yours,
Linus Walleij
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-24 12:33 ` Linus Walleij
@ 2024-01-24 12:54 ` Arnd Bergmann
2024-01-24 13:16 ` Linus Walleij
0 siblings, 1 reply; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-24 12:54 UTC (permalink / raw)
To: Linus Walleij
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Andrew Lunn, Gregory Clement,
Sebastian Hesselbarth
On Wed, Jan 24, 2024, at 13:33, Linus Walleij wrote:
> On Wed, Jan 24, 2024 at 12:59 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
>> Let's use your initial suggestion then and use a Kconfig
>> dependency.
>
> I'm looking into this.
>
>> I still don't like how this may impact users that
>> currently enable highmem and use one of these drivers,
>
> I'm taking it on a machine-by-machine basis.
>
> For example it
> turns out all OMAP2 that use mmci-omap are Nokia n800, n810
> and the H4 reference design.
>
> So I am seeing if these can be excluded from the "most omap2plus
> systems" list.
Unfortunately excluding Nokia n8x0 would turn the omap2plus
defconfig into an omap3plus_defconfig effectively.
This is also something that has come up repeatedly in the
past though: omap24xx (and imx31 to a lesser degree) support
in multiplatform kernels is really annoying because it
requires a lot of special cases to also work on SMP enabled
machines, and it completely breaks on ARMv8-aarch32 machines.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-24 12:54 ` Arnd Bergmann
@ 2024-01-24 13:16 ` Linus Walleij
2024-01-24 14:14 ` Arnd Bergmann
0 siblings, 1 reply; 33+ messages in thread
From: Linus Walleij @ 2024-01-24 13:16 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Andrew Lunn, Gregory Clement,
Sebastian Hesselbarth
On Wed, Jan 24, 2024 at 1:55 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > So I am seeing if these can be excluded from the "most omap2plus
> > systems" list.
>
> Unfortunately excluding Nokia n8x0 would turn the omap2plus
> defconfig into an omap3plus_defconfig effectively.
I did like this:
@@ -135,7 +135,7 @@ config ARCH_OMAP2PLUS_TYPICAL
bool "Typical OMAP configuration"
default y
select AEABI
- select HIGHMEM
+ select HIGHMEM if !SOC_OMAP2420
Effectively disabling HIGHMEM when using omap2plus_defconfig.
If we want all systems supported, we just apply this at the expense
of highmem for OMAP 2430, OMAP3 and OMAP4 and the
We can then either
- Disable SOC_OMAP2420 in omap2plus_defconfig (I made a
patch for this) turning it
into an omap3plus_defconfig as you say
or
- Actually add a new defconfig named omap3plus_defconfig
with highmem enabled but SOC_OMAP2420 disabled.
I don't know which option is the lesser evil ... it's a bit hairy.
(A third option would be to reexamine runtime restriction options...)
Yours,
Linus Walleij
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-24 13:16 ` Linus Walleij
@ 2024-01-24 14:14 ` Arnd Bergmann
0 siblings, 0 replies; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-24 14:14 UTC (permalink / raw)
To: Linus Walleij
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Andrew Lunn, Gregory Clement,
Sebastian Hesselbarth
On Wed, Jan 24, 2024, at 14:16, Linus Walleij wrote:
> On Wed, Jan 24, 2024 at 1:55 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
>> > So I am seeing if these can be excluded from the "most omap2plus
>> > systems" list.
>>
>> Unfortunately excluding Nokia n8x0 would turn the omap2plus
>> defconfig into an omap3plus_defconfig effectively.
>
> I did like this:
>
> @@ -135,7 +135,7 @@ config ARCH_OMAP2PLUS_TYPICAL
> bool "Typical OMAP configuration"
> default y
> select AEABI
> - select HIGHMEM
> + select HIGHMEM if !SOC_OMAP2420
>
> Effectively disabling HIGHMEM when using omap2plus_defconfig.
>
> If we want all systems supported, we just apply this at the expense
> of highmem for OMAP 2430, OMAP3 and OMAP4 and the
As far as I can tell, none of the above actually have more than
1GB of RAM, as OMAP4/AM4 maxes out at a single 8Gbit LPDDR2
RAM. For those machines, using CONFIG_VMSPLIT_3G_OPT is likely
going to be much better than CONFIG_HIGHMEM anyway.
Unfortunately, this does not work for OMAP5/AM5/DRA7, which
can have 2GB or possibly 4GB (as used in the Pyra) of DDR3,
so we'd still lose.
> We can then either
>
> - Disable SOC_OMAP2420 in omap2plus_defconfig (I made a
> patch for this) turning it
> into an omap3plus_defconfig as you say
>
> or
>
> - Actually add a new defconfig named omap3plus_defconfig
> with highmem enabled but SOC_OMAP2420 disabled.
>
> I don't know which option is the lesser evil ... it's a bit hairy.
>
> (A third option would be to reexamine runtime restriction options...)
Or actually using kmap_local_page() in mmc_omap_xfer_data(),
as Christoph suggested.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 14:57 ` Arnd Bergmann
2024-01-23 9:11 ` Christoph Hellwig
@ 2024-01-24 12:45 ` Arnd Bergmann
1 sibling, 0 replies; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-24 12:45 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org, Linus Walleij, Andrew Lunn,
Gregory Clement, Sebastian Hesselbarth
On Mon, Jan 22, 2024, at 15:57, Arnd Bergmann wrote:
> On Mon, Jan 22, 2024, at 14:39, Christoph Hellwig wrote:
>> On Mon, Jan 22, 2024 at 10:26:30AM +0100, Arnd Bergmann wrote:
>
>> and then kill the bounce setup in
>> mmc. It turn out in addition to the one legacy ISA SCSI driver and the
>> two parport SCSI driver usb-storage actually also sets this flag,
>> which might be a road blocker, but at this point I'm getting ready
>> to just pull the plug if it doesn't break using embedded platforms
>> using mmc entirely.
>
> Agreed. I've added the maintainers for the Marvell Armada
> platform to Cc, maybe one of them can look into this, see
> the thread at [1] for this. The problem in the mvsdio.c
> is the sg_virt(data->sg) in mvsd_setup_data(), which will
> stop working when the MMC layer stops using bounce buffers
> for highmem pages.
I talked to Linus Walleij about this driver, as he's already
looking at highmem related issues in Arm kernels. Here is
some updated information about what I think is going on:
- The driver is used on kirkwood, dove, armadaxp/370, and
armada375, but not on armada380 and later, as those use
sdhci instead.
- Most of the boards with those chips have the sdio controller
disabled. In particular for the later chips it is only
enabled in the reference design but not in products like the
Armada XP based NAS boxes.
- Products that actually use it and have at least 1GB of RAM
are the Globalscale Mirabox/D3Plug and Solidrun Cubox.
- All of the machines using this driver have a valid DMA mask
set, so the BLK_BOUNCE_HIGH hack is never actually active
here.
- The reason I think the driver still works is that the
PIO mode using the broken sg_virt() call is only used
for unaligned data, and all data sent to the controller is
either fully aligned (mmc block data) or in lowmem (sdio
data structures filled in the kernel).
- The hack for non-caheline-aligned DMA in commit 3c583f70a8e2
("mmc: mvsdio: Work around broken TX DMA") specifically
mentions small data structures. I wonder if this is instead
a bug with dma_mapping stack variables or similar instead of
an actual hardware bug. The only sdio_writesb() I see in the
mwifiex driver mentioned here is for network data buffers,
which I think don't ever share cache lines though.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-22 9:26 ` Arnd Bergmann
2024-01-22 13:39 ` Christoph Hellwig
@ 2024-01-24 13:49 ` Linus Walleij
2024-01-24 16:35 ` Arnd Bergmann
1 sibling, 1 reply; 33+ messages in thread
From: Linus Walleij @ 2024-01-24 13:49 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org
On Mon, Jan 22, 2024 at 10:26 AM Arnd Bergmann <arnd@arndb.de> wrote:
> I found five drivers that have a legacy platform device
> definition without a DMA mask:
>
> arch/m68k/coldfire/device.c: "sdhci-esdhc-mcf"
> arch/arm/mach-omap1/devices.c: "mmci-omap" (slave DMA)
> arch/sh/boards/board-sh7757lcr.c: "sh_mmcif" (slave DMA)
> arch/sh/boards/mach-ecovec24/setup.c: sh_mmcif" (slave DMA)
> arch/sh/boards/mach-*/setup.c: "sh_mobile_sdhi" (slave DMA)
> drivers/misc/cb710/core.c: "cb710-mmc" (pio-only)
>
> None of these embedded platforms actually have highmem,
> though the omap1 machine may run a kernel that has highmem
> support enabled.
>
> Most of the others only support DT based probing after we
> removed a lot of old board files a year ago, so they will
> always have a 32-bit mask set at probe time.
For sh_mmcif I just added dma_mask and coherent_dma_mask
as DMA_BIT_MASK(32) in the boardfile and I consider doing it
for pretty much all of them: If they
- Run without HIGHMEM enabled and
- With highmem are bouncing buffers around to PKMAP (right?) when
BLK_BOUNCE_HIGH is set
That kind of indicates that they are
probably 32bit DMA capable, pretty much as the device trees
assumes in most cases.
This avoids doing Kconfig trickery, make it runtime handled
and we can delete BLK_BOUNCE_HIGH as that branch is
never taken and just refuse to probe if dma_mask == 0.
Yours,
Linus Walleij
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: mmc vs highmem, was: Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-24 13:49 ` Linus Walleij
@ 2024-01-24 16:35 ` Arnd Bergmann
0 siblings, 0 replies; 33+ messages in thread
From: Arnd Bergmann @ 2024-01-24 16:35 UTC (permalink / raw)
To: Linus Walleij
Cc: Christoph Hellwig, Ulf Hansson, Jens Axboe, Ming Lei, linux-block,
linux-mmc @ vger . kernel . org
On Wed, Jan 24, 2024, at 14:49, Linus Walleij wrote:
> On Mon, Jan 22, 2024 at 10:26 AM Arnd Bergmann <arnd@arndb.de> wrote:
>
>> I found five drivers that have a legacy platform device
>> definition without a DMA mask:
>>
>> arch/m68k/coldfire/device.c: "sdhci-esdhc-mcf"
>> arch/arm/mach-omap1/devices.c: "mmci-omap" (slave DMA)
>> arch/sh/boards/board-sh7757lcr.c: "sh_mmcif" (slave DMA)
>> arch/sh/boards/mach-ecovec24/setup.c: sh_mmcif" (slave DMA)
>> arch/sh/boards/mach-*/setup.c: "sh_mobile_sdhi" (slave DMA)
>> drivers/misc/cb710/core.c: "cb710-mmc" (pio-only)
>>
>> None of these embedded platforms actually have highmem,
>> though the omap1 machine may run a kernel that has highmem
>> support enabled.
>>
>> Most of the others only support DT based probing after we
>> removed a lot of old board files a year ago, so they will
>> always have a 32-bit mask set at probe time.
>
> For sh_mmcif I just added dma_mask and coherent_dma_mask
> as DMA_BIT_MASK(32) in the boardfile
I think technically it's wrong to set the DMA mask
for the sh_mmcif device. The mask is set correctly
for the dmaengine driver, which is used correctly in
ret = dma_map_sg(chan->device->dev, sg, data->sg_len,
DMA_FROM_DEVICE);
Since SH never has highmem, I don't think there is
anything that needs to be done for these.
Also for cb710, there is no DMA, and highmem gets
handled correctly through using sg_miter_next() etc.
> and I consider doing it
> for pretty much all of them: If they
> - Run without HIGHMEM enabled and
> - With highmem are bouncing buffers around to PKMAP (right?) when
> BLK_BOUNCE_HIGH is set
> That kind of indicates that they are
> probably 32bit DMA capable, pretty much as the device trees
> assumes in most cases.
> This avoids doing Kconfig trickery, make it runtime handled
> and we can delete BLK_BOUNCE_HIGH as that branch is
> never taken and just refuse to probe if dma_mask == 0.
We can probably treat this as two different issues now:
a) drivers that don't set a DMA mask get the block layer
bounce buffers, and as far as I can tell none of these
actually need the bounce buffers, so we can already
remove the BLK_BOUNCE_HIGH setting without causing
a chance in behavior. cb710 is likely to see a
small performance improvement when used on highmem
systems without BLK_BOUNCE_HIGH but it's a very slow
and old device, so nobody will notice
b) drivers that use sg_virt() instead of kmap_local_page()
or sg_iter are currently broken if they run on on
systems using highmem, as there is no bounce buffer
when the DMA mask is set. Some of these may have
used BLK_BOUNCE_HIGH in the past before the conversion
to DT probing.
It's the second point that requires the Kconfig dependency.
Arnd
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 13:57 ` [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios Christoph Hellwig
2024-01-11 16:12 ` Jens Axboe
@ 2024-01-11 22:22 ` Ming Lei
2024-01-12 5:46 ` Christoph Hellwig
1 sibling, 1 reply; 33+ messages in thread
From: Ming Lei @ 2024-01-11 22:22 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Jens Axboe, linux-block, ming.lei
On Thu, Jan 11, 2024 at 02:57:05PM +0100, Christoph Hellwig wrote:
> q_usage_counter is the only thing preventing us from the limits changing
> under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
Can you share why the limits change matters wrt. split? If queue is
live, both new and old one are supposed to work, such as, we don't
freeze queue when changing `max_sectors_kb` via sysfs.
>
> Change __submit_bio to always acquire the q_usage_counter counter before
> branching out into bio vs request based helper, and let blk_mq_submit_bio
> tell it if it consumed the reference by handing it off to the request.
>
> Fixes: 9d497e2941c3 ("block: don't protect submit_bio_checks by q_usage_counter")
The above tag looks wrong, the logic change is actually from commit
900e08075202 ("block: move queue enter logic into blk_mq_submit_bio()")
And commit 9d497e2941c3 doesn't move q_usage_counter from
blk_mq_submit_bio() or bio split.
Thanks,
Ming
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
2024-01-11 22:22 ` Ming Lei
@ 2024-01-12 5:46 ` Christoph Hellwig
0 siblings, 0 replies; 33+ messages in thread
From: Christoph Hellwig @ 2024-01-12 5:46 UTC (permalink / raw)
To: Ming Lei; +Cc: Christoph Hellwig, Jens Axboe, linux-block
On Fri, Jan 12, 2024 at 06:22:59AM +0800, Ming Lei wrote:
> On Thu, Jan 11, 2024 at 02:57:05PM +0100, Christoph Hellwig wrote:
> > q_usage_counter is the only thing preventing us from the limits changing
> > under us in __bio_split_to_limits, but blk_mq_submit_bio doesn't hold it.
>
> Can you share why the limits change matters wrt. split? If queue is
> live, both new and old one are supposed to work, such as, we don't
> freeze queue when changing `max_sectors_kb` via sysfs.
Hardware reconfigurations for example, which can happen in nvme for
just about every limit, in SCSI for limits controller by the ULD,
and in quite a few places for disabling write zeroes or discard.
^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: ensure q_usage_counter is held over bio splits
2024-01-11 13:57 ensure q_usage_counter is held over bio splits Christoph Hellwig
2024-01-11 13:57 ` [PATCH 1/2] blk-mq: rename blk_mq_can_use_cached_rq Christoph Hellwig
2024-01-11 13:57 ` [PATCH 2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios Christoph Hellwig
@ 2024-01-11 16:03 ` Jens Axboe
2024-01-14 14:38 ` (subset) " Jens Axboe
3 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2024-01-11 16:03 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On Thu, 11 Jan 2024 14:57:03 +0100, Christoph Hellwig wrote:
> current blk_submit_bio can call into the bio splitting code without
> q_usage_counter held, which can lead to inconsistent limits beeing
> applied for drivers that change the limits at runtime.
>
> The first patch in the series is a small comment and naming cleanup,
> and the second one ensures we always hold q_usage_counter before
> even entering blk_mq_submit_bio.
>
> [...]
Applied, thanks!
[1/2] blk-mq: rename blk_mq_can_use_cached_rq
commit: edc2e480a9f242c91fc7bb64d6de77af75bd59b5
[2/2] blk-mq: ensure a q_usage_counter reference is held when splitting bios
commit: 744bdfb30b119f011d800e865623f3eb0a28c949
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: (subset) ensure q_usage_counter is held over bio splits
2024-01-11 13:57 ensure q_usage_counter is held over bio splits Christoph Hellwig
` (2 preceding siblings ...)
2024-01-11 16:03 ` ensure q_usage_counter is held over bio splits Jens Axboe
@ 2024-01-14 14:38 ` Jens Axboe
3 siblings, 0 replies; 33+ messages in thread
From: Jens Axboe @ 2024-01-14 14:38 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Ming Lei, linux-block
On Thu, 11 Jan 2024 14:57:03 +0100, Christoph Hellwig wrote:
> current blk_submit_bio can call into the bio splitting code without
> q_usage_counter held, which can lead to inconsistent limits beeing
> applied for drivers that change the limits at runtime.
>
> The first patch in the series is a small comment and naming cleanup,
> and the second one ensures we always hold q_usage_counter before
> even entering blk_mq_submit_bio.
>
> [...]
Applied, thanks!
[1/2] blk-mq: rename blk_mq_can_use_cached_rq
commit: 309ce6741430b5a7b5e85cd1a7569647f8d9dfa6
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 33+ messages in thread