* [PATCH 1/5] block: move discard checks into the ioctl handler
2024-03-12 14:48 RFCv2: fix fatal signal handling in __blkdev_issue_discard Christoph Hellwig
@ 2024-03-12 14:48 ` Christoph Hellwig
2024-03-12 14:48 ` [PATCH 2/5] block: add a bio_chain_and_submit helper Christoph Hellwig
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2024-03-12 14:48 UTC (permalink / raw)
To: Jens Axboe, Chandan Babu R, Keith Busch
Cc: linux-block, linux-nvme, linux-xfs
Most bio operations get basic sanity checking in submit_bio and anything
more complicated than that is done in the callers. Discards are a bit
different from that in that a lot of checking is done in
__blkdev_issue_discard, and the specific errnos for that are returned
to userspace. Move the checks that require specific errnos to the ioctl
handler instead and replace the existing kernel sector alignment check
with the actual alignment check based on the logical block size. This
leaves jut the basic sanity checking in submit_bio for the other
submitters of discards and introduces two changes in behavior:
1) the logical block size alignment check of the start and len is lost
for non-ioctl callers.
This matches what is done for other operations including reads and
writes. We should probably verify this for all bios, but for now
make discards match the normal flow.
2) for non-ioctl callers all errors are reported on I/O completion now
instead of synchronously. Callers in general mostly ignore or log
errors so this will actually simplify the code once cleaned up
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-lib.c | 20 --------------------
block/ioctl.c | 13 +++++++++----
2 files changed, 9 insertions(+), 24 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index dc8e35d0a51d6d..50923508a32466 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,26 +59,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
struct bio *bio = *biop;
- sector_t bs_mask;
-
- if (bdev_read_only(bdev))
- return -EPERM;
- if (!bdev_max_discard_sectors(bdev))
- return -EOPNOTSUPP;
-
- /* In case the discard granularity isn't set by buggy device driver */
- if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
- pr_err_ratelimited("%pg: Error: discard_granularity is 0.\n",
- bdev);
- return -EOPNOTSUPP;
- }
-
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- if (!nr_sects)
- return -EINVAL;
while (nr_sects) {
sector_t req_sects =
diff --git a/block/ioctl.c b/block/ioctl.c
index 0c76137adcaaa5..57c8171fda93c5 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -95,6 +95,8 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> SECTOR_SHIFT) - 1;
+ sector_t sector, nr_sects;
uint64_t range[2];
uint64_t start, len;
struct inode *inode = bdev->bd_inode;
@@ -105,18 +107,21 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
+ if (bdev_read_only(bdev))
+ return -EPERM;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
start = range[0];
len = range[1];
+ sector = start >> SECTOR_SHIFT;
+ nr_sects = len >> SECTOR_SHIFT;
- if (start & 511)
+ if (!nr_sects)
return -EINVAL;
- if (len & 511)
+ if ((sector | nr_sects) & bs_mask)
return -EINVAL;
-
if (start + len > bdev_nr_bytes(bdev))
return -EINVAL;
@@ -124,7 +129,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
goto fail;
- err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+ err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 2/5] block: add a bio_chain_and_submit helper
2024-03-12 14:48 RFCv2: fix fatal signal handling in __blkdev_issue_discard Christoph Hellwig
2024-03-12 14:48 ` [PATCH 1/5] block: move discard checks into the ioctl handler Christoph Hellwig
@ 2024-03-12 14:48 ` Christoph Hellwig
2024-03-12 14:48 ` [PATCH 3/5] block: add a blk_alloc_discard_bio helper Christoph Hellwig
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2024-03-12 14:48 UTC (permalink / raw)
To: Jens Axboe, Chandan Babu R, Keith Busch
Cc: linux-block, linux-nvme, linux-xfs
This is basically blk_next_bio just with the bio allocation moved
to the caller to allow for more flexible bio handling in the caller.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/bio.c | 28 ++++++++++++++++++++--------
include/linux/bio.h | 1 +
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index d24420ed1c4c6f..32ff538b29e564 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -345,18 +345,30 @@ void bio_chain(struct bio *bio, struct bio *parent)
}
EXPORT_SYMBOL(bio_chain);
-struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
- unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
+/**
+ * bio_chain_and_submit - submit a bio after chaining it to another one
+ * @prev: bio to chain and submit
+ * @new: bio to chain to
+ *
+ * If @prev is non-NULL, chain it to @new and submit it.
+ *
+ * Return: @new.
+ */
+struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new)
{
- struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
-
- if (bio) {
- bio_chain(bio, new);
- submit_bio(bio);
+ if (prev) {
+ bio_chain(prev, new);
+ submit_bio(prev);
}
-
return new;
}
+EXPORT_SYMBOL_GPL(bio_chain_and_submit);
+
+struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
+ unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
+{
+ return bio_chain_and_submit(bio, bio_alloc(bdev, nr_pages, opf, gfp));
+}
EXPORT_SYMBOL_GPL(blk_next_bio);
static void bio_alloc_rescue(struct work_struct *work)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 875d792bffff82..643d61b7cb82f7 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -824,5 +824,6 @@ static inline void bio_clear_polled(struct bio *bio)
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
+struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
#endif /* __LINUX_BIO_H */
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 3/5] block: add a blk_alloc_discard_bio helper
2024-03-12 14:48 RFCv2: fix fatal signal handling in __blkdev_issue_discard Christoph Hellwig
2024-03-12 14:48 ` [PATCH 1/5] block: move discard checks into the ioctl handler Christoph Hellwig
2024-03-12 14:48 ` [PATCH 2/5] block: add a bio_chain_and_submit helper Christoph Hellwig
@ 2024-03-12 14:48 ` Christoph Hellwig
2024-03-12 14:48 ` [PATCH 4/5] block: move await_bio_chain to bio.c Christoph Hellwig
2024-03-12 14:48 ` [PATCH 5/5] block: don't allow fatal signals to interrupt (__)blkdev_issue_discard Christoph Hellwig
4 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2024-03-12 14:48 UTC (permalink / raw)
To: Jens Axboe, Chandan Babu R, Keith Busch
Cc: linux-block, linux-nvme, linux-xfs
Factor out a helper from __blkdev_issue_discard that chews off as much as
possible from a discard range and allocates a bio for it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-lib.c | 58 ++++++++++++++++++++++++++-------------------
include/linux/bio.h | 3 +++
2 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 50923508a32466..fd97f4dd34e7f4 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -55,36 +55,46 @@ static void await_bio_chain(struct bio *bio)
blk_wait_io(&done);
}
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
+{
+ sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
+ struct bio *bio;
+
+ if (WARN_ON_ONCE(!(gfp_mask & __GFP_RECLAIM)))
+ return NULL;
+ if (!bio_sects)
+ return NULL;
+
+ bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
+ bio->bi_iter.bi_sector = *sector;
+ bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
+ *sector += bio_sects;
+ *nr_sects -= bio_sects;
+ /*
+ * We can loop for a long time in here if someone does full device
+ * discards (like mkfs). Be nice and allow us to schedule out to avoid
+ * softlocking if preempt is disabled.
+ */
+ cond_resched();
+ return bio;
+}
+
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
- struct bio *bio = *biop;
-
- while (nr_sects) {
- sector_t req_sects =
- min(nr_sects, bio_discard_limit(bdev, sector));
+ struct bio *bio;
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
- bio->bi_iter.bi_sector = sector;
- bio->bi_iter.bi_size = req_sects << 9;
- sector += req_sects;
- nr_sects -= req_sects;
-
- /*
- * We can loop for a long time in here, if someone does
- * full device discards (like mkfs). Be nice and allow
- * us to schedule out to avoid softlocking if preempt
- * is disabled.
- */
- cond_resched();
- if (fatal_signal_pending(current)) {
- await_bio_chain(bio);
- return -EINTR;
- }
+ while (!fatal_signal_pending(current)) {
+ bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp_mask);
+ if (!bio)
+ return 0;
+ *biop = bio_chain_and_submit(*biop, bio);
}
- *biop = bio;
- return 0;
+ if (*biop)
+ await_bio_chain(*biop);
+ return -EINTR;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 643d61b7cb82f7..74138c815657fd 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -826,4 +826,7 @@ struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);
+
#endif /* __LINUX_BIO_H */
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 4/5] block: move await_bio_chain to bio.c
2024-03-12 14:48 RFCv2: fix fatal signal handling in __blkdev_issue_discard Christoph Hellwig
` (2 preceding siblings ...)
2024-03-12 14:48 ` [PATCH 3/5] block: add a blk_alloc_discard_bio helper Christoph Hellwig
@ 2024-03-12 14:48 ` Christoph Hellwig
2024-03-12 14:48 ` [PATCH 5/5] block: don't allow fatal signals to interrupt (__)blkdev_issue_discard Christoph Hellwig
4 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2024-03-12 14:48 UTC (permalink / raw)
To: Jens Axboe, Chandan Babu R, Keith Busch
Cc: linux-block, linux-nvme, linux-xfs
we'll want to use it from more than blk-lib.c, so this seems like
the better place. Also rename it so that the name starts with bio_.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/bio.c | 20 ++++++++++++++++++++
block/blk-lib.c | 28 ++++------------------------
block/blk.h | 1 +
3 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 32ff538b29e564..33972deed87fb3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1396,6 +1396,26 @@ int submit_bio_wait(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio_wait);
+static void bio_wait_end_io(struct bio *bio)
+{
+ complete(bio->bi_private);
+ bio_put(bio);
+}
+
+/*
+ * bio_await_chain - ends @bio and waits for every chained bio to complete
+ */
+void bio_await_chain(struct bio *bio)
+{
+ DECLARE_COMPLETION_ONSTACK_MAP(done,
+ bio->bi_bdev->bd_disk->lockdep_map);
+
+ bio->bi_private = &done;
+ bio->bi_end_io = bio_wait_end_io;
+ bio_endio(bio);
+ blk_wait_io(&done);
+}
+
void __bio_advance(struct bio *bio, unsigned bytes)
{
if (bio_integrity(bio))
diff --git a/block/blk-lib.c b/block/blk-lib.c
index fd97f4dd34e7f4..8021bc3831d56a 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -35,26 +35,6 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
}
-static void await_bio_endio(struct bio *bio)
-{
- complete(bio->bi_private);
- bio_put(bio);
-}
-
-/*
- * await_bio_chain - ends @bio and waits for every chained bio to complete
- */
-static void await_bio_chain(struct bio *bio)
-{
- DECLARE_COMPLETION_ONSTACK_MAP(done,
- bio->bi_bdev->bd_disk->lockdep_map);
-
- bio->bi_private = &done;
- bio->bi_end_io = await_bio_endio;
- bio_endio(bio);
- blk_wait_io(&done);
-}
-
struct bio *blk_alloc_discard_bio(struct block_device *bdev,
sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
{
@@ -93,7 +73,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
if (*biop)
- await_bio_chain(*biop);
+ bio_await_chain(*biop);
return -EINTR;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
@@ -158,7 +138,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
sector += len;
cond_resched();
if (fatal_signal_pending(current)) {
- await_bio_chain(bio);
+ bio_await_chain(bio);
return -EINTR;
}
}
@@ -206,7 +186,7 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
}
cond_resched();
if (fatal_signal_pending(current)) {
- await_bio_chain(bio);
+ bio_await_chain(bio);
return -EINTR;
}
}
@@ -352,7 +332,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
nr_sects -= len;
cond_resched();
if (fatal_signal_pending(current)) {
- await_bio_chain(bio);
+ bio_await_chain(bio);
ret = -EINTR;
bio = NULL;
break;
diff --git a/block/blk.h b/block/blk.h
index a19b7b42e6503c..78528bfbd58c37 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -38,6 +38,7 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
void submit_bio_noacct_nocheck(struct bio *bio);
+void bio_await_chain(struct bio *bio);
static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH 5/5] block: don't allow fatal signals to interrupt (__)blkdev_issue_discard
2024-03-12 14:48 RFCv2: fix fatal signal handling in __blkdev_issue_discard Christoph Hellwig
` (3 preceding siblings ...)
2024-03-12 14:48 ` [PATCH 4/5] block: move await_bio_chain to bio.c Christoph Hellwig
@ 2024-03-12 14:48 ` Christoph Hellwig
4 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2024-03-12 14:48 UTC (permalink / raw)
To: Jens Axboe, Chandan Babu R, Keith Busch
Cc: linux-block, linux-nvme, linux-xfs
File system won't to handle fatal signals themselves and are generally
not prepared for EINTR errors from (__)blkdev_issue_discard. Remove
the logic from the generic helpers and instead open code the discard
bio submission in the ioctl handler that wants it.
Fixes: 8a08c5fd89b4 ("blk-lib: check for kill signal")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-lib.c | 12 +++---------
block/ioctl.c | 24 ++++++++++++++++++++++--
2 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 8021bc3831d56a..90b75605299b9c 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -65,16 +65,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
struct bio *bio;
- while (!fatal_signal_pending(current)) {
- bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp_mask);
- if (!bio)
- return 0;
+ while ((bio = blk_alloc_discard_bio(bdev, §or, &nr_sects,
+ gfp_mask)))
*biop = bio_chain_and_submit(*biop, bio);
- }
-
- if (*biop)
- bio_await_chain(*biop);
- return -EINTR;
+ return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
diff --git a/block/ioctl.c b/block/ioctl.c
index 57c8171fda93c5..32bbdba77d6941 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -96,10 +96,12 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
sector_t bs_mask = (bdev_logical_block_size(bdev) >> SECTOR_SHIFT) - 1;
+ struct inode *inode = bdev->bd_inode;
sector_t sector, nr_sects;
+ struct bio *bio = NULL, *new;
+ struct blk_plug plug;
uint64_t range[2];
uint64_t start, len;
- struct inode *inode = bdev->bd_inode;
int err;
if (!(mode & BLK_OPEN_WRITE))
@@ -129,7 +131,25 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
goto fail;
- err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
+ blk_start_plug(&plug);
+ while (!fatal_signal_pending(current)) {
+ new = blk_alloc_discard_bio(bdev, §or, &nr_sects,
+ GFP_KERNEL);
+ if (!new)
+ break;
+ bio = bio_chain_and_submit(bio, new);
+ }
+ if (fatal_signal_pending(current)) {
+ if (bio)
+ bio_await_chain(bio);
+ err = -EINTR;
+ } else if (bio) {
+ err = submit_bio_wait(bio);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread