From: Christoph Hellwig <hch@lst.de>
To: Carlos Maiolino <cem@kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>, linux-xfs@vger.kernel.org
Subject: [PATCH 09/15] xfs: simplify buffer I/O submission
Date: Mon, 6 Jan 2025 10:54:46 +0100 [thread overview]
Message-ID: <20250106095613.847700-10-hch@lst.de> (raw)
In-Reply-To: <20250106095613.847700-1-hch@lst.de>
The code in _xfs_buf_ioapply is unnecessarily complicated because it
doesn't take advantage of modern bio features.
Simplify it by making use of bio splitting and chaining, that is build
a single bio for the pages in the buffer using a simple loop, and then
split that bio on the map boundaries for discontiguous multi-FSB buffers
and chain the split bios to the main one so that there is only a single
I/O completion.
This not only simplifies the code to build the buffer, but also removes
the need for the b_io_remaining field as buffer ownership is granted
to the bio on submit of the final bio with no chance for a completion
before that as well as the b_io_error field that is now superfluous
because there always is exactly one completion.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_buf.c | 207 ++++++++++++++---------------------------------
fs/xfs/xfs_buf.h | 2 -
2 files changed, 60 insertions(+), 149 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e886605b5721..094f16457998 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1364,13 +1364,6 @@ xfs_buf_ioend(
{
trace_xfs_buf_iodone(bp, _RET_IP_);
- /*
- * Pull in IO completion errors now. We are guaranteed to be running
- * single threaded, so we don't need the lock to read b_io_error.
- */
- if (!bp->b_error && bp->b_io_error)
- xfs_buf_ioerror(bp, bp->b_io_error);
-
if (bp->b_flags & XBF_READ) {
if (!bp->b_error && bp->b_ops)
bp->b_ops->verify_read(bp);
@@ -1494,118 +1487,26 @@ static void
xfs_buf_bio_end_io(
struct bio *bio)
{
- struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
+ struct xfs_buf *bp = bio->bi_private;
- if (!bio->bi_status &&
- (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
- XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
- bio->bi_status = BLK_STS_IOERR;
-
- /*
- * don't overwrite existing errors - otherwise we can lose errors on
- * buffers that require multiple bios to complete.
- */
- if (bio->bi_status) {
- int error = blk_status_to_errno(bio->bi_status);
-
- cmpxchg(&bp->b_io_error, 0, error);
- }
+ if (bio->bi_status)
+ xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
+ else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
+ XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
+ xfs_buf_ioerror(bp, -EIO);
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
- xfs_buf_ioend_async(bp);
+ xfs_buf_ioend_async(bp);
bio_put(bio);
}
-static void
-xfs_buf_ioapply_map(
- struct xfs_buf *bp,
- int map,
- int *buf_offset,
- int *count,
- blk_opf_t op)
-{
- int page_index;
- unsigned int total_nr_pages = bp->b_page_count;
- int nr_pages;
- struct bio *bio;
- sector_t sector = bp->b_maps[map].bm_bn;
- int size;
- int offset;
-
- /* skip the pages in the buffer before the start offset */
- page_index = 0;
- offset = *buf_offset;
- while (offset >= PAGE_SIZE) {
- page_index++;
- offset -= PAGE_SIZE;
- }
-
- /*
- * Limit the IO size to the length of the current vector, and update the
- * remaining IO count for the next time around.
- */
- size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
- *count -= size;
- *buf_offset += size;
-
-next_chunk:
- atomic_inc(&bp->b_io_remaining);
- nr_pages = bio_max_segs(total_nr_pages);
-
- bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
- bio->bi_iter.bi_sector = sector;
- bio->bi_end_io = xfs_buf_bio_end_io;
- bio->bi_private = bp;
-
- for (; size && nr_pages; nr_pages--, page_index++) {
- int rbytes, nbytes = PAGE_SIZE - offset;
-
- if (nbytes > size)
- nbytes = size;
-
- rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
- offset);
- if (rbytes < nbytes)
- break;
-
- offset = 0;
- sector += BTOBB(nbytes);
- size -= nbytes;
- total_nr_pages--;
- }
-
- if (likely(bio->bi_iter.bi_size)) {
- if (xfs_buf_is_vmapped(bp)) {
- flush_kernel_vmap_range(bp->b_addr,
- xfs_buf_vmap_len(bp));
- }
- submit_bio(bio);
- if (size)
- goto next_chunk;
- } else {
- /*
- * This is guaranteed not to be the last io reference count
- * because the caller (xfs_buf_submit) holds a count itself.
- */
- atomic_dec(&bp->b_io_remaining);
- xfs_buf_ioerror(bp, -EIO);
- bio_put(bio);
- }
-
-}
-
-STATIC void
-_xfs_buf_ioapply(
- struct xfs_buf *bp)
+static inline blk_opf_t
+xfs_buf_bio_op(
+ struct xfs_buf *bp)
{
- struct blk_plug plug;
- blk_opf_t op;
- int offset;
- int size;
- int i;
+ blk_opf_t op;
if (bp->b_flags & XBF_WRITE) {
op = REQ_OP_WRITE;
@@ -1615,25 +1516,52 @@ _xfs_buf_ioapply(
op |= REQ_RAHEAD;
}
- /* we only use the buffer cache for meta-data */
- op |= REQ_META;
+ return op | REQ_META;
+}
+
+static void
+xfs_buf_submit_bio(
+ struct xfs_buf *bp)
+{
+ unsigned int size = BBTOB(bp->b_length);
+ unsigned int map = 0, p;
+ struct blk_plug plug;
+ struct bio *bio;
+
+ bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
+ xfs_buf_bio_op(bp), GFP_NOIO);
+ bio->bi_private = bp;
+ bio->bi_end_io = xfs_buf_bio_end_io;
+
+ if (bp->b_flags & _XBF_KMEM) {
+ __bio_add_page(bio, virt_to_page(bp->b_addr), size,
+ bp->b_offset);
+ } else {
+ for (p = 0; p < bp->b_page_count; p++)
+ __bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
+ bio->bi_iter.bi_size = size; /* limit to the actual size used */
+
+ if (xfs_buf_is_vmapped(bp))
+ flush_kernel_vmap_range(bp->b_addr,
+ xfs_buf_vmap_len(bp));
+ }
/*
- * Walk all the vectors issuing IO on them. Set up the initial offset
- * into the buffer and the desired IO size before we start -
- * _xfs_buf_ioapply_vec() will modify them appropriately for each
- * subsequent call.
+ * If there is more than one map segment, split the original bio to
+ * submit a separate bio for each discontiguous range.
*/
- offset = bp->b_offset;
- size = BBTOB(bp->b_length);
blk_start_plug(&plug);
- for (i = 0; i < bp->b_map_count; i++) {
- xfs_buf_ioapply_map(bp, i, &offset, &size, op);
- if (bp->b_error)
- break;
- if (size <= 0)
- break; /* all done */
+ for (map = 0; map < bp->b_map_count - 1; map++) {
+ struct bio *split;
+
+ split = bio_split(bio, bp->b_maps[map].bm_len, GFP_NOFS,
+ &fs_bio_set);
+ split->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
+ bio_chain(split, bio);
+ submit_bio(split);
}
+ bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
+ submit_bio(bio);
blk_finish_plug(&plug);
}
@@ -1693,8 +1621,6 @@ static int
xfs_buf_submit(
struct xfs_buf *bp)
{
- int error = 0;
-
trace_xfs_buf_submit(bp, _RET_IP_);
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
@@ -1735,14 +1661,7 @@ xfs_buf_submit(
* left over from previous use of the buffer (e.g. failed readahead).
*/
bp->b_error = 0;
- bp->b_io_error = 0;
- /*
- * Set the count to 1 initially, this will stop an I/O completion
- * callout which happens before we have started all the I/O from calling
- * xfs_buf_ioend too early.
- */
- atomic_set(&bp->b_io_remaining, 1);
if (bp->b_flags & XBF_ASYNC)
xfs_buf_ioacct_inc(bp);
@@ -1755,28 +1674,22 @@ xfs_buf_submit(
if (xfs_buftarg_is_mem(bp->b_target))
goto done;
- _xfs_buf_ioapply(bp);
+ xfs_buf_submit_bio(bp);
+ goto rele;
done:
- /*
- * If _xfs_buf_ioapply failed, we can get back here with only the IO
- * reference we took above. If we drop it to zero, run completion so
- * that we don't return to the caller with completion still pending.
- */
- if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
- if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
- xfs_buf_ioend(bp);
- else
- xfs_buf_ioend_async(bp);
- }
-
+ if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
+ xfs_buf_ioend(bp);
+ else
+ xfs_buf_ioend_async(bp);
+rele:
/*
* Release the hold that keeps the buffer referenced for the entire
* I/O. Note that if the buffer is async, it is not safe to reference
* after this release.
*/
xfs_buf_rele(bp);
- return error;
+ return 0;
}
void *
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index da80399c7457..c53d27439ff2 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -184,7 +184,6 @@ struct xfs_buf {
struct list_head b_lru; /* lru list */
spinlock_t b_lock; /* internal state lock */
unsigned int b_state; /* internal state flags */
- int b_io_error; /* internal IO error state */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag;
@@ -202,7 +201,6 @@ struct xfs_buf {
struct xfs_buf_map __b_map; /* inline compound buffer map */
int b_map_count;
atomic_t b_pin_count; /* pin count */
- atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset of b_addr,
only for _XBF_KMEM buffers */
--
2.45.2
next prev parent reply other threads:[~2025-01-06 9:56 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-06 9:54 buffer cache cleanups Christoph Hellwig
2025-01-06 9:54 ` [PATCH 01/15] xfs: fix a double completion for buffers on in-memory targets Christoph Hellwig
2025-01-07 2:00 ` Darrick J. Wong
2025-01-07 6:05 ` Christoph Hellwig
2025-01-06 9:54 ` [PATCH 02/15] xfs: remove the incorrect comment above xfs_buf_free_maps Christoph Hellwig
2025-01-07 2:00 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 03/15] xfs: remove the incorrect comment about the b_pag field Christoph Hellwig
2025-01-07 2:01 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 04/15] xfs: move xfs_buf_iowait out of (__)xfs_buf_submit Christoph Hellwig
2025-01-07 2:02 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 05/15] xfs: simplify xfs_buf_delwri_pushbuf Christoph Hellwig
2025-01-07 2:08 ` Darrick J. Wong
2025-01-07 6:06 ` Christoph Hellwig
2025-01-13 7:12 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 06/15] xfs: remove xfs_buf_delwri_submit_buffers Christoph Hellwig
2025-01-07 6:31 ` Darrick J. Wong
2025-01-07 6:33 ` Christoph Hellwig
2025-01-06 9:54 ` [PATCH 07/15] xfs: move write verification out of _xfs_buf_ioapply Christoph Hellwig
2025-01-07 6:33 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 08/15] xfs: move in-memory buftarg handling " Christoph Hellwig
2025-01-07 6:34 ` Darrick J. Wong
2025-01-06 9:54 ` Christoph Hellwig [this message]
2025-01-07 6:42 ` [PATCH 09/15] xfs: simplify buffer I/O submission Darrick J. Wong
2025-01-07 6:46 ` Christoph Hellwig
2025-01-07 6:57 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 10/15] xfs: move invalidate_kernel_vmap_range to xfs_buf_ioend Christoph Hellwig
2025-01-07 6:42 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 11/15] xfs: remove the extra buffer reference in xfs_buf_submit Christoph Hellwig
2025-01-13 7:13 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 12/15] xfs: always complete the buffer inline " Christoph Hellwig
2025-01-07 6:46 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 13/15] xfs: simplify xfsaild_resubmit_item Christoph Hellwig
2025-01-07 6:49 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 14/15] xfs: move b_li_list based retry handling to common code Christoph Hellwig
2025-01-07 6:55 ` Darrick J. Wong
2025-01-07 7:03 ` Christoph Hellwig
2025-01-13 7:18 ` Darrick J. Wong
2025-01-06 9:54 ` [PATCH 15/15] xfs: add a b_iodone callback to struct xfs_buf Christoph Hellwig
2025-01-07 6:58 ` Darrick J. Wong
-- strict thread matches above, loose matches on Subject: below --
2025-01-13 14:12 buffer cache cleanups v2 Christoph Hellwig
2025-01-13 14:12 ` [PATCH 09/15] xfs: simplify buffer I/O submission Christoph Hellwig
2025-01-13 18:15 ` Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250106095613.847700-10-hch@lst.de \
--to=hch@lst.de \
--cc=cem@kernel.org \
--cc=djwong@kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox