Linux Btrfs filesystem development
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>
Cc: linux-btrfs@vger.kernel.org
Subject: [PATCH] btrfs: allocate dummy ordereded_sums objects for nocsum I/O on zoned file systems
Date: Thu,  8 Jun 2023 14:14:10 +0200	[thread overview]
Message-ID: <20230608121410.275766-2-hch@lst.de> (raw)
In-Reply-To: <20230608121410.275766-1-hch@lst.de>

Zoned file systems now need the ordereded_sums structure to record the
actual write location returned by zone append, so allocate dummy
structures without the csum array for them when the I/O doesn't use
checksums, and free them when completing the ordered_extent.

Fixes: 177b0eb2c180 ("btrfs: optimize the logical to physical mapping for zoned writes")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/btrfs/bio.c       |  4 ++++
 fs/btrfs/file-item.c | 16 ++++++++++++++++
 fs/btrfs/file-item.h |  1 +
 fs/btrfs/zoned.c     | 21 +++++++++++++++++++--
 4 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 2ca2d1fcdf2b9a..12b12443efaabb 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -705,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 			ret = btrfs_bio_csum(bbio);
 			if (ret)
 				goto fail_put_bio;
+		} else if (use_append) {
+			ret = btrfs_alloc_dummy_sum(bbio);
+			if (ret)
+				goto fail_put_bio;
 		}
 	}
 
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 2db90c3bfd95a9..696bf695d8eb00 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -773,6 +773,22 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
 	return 0;
 }
 
+/*
+ * Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to
+ * record the updated logical address on Zone Append completion.
+ * Allocate just the structure with an empty sums array here for that case.
+ */
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
+{
+	bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
+	if (!bbio->sums)
+		return BLK_STS_RESOURCE;
+	bbio->sums->len = bbio->bio.bi_iter.bi_size;
+	bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+	btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
+	return 0;
+}
+
 /*
  * Remove one checksum overlapping a range.
  *
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 6be8725cd57474..4ec669b690080a 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums);
 blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 			     struct list_head *list, int search_commit,
 			     bool nowait);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index bbde4ddd475492..637b2a2f45c94e 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1702,7 +1702,8 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
 
 void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(ordered->inode->i_sb);
+	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_ordered_sum *sum =
 		list_first_entry(&ordered->list, typeof(*sum), list);
 	u64 logical = sum->logical;
@@ -1717,7 +1718,7 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
 		if (!btrfs_zoned_split_ordered(ordered, logical, len)) {
 			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
 			btrfs_err(fs_info, "failed to split ordered extent\n");
-			return;
+			goto out;
 		}
 		logical = sum->logical;
 		len = sum->len;
@@ -1725,6 +1726,22 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
 
 	if (ordered->disk_bytenr != logical)
 		btrfs_rewrite_logical_zoned(ordered, logical);
+
+out:
+	/*
+	 * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures
+	 * were allocated by btrfs_alloc_dummy_sum only to record the logical
+	 * addresses and don't contain actual checksums.  We thus must free them
+	 * here so that we don't attempt to log the csums later.
+	 */
+	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
+	    test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+		while ((sum = list_first_entry_or_null(&ordered->list,
+						       typeof(*sum), list))) {
+			list_del(&sum->list);
+			kfree(sum);
+		}
+	}
 }
 
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-- 
2.39.2


  reply	other threads:[~2023-06-08 12:14 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-08 12:14 fix nodatasum I/O for zone devices v2 Christoph Hellwig
2023-06-08 12:14 ` Christoph Hellwig [this message]
2023-06-08 15:40   ` [PATCH] btrfs: allocate dummy ordereded_sums objects for nocsum I/O on zoned file systems David Sterba
2023-06-09  4:55     ` Christoph Hellwig
2023-06-09 19:18       ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230608121410.275766-2-hch@lst.de \
    --to=hch@lst.de \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox