From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Subject: [PATCH 2/3] bcachefs: bch2_folio_reservation_get_partial() is now better behaved
Date: Fri, 18 Oct 2024 00:40:02 -0400 [thread overview]
Message-ID: <20241018044006.4067521-3-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20241018044006.4067521-1-kent.overstreet@linux.dev>
bch2_folio_reservation_get_partial(), on partial success, will now
return a reservation that's aligned to the filesystem blocksize.
This is a partial fix for fstests generic/299 - fio verify is badly
behaved in the presence of short writes that aren't aligned to its
blocksize.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/buckets.c | 7 +++-
fs/bcachefs/buckets.h | 12 +++---
fs/bcachefs/fs-io-pagecache.c | 70 ++++++++++++++++++++++-------------
3 files changed, 57 insertions(+), 32 deletions(-)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 546cd01a72e3..ec7d9a59bea9 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1160,11 +1160,11 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
#define SECTORS_CACHE 1024
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
- u64 sectors, int flags)
+ u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
u64 old, get;
- s64 sectors_available;
+ u64 sectors_available;
int ret;
percpu_down_read(&c->mark_lock);
@@ -1202,6 +1202,9 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
percpu_u64_set(&c->pcpu->sectors_available, 0);
sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
+ if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
+ sectors = min(sectors, sectors_available);
+
if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index e2cb7b24b220..fd5e6ccad45e 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -344,14 +344,16 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
}
}
-#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
+enum bch_reservation_flags {
+ BCH_DISK_RESERVATION_NOFAIL = 1 << 0,
+ BCH_DISK_RESERVATION_PARTIAL = 1 << 1,
+};
-int __bch2_disk_reservation_add(struct bch_fs *,
- struct disk_reservation *,
- u64, int);
+int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *,
+ u64, enum bch_reservation_flags);
static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
- u64 sectors, int flags)
+ u64 sectors, enum bch_reservation_flags flags)
{
#ifdef __KERNEL__
u64 old, new;
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index cde3a0445ee9..1d4910ea0f1d 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -399,14 +399,17 @@ void bch2_folio_reservation_put(struct bch_fs *c,
bch2_quota_reservation_put(c, inode, &res->quota);
}
-int bch2_folio_reservation_get(struct bch_fs *c,
+static int __bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
- size_t offset, size_t len)
+ size_t offset, size_t len,
+ bool partial)
{
struct bch_folio *s = bch2_folio_create(folio, 0);
unsigned i, disk_sectors = 0, quota_sectors = 0;
+ struct disk_reservation disk_res = {};
+ size_t reserved = len;
int ret;
if (!s)
@@ -422,48 +425,65 @@ int bch2_folio_reservation_get(struct bch_fs *c,
}
if (disk_sectors) {
- ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
+ ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
+ partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
if (unlikely(ret))
return ret;
+
+ if (unlikely(disk_res.sectors != disk_sectors)) {
+ disk_sectors = quota_sectors = 0;
+
+ for (i = round_down(offset, block_bytes(c)) >> 9;
+ i < round_up(offset + len, block_bytes(c)) >> 9;
+ i++) {
+ disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
+ if (disk_sectors > disk_res.sectors) {
+ /*
+ * Make sure to get a reservation that's
+ * aligned to the filesystem blocksize:
+ */
+ unsigned reserved_offset = round_down(i << 9, block_bytes(c));
+ reserved = clamp(reserved_offset, offset, offset + len) - offset;
+
+ if (!reserved) {
+ bch2_disk_reservation_put(c, &disk_res);
+ return -BCH_ERR_ENOSPC_disk_reservation;
+ }
+ break;
+ }
+ quota_sectors += s->s[i].state == SECTOR_unallocated;
+ }
+ }
}
if (quota_sectors) {
ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
if (unlikely(ret)) {
- struct disk_reservation tmp = { .sectors = disk_sectors };
- res->disk.sectors -= disk_sectors;
-
- bch2_disk_reservation_put(c, &tmp);
+ bch2_disk_reservation_put(c, &disk_res);
return ret;
}
}
- return 0;
+ res->disk.sectors += disk_res.sectors;
+ return partial ? reserved : 0;
}
-ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
+int bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
- size_t l, reserved = 0;
- int ret;
-
- while ((l = len - reserved)) {
- while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
- if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
- return reserved ?: ret;
-
- len = reserved + l;
- l /= 2;
- }
-
- offset += l;
- reserved += l;
- }
+ return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
+}
- return reserved;
+ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ struct folio *folio,
+ struct bch2_folio_reservation *res,
+ size_t offset, size_t len)
+{
+ return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
}
static void bch2_clear_folio_bits(struct folio *folio)
--
2.45.2
next prev parent reply other threads:[~2024-10-18 4:40 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-18 4:40 [PATCH 0/3] fix buffered write path on -ENOSPC Kent Overstreet
2024-10-18 4:40 ` [PATCH 1/3] bcachefs: fix disk reservation accounting in bch2_folio_reservation_get() Kent Overstreet
2024-10-18 11:49 ` wangjianjian (C)
2024-10-18 4:40 ` Kent Overstreet [this message]
2024-10-18 4:40 ` [PATCH 3/3] bcachefs: Fix data corruption on -ENOSPC in buffered write path Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241018044006.4067521-3-kent.overstreet@linux.dev \
--to=kent.overstreet@linux.dev \
--cc=linux-bcachefs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox