From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>,
djwong@kernel.org, bfoster@redhat.com
Subject: [PATCH 03/21] bcachefs: btree write buffer knows how to accumulate bch_accounting keys
Date: Sat, 24 Feb 2024 21:38:05 -0500 [thread overview]
Message-ID: <20240225023826.2413565-4-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20240225023826.2413565-1-kent.overstreet@linux.dev>
Teach the btree write buffer how to accumulate accounting keys - instead
of having the newer key overwrite the older key as we do with other
updates, we need to add them together.
Also, add a flag so that write buffer flush knows when journal replay is
finished flushing accounting, and teach it to hold accounting keys until
that flag is set.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/bcachefs.h | 1 +
fs/bcachefs/btree_write_buffer.c | 66 +++++++++++++++++++++++++++-----
fs/bcachefs/recovery.c | 3 ++
3 files changed, 61 insertions(+), 9 deletions(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 62812fc1cad0..9a24989c9a6a 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -616,6 +616,7 @@ struct bch_dev {
#define BCH_FS_FLAGS() \
x(started) \
+ x(accounting_replay_done) \
x(may_go_rw) \
x(rw) \
x(was_rw) \
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b77e7b382b66..002a0762fc85 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -5,6 +5,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
+#include "disk_accounting.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
@@ -123,7 +124,9 @@ static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
struct btree_write_buffered_key *wb,
- bool *write_locked, size_t *fast)
+ bool *write_locked,
+ bool *accounting_accumulated,
+ size_t *fast)
{
struct btree_path *path;
int ret;
@@ -136,6 +139,16 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
if (ret)
return ret;
+ if (!*accounting_accumulated && wb->k.k.type == KEY_TYPE_accounting) {
+ struct bkey u;
+ struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
+
+ if (k.k->type == KEY_TYPE_accounting)
+ bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
+ bkey_s_c_to_accounting(k));
+ }
+ *accounting_accumulated = true;
+
/*
* We can't clone a path that has write locks: unshare it now, before
* set_pos and traverse():
@@ -248,8 +261,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_iter iter = { NULL };
- size_t skipped = 0, fast = 0, slowpath = 0;
+ size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
bool write_locked = false;
+ bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
int ret = 0;
bch2_trans_unlock(trans);
@@ -284,17 +298,29 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
+ bool accounting_accumulated = false;
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
BUG_ON(!k->journal_seq);
+ if (!accounting_replay_done &&
+ k->k.k.type == KEY_TYPE_accounting) {
+ slowpath++;
+ continue;
+ }
+
if (i + 1 < &darray_top(wb->sorted) &&
wb_key_eq(i, i + 1)) {
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
- skipped++;
+ if (k->k.k.type == KEY_TYPE_accounting &&
+ n->k.k.type == KEY_TYPE_accounting)
+ bch2_accounting_accumulate(bkey_i_to_accounting(&n->k),
+ bkey_i_to_s_c_accounting(&k->k));
+
+ overwritten++;
n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
k->journal_seq = 0;
continue;
@@ -325,7 +351,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
break;
}
- ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+ ret = wb_flush_one(trans, &iter, k, &write_locked,
+ &accounting_accumulated, &fast);
if (!write_locked)
bch2_trans_begin(trans);
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
@@ -361,8 +388,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
if (!i->journal_seq)
continue;
- bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
- bch2_btree_write_buffer_journal_flush);
+ if (!accounting_replay_done &&
+ i->k.k.type == KEY_TYPE_accounting) {
+ could_not_insert++;
+ continue;
+ }
+
+ if (!could_not_insert)
+ bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+ bch2_btree_write_buffer_journal_flush);
bch2_trans_begin(trans);
@@ -375,13 +409,27 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
+
+ i->journal_seq = 0;
+ }
+
+ if (could_not_insert) {
+ struct btree_write_buffered_key *dst = wb->flushing.keys.data;
+
+ darray_for_each(wb->flushing.keys, i)
+ if (i->journal_seq)
+ *dst++ = *i;
+ wb->flushing.keys.nr = dst - wb->flushing.keys.data;
}
}
err:
+ if (ret || !could_not_insert) {
+ bch2_journal_pin_drop(j, &wb->flushing.pin);
+ wb->flushing.keys.nr = 0;
+ }
+
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
- trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
- bch2_journal_pin_drop(j, &wb->flushing.pin);
- wb->flushing.keys.nr = 0;
+ trace_write_buffer_flush(trans, wb->flushing.keys.nr, overwritten, fast, 0);
return ret;
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 6829d80bd181..b8289af66c8e 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -228,6 +228,8 @@ static int bch2_journal_replay(struct bch_fs *c)
goto err;
}
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
+
/*
* First, attempt to replay keys in sorted order. This is more
* efficient - better locality of btree access - but some might fail if
@@ -1204,6 +1206,7 @@ int bch2_fs_initialize(struct bch_fs *c)
* set up the journal.pin FIFO and journal.cur pointer:
*/
bch2_fs_journal_start(&c->journal, 1);
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
bch2_journal_set_replay_done(&c->journal);
ret = bch2_fs_read_write_early(c);
--
2.43.0
next prev parent reply other threads:[~2024-02-25 2:38 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-25 2:38 [PATCH 00/21] bcachefs disk accounting rewrite Kent Overstreet
2024-02-25 2:38 ` [PATCH 01/21] bcachefs: KEY_TYPE_accounting Kent Overstreet
2024-02-27 15:49 ` Brian Foster
2024-02-28 19:39 ` Kent Overstreet
2024-02-29 18:43 ` Brian Foster
2024-02-29 21:24 ` Kent Overstreet
2024-03-01 15:03 ` Brian Foster
2024-03-01 19:30 ` Kent Overstreet
2024-02-25 2:38 ` [PATCH 02/21] bcachefs: Accumulate accounting keys in journal replay Kent Overstreet
2024-02-27 15:49 ` Brian Foster
2024-02-28 20:06 ` Kent Overstreet
2024-02-25 2:38 ` Kent Overstreet [this message]
2024-02-27 15:50 ` [PATCH 03/21] bcachefs: btree write buffer knows how to accumulate bch_accounting keys Brian Foster
2024-02-28 22:42 ` Kent Overstreet
2024-02-29 18:44 ` Brian Foster
2024-02-29 20:25 ` Kent Overstreet
2024-02-25 2:38 ` [PATCH 04/21] bcachefs: Disk space accounting rewrite Kent Overstreet
2024-02-27 15:55 ` Brian Foster
2024-02-29 4:10 ` Kent Overstreet
2024-02-29 18:44 ` Brian Foster
2024-02-29 21:16 ` Kent Overstreet
2024-03-01 15:03 ` Brian Foster
2024-02-25 2:38 ` [PATCH 05/21] bcachefs: dev_usage updated by new accounting Kent Overstreet
2024-02-25 2:38 ` [PATCH 06/21] bcachefs: Kill bch2_fs_usage_initialize() Kent Overstreet
2024-02-25 2:38 ` [PATCH 07/21] bcachefs: Convert bch2_ioctl_fs_usage() to new accounting Kent Overstreet
2024-02-25 2:38 ` [PATCH 08/21] bcachefs: kill bch2_fs_usage_read() Kent Overstreet
2024-02-25 2:38 ` [PATCH 09/21] bcachefs: Kill writing old accounting to journal Kent Overstreet
2024-02-25 2:38 ` [PATCH 10/21] bcachefs: Delete journal-buf-sharded old style accounting Kent Overstreet
2024-02-25 2:38 ` [PATCH 11/21] bcachefs: Kill bch2_fs_usage_to_text() Kent Overstreet
2024-02-25 2:38 ` [PATCH 12/21] bcachefs: Kill fs_usage_online Kent Overstreet
2024-02-25 2:38 ` [PATCH 13/21] bcachefs: Kill replicas_journal_res Kent Overstreet
2024-02-25 2:38 ` [PATCH 14/21] bcachefs: Convert gc to new accounting Kent Overstreet
2024-02-25 2:38 ` [PATCH 15/21] bcachefs: Convert bch2_replicas_gc2() " Kent Overstreet
2024-02-25 2:38 ` [PATCH 16/21] bcachefs: bch2_verify_accounting_clean() Kent Overstreet
2024-02-25 2:38 ` [PATCH 17/21] bcachefs: Eytzinger accumulation for accounting keys Kent Overstreet
2024-02-25 2:38 ` [PATCH 18/21] bcachefs: bch_acct_compression Kent Overstreet
2024-02-25 2:38 ` [PATCH 19/21] bcachefs: Convert bch2_compression_stats_to_text() to new accounting Kent Overstreet
2024-02-25 2:38 ` [PATCH 20/21] bcachefs: bch2_fs_accounting_to_text() Kent Overstreet
2024-02-25 2:38 ` [PATCH 21/21] bcachefs: bch2_fs_usage_base_to_text() Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240225023826.2413565-4-kent.overstreet@linux.dev \
--to=kent.overstreet@linux.dev \
--cc=bfoster@redhat.com \
--cc=djwong@kernel.org \
--cc=linux-bcachefs@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox