* [PATCH 1/5] bcachefs: Kill journal_res_state.unwritten_idx
2025-02-06 1:28 [PATCH 0/5] improve journal pipelining Kent Overstreet
@ 2025-02-06 1:28 ` Kent Overstreet
2025-02-06 1:28 ` [PATCH 2/5] bcachefs: Kill journal_res.idx Kent Overstreet
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2025-02-06 1:28 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel; +Cc: Kent Overstreet
Dead code
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/journal.c | 3 +--
fs/bcachefs/journal_io.c | 13 +------------
fs/bcachefs/journal_types.h | 7 +++----
3 files changed, 5 insertions(+), 18 deletions(-)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 24c294d4634e..da64a73704ff 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1382,8 +1382,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
set_bit(JOURNAL_running, &j->flags);
j->last_flush_write = jiffies;
- j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
- j->reservations.unwritten_idx++;
+ j->reservations.idx = journal_cur_seq(j);
c->last_bucket_seq_cleanup = journal_cur_seq(j);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 11c39e0c34f4..f2ff28e6697c 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1611,7 +1611,6 @@ static CLOSURE_CALLBACK(journal_write_done)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
- union journal_res_state old, new;
u64 seq = le64_to_cpu(w->data->seq);
int err = 0;
@@ -1671,16 +1670,6 @@ static CLOSURE_CALLBACK(journal_write_done)
if (j->watermark != BCH_WATERMARK_stripe)
journal_reclaim_kick(&c->journal);
- old.v = atomic64_read(&j->reservations.counter);
- do {
- new.v = old.v;
- BUG_ON(journal_state_count(new, new.unwritten_idx));
- BUG_ON(new.unwritten_idx != (seq & JOURNAL_BUF_MASK));
-
- new.unwritten_idx++;
- } while (!atomic64_try_cmpxchg(&j->reservations.counter,
- &old.v, new.v));
-
closure_wake_up(&w->wait);
completed = true;
}
@@ -1695,7 +1684,7 @@ static CLOSURE_CALLBACK(journal_write_done)
}
if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
- new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
+ j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
struct journal_buf *buf = journal_cur_buf(j);
long delta = buf->expires - jiffies;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a198a81d7478..e38a3d93b17d 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -95,9 +95,8 @@ union journal_res_state {
};
struct {
- u64 cur_entry_offset:20,
+ u64 cur_entry_offset:22,
idx:2,
- unwritten_idx:2,
buf0_count:10,
buf1_count:10,
buf2_count:10,
@@ -107,13 +106,13 @@ union journal_res_state {
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
-#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
+#define JOURNAL_ENTRY_SIZE_MAX (4U << 22) /* 16M */
/*
* We stash some journal state as sentinal values in cur_entry_offset:
* note - cur_entry_offset is in units of u64s
*/
-#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 20) - 1)
+#define JOURNAL_ENTRY_OFFSET_MAX ((1U << 22) - 1)
#define JOURNAL_ENTRY_BLOCKED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 2)
#define JOURNAL_ENTRY_CLOSED_VAL (JOURNAL_ENTRY_OFFSET_MAX - 1)
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/5] bcachefs: Kill journal_res.idx
2025-02-06 1:28 [PATCH 0/5] improve journal pipelining Kent Overstreet
2025-02-06 1:28 ` [PATCH 1/5] bcachefs: Kill journal_res_state.unwritten_idx Kent Overstreet
@ 2025-02-06 1:28 ` Kent Overstreet
2025-02-06 1:28 ` [PATCH 3/5] bcachefs: Don't touch journal_buf->data->seq in journal_res_get Kent Overstreet
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2025-02-06 1:28 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel; +Cc: Kent Overstreet
More dead code.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/journal.c | 2 +-
fs/bcachefs/journal.h | 11 ++++++-----
fs/bcachefs/journal_types.h | 1 -
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index da64a73704ff..20b748f61b21 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -306,7 +306,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t
bch2_journal_space_available(j);
- __bch2_journal_buf_put(j, old.idx, le64_to_cpu(buf->data->seq));
+ __bch2_journal_buf_put(j, le64_to_cpu(buf->data->seq));
}
void bch2_journal_halt(struct journal *j)
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 107f7f901cd9..1e5fcfe3624a 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -193,7 +193,7 @@ bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
static inline struct jset_entry *
journal_res_entry(struct journal *j, struct journal_res *res)
{
- return vstruct_idx(j->buf[res->idx].data, res->offset);
+ return vstruct_idx(j->buf[res->seq & JOURNAL_BUF_MASK].data, res->offset);
}
static inline unsigned journal_entry_init(struct jset_entry *entry, unsigned type,
@@ -267,8 +267,9 @@ bool bch2_journal_entry_close(struct journal *);
void bch2_journal_do_writes(struct journal *);
void bch2_journal_buf_put_final(struct journal *, u64);
-static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
{
+ unsigned idx = seq & JOURNAL_BUF_MASK;
union journal_res_state s;
s = journal_state_buf_put(j, idx);
@@ -276,8 +277,9 @@ static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 s
bch2_journal_buf_put_final(j, seq);
}
-static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
+static inline void bch2_journal_buf_put(struct journal *j, u64 seq)
{
+ unsigned idx = seq & JOURNAL_BUF_MASK;
union journal_res_state s;
s = journal_state_buf_put(j, idx);
@@ -306,7 +308,7 @@ static inline void bch2_journal_res_put(struct journal *j,
BCH_JSET_ENTRY_btree_keys,
0, 0, 0);
- bch2_journal_buf_put(j, res->idx, res->seq);
+ bch2_journal_buf_put(j, res->seq);
res->ref = 0;
}
@@ -361,7 +363,6 @@ static inline int journal_res_get_fast(struct journal *j,
&old.v, new.v));
res->ref = true;
- res->idx = old.idx;
res->offset = old.cur_entry_offset;
res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
return 1;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index e38a3d93b17d..6a098c7e3f2e 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -79,7 +79,6 @@ struct journal_entry_pin {
struct journal_res {
bool ref;
- u8 idx;
u16 u64s;
u32 offset;
u64 seq;
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/5] bcachefs: Don't touch journal_buf->data->seq in journal_res_get
2025-02-06 1:28 [PATCH 0/5] improve journal pipelining Kent Overstreet
2025-02-06 1:28 ` [PATCH 1/5] bcachefs: Kill journal_res_state.unwritten_idx Kent Overstreet
2025-02-06 1:28 ` [PATCH 2/5] bcachefs: Kill journal_res.idx Kent Overstreet
@ 2025-02-06 1:28 ` Kent Overstreet
2025-02-06 1:28 ` [PATCH 4/5] bcachefs: Free journal bufs when not in use Kent Overstreet
2025-02-06 1:28 ` [PATCH 5/5] bcachefs: Increase JOURNAL_BUF_NR Kent Overstreet
4 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2025-02-06 1:28 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel; +Cc: Kent Overstreet
This is a small optimization, reducing the number of cachelines we touch
in the fast path - and it's also necessary for a later patch in this
series that increases JOURNAL_BUF_NR.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/journal.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 1e5fcfe3624a..e514d664b8ae 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -364,7 +364,10 @@ static inline int journal_res_get_fast(struct journal *j,
res->ref = true;
res->offset = old.cur_entry_offset;
- res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
+ res->seq = journal_cur_seq(j);
+ res->seq -= (res->seq - old.idx) & JOURNAL_BUF_MASK;
+
+ EBUG_ON(res->seq != le64_to_cpu(j->buf[old.idx].data->seq));
return 1;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/5] bcachefs: Free journal bufs when not in use
2025-02-06 1:28 [PATCH 0/5] improve journal pipelining Kent Overstreet
` (2 preceding siblings ...)
2025-02-06 1:28 ` [PATCH 3/5] bcachefs: Don't touch journal_buf->data->seq in journal_res_get Kent Overstreet
@ 2025-02-06 1:28 ` Kent Overstreet
2025-02-06 1:28 ` [PATCH 5/5] bcachefs: Increase JOURNAL_BUF_NR Kent Overstreet
4 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2025-02-06 1:28 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel; +Cc: Kent Overstreet
Since we're increasing the number of 'struct journal_bufs', we don't
want them all permanently holding onto buffers for the journal data -
that'd be 16 * 2MB = 32MB, or potentially more.
Add a single-element mempool (open coded, since buffer size varies).
This also means we won't be hitting the memory allocator every time we
open and close a journal entry/buffer.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/journal.c | 63 +++++++++++++++++++++++++++++++------
fs/bcachefs/journal_io.c | 15 ++++++++-
fs/bcachefs/journal_types.h | 3 ++
3 files changed, 70 insertions(+), 11 deletions(-)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 20b748f61b21..8fbdb0bbb536 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -58,9 +58,12 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));
- prt_printf(out, "size:\t");
- prt_human_readable_u64(out, vstruct_bytes(buf->data));
- prt_newline(out);
+ struct jset *data = READ_ONCE(buf->data);
+ if (data) {
+ prt_printf(out, "size:\t");
+ prt_human_readable_u64(out, vstruct_bytes(data));
+ prt_newline(out);
+ }
prt_printf(out, "expires:\t");
prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
@@ -398,8 +401,16 @@ static int journal_entry_open(struct journal *j)
return JOURNAL_ERR_insufficient_devices; /* -EROFS */
}
+ if (!j->free_buf && !buf->data)
+ return JOURNAL_ERR_enomem; /* will retry after write completion frees up a buf */
+
BUG_ON(!j->cur_entry_sectors);
+ if (!buf->data) {
+ swap(buf->data, j->free_buf);
+ swap(buf->buf_size, j->free_buf_size);
+ }
+
buf->expires =
(journal_cur_seq(j) == j->flushed_seq_ondisk
? jiffies
@@ -514,6 +525,33 @@ static void journal_write_work(struct work_struct *work)
spin_unlock(&j->lock);
}
+static void journal_buf_prealloc(struct journal *j)
+{
+ if (j->free_buf &&
+ j->free_buf_size >= j->buf_size_want)
+ return;
+
+ unsigned buf_size = j->buf_size_want;
+
+ spin_unlock(&j->lock);
+ void *buf = kvmalloc(buf_size, GFP_NOFS);
+ spin_lock(&j->lock);
+
+ if (buf &&
+ (!j->free_buf ||
+ buf_size > j->free_buf_size)) {
+ swap(buf, j->free_buf);
+ swap(buf_size, j->free_buf_size);
+ }
+
+ if (unlikely(buf)) {
+ spin_unlock(&j->lock);
+ /* kvfree can sleep */
+ kvfree(buf);
+ spin_lock(&j->lock);
+ }
+}
+
static int __journal_res_get(struct journal *j, struct journal_res *res,
unsigned flags)
{
@@ -544,6 +582,8 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
spin_lock(&j->lock);
+ journal_buf_prealloc(j);
+
/*
* Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call bch2_journal_entry_close()
@@ -951,7 +991,8 @@ static void __bch2_journal_block(struct journal *j)
new.cur_entry_offset = JOURNAL_ENTRY_BLOCKED_VAL;
} while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v));
- journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
+ if (old.cur_entry_offset < JOURNAL_ENTRY_BLOCKED_VAL)
+ journal_cur_buf(j)->data->u64s = cpu_to_le32(old.cur_entry_offset);
}
}
@@ -1474,6 +1515,7 @@ void bch2_fs_journal_exit(struct journal *j)
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
kvfree(j->buf[i].data);
+ kvfree(j->free_buf);
free_fifo(&j->pin);
}
@@ -1500,13 +1542,13 @@ int bch2_fs_journal_init(struct journal *j)
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
return -BCH_ERR_ENOMEM_journal_pin_fifo;
- for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
- j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
- j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
- if (!j->buf[i].data)
- return -BCH_ERR_ENOMEM_journal_buf;
+ j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
+ j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
+ if (!j->free_buf)
+ return -BCH_ERR_ENOMEM_journal_buf;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
j->buf[i].idx = i;
- }
j->pin.front = j->pin.back = 1;
@@ -1556,6 +1598,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
prt_printf(out, "average write size:\t");
prt_human_readable_u64(out, nr_writes ? div64_u64(j->entry_bytes_written, nr_writes) : 0);
prt_newline(out);
+ prt_printf(out, "free buf:\t%u\n", j->free_buf ? j->free_buf_size : 0);
prt_printf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
prt_printf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
prt_printf(out, "reclaim kicked:\t%u\n", j->reclaim_kicked);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index f2ff28e6697c..3ff6563b7dd2 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1640,6 +1640,19 @@ static CLOSURE_CALLBACK(journal_write_done)
j->err_seq = seq;
w->write_done = true;
+ if (!j->free_buf || j->free_buf_size < w->buf_size) {
+ swap(j->free_buf, w->data);
+ swap(j->free_buf_size, w->buf_size);
+ }
+
+ if (w->data) {
+ spin_unlock(&j->lock);
+ kvfree(w->data);
+ w->data = NULL;
+ w->buf_size = 0;
+ spin_lock(&j->lock);
+ }
+
bool completed = false;
for (seq = journal_last_unwritten_seq(j);
@@ -1649,7 +1662,7 @@ static CLOSURE_CALLBACK(journal_write_done)
if (!w->write_done)
break;
- if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
+ if (!j->err_seq && !w->noflush) {
j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = w->last_seq;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 6a098c7e3f2e..cd8440868d38 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -153,6 +153,7 @@ enum journal_flags {
x(journal_full) \
x(journal_pin_full) \
x(journal_stuck) \
+ x(enomem) \
x(insufficient_devices)
enum journal_errors {
@@ -215,6 +216,8 @@ struct journal {
* other is possibly being written out.
*/
struct journal_buf buf[JOURNAL_BUF_NR];
+ void *free_buf;
+ unsigned free_buf_size;
spinlock_t lock;
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/5] bcachefs: Increase JOURNAL_BUF_NR
2025-02-06 1:28 [PATCH 0/5] improve journal pipelining Kent Overstreet
` (3 preceding siblings ...)
2025-02-06 1:28 ` [PATCH 4/5] bcachefs: Free journal bufs when not in use Kent Overstreet
@ 2025-02-06 1:28 ` Kent Overstreet
4 siblings, 0 replies; 6+ messages in thread
From: Kent Overstreet @ 2025-02-06 1:28 UTC (permalink / raw)
To: linux-bcachefs, linux-kernel; +Cc: Kent Overstreet
Increase journal pipelining.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/bcachefs.h | 1 +
fs/bcachefs/journal.c | 27 +++++++++++++++++++++++----
fs/bcachefs/journal.h | 32 ++++++++++++++++++++++----------
fs/bcachefs/journal_types.h | 8 +++++++-
4 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 13acfbf3852a..9791bfe08895 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -456,6 +456,7 @@ BCH_DEBUG_PARAMS_DEBUG()
x(blocked_journal_low_on_space) \
x(blocked_journal_low_on_pin) \
x(blocked_journal_max_in_flight) \
+ x(blocked_journal_max_open) \
x(blocked_key_cache_flush) \
x(blocked_allocate) \
x(blocked_allocate_open_bucket) \
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 8fbdb0bbb536..66f2fe357719 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -56,7 +56,12 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
prt_printf(out, "seq:\t%llu\n", seq);
printbuf_indent_add(out, 2);
- prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));
+ if (!buf->write_started)
+ prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i & JOURNAL_STATE_BUF_MASK));
+
+ struct closure *cl = &buf->io;
+ int r = atomic_read(&cl->remaining);
+ prt_printf(out, "io:\t%pS r %i\n", cl->fn, r & CLOSURE_REMAINING_MASK);
struct jset *data = READ_ONCE(buf->data);
if (data) {
@@ -198,7 +203,8 @@ void bch2_journal_do_writes(struct journal *j)
if (w->write_started)
continue;
- if (!journal_state_count(j->reservations, idx)) {
+ if (!journal_state_seq_count(j, j->reservations, seq)) {
+ j->seq_write_started = seq;
w->write_started = true;
closure_call(&w->io, bch2_journal_write, j->wq, NULL);
}
@@ -394,6 +400,9 @@ static int journal_entry_open(struct journal *j)
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;
+ if (atomic64_read(&j->seq) - j->seq_write_started == JOURNAL_STATE_BUF_NR)
+ return JOURNAL_ERR_max_open;
+
if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) {
bch_err(c, "cannot start: journal seq overflow");
if (bch2_fs_emergency_read_only_locked(c))
@@ -475,7 +484,7 @@ static int journal_entry_open(struct journal *j)
new.idx++;
BUG_ON(journal_state_count(new, new.idx));
- BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
+ BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_STATE_BUF_MASK));
journal_state_inc(&new);
@@ -621,7 +630,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
if (ret == JOURNAL_ERR_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
+ bch2_journal_bufs_to_text(&buf, j);
+ trace_journal_entry_full(c, buf.buf);
+ printbuf_exit(&buf);
+ count_event(c, journal_entry_full);
+ }
+ if (ret == JOURNAL_ERR_max_open &&
+ track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true)) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
bch2_journal_bufs_to_text(&buf, j);
@@ -1033,7 +1051,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
*blocked = true;
}
- ret = journal_state_count(s, idx) > open
+ ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open
? ERR_PTR(-EAGAIN)
: buf;
break;
@@ -1383,6 +1401,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->replay_journal_seq_end = cur_seq;
j->last_seq_ondisk = last_seq;
j->flushed_seq_ondisk = cur_seq - 1;
+ j->seq_write_started = cur_seq - 1;
j->seq_ondisk = cur_seq - 1;
j->pin.front = last_seq;
j->pin.back = cur_seq;
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index e514d664b8ae..1c460ded2a11 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -121,11 +121,6 @@ static inline void journal_wake(struct journal *j)
closure_wake_up(&j->async_wait);
}
-static inline struct journal_buf *journal_cur_buf(struct journal *j)
-{
- return j->buf + j->reservations.idx;
-}
-
/* Sequence number of oldest dirty journal entry */
static inline u64 journal_last_seq(struct journal *j)
@@ -143,6 +138,15 @@ static inline u64 journal_last_unwritten_seq(struct journal *j)
return j->seq_ondisk + 1;
}
+static inline struct journal_buf *journal_cur_buf(struct journal *j)
+{
+ unsigned idx = (journal_cur_seq(j) &
+ JOURNAL_BUF_MASK &
+ ~JOURNAL_STATE_BUF_MASK) + j->reservations.idx;
+
+ return j->buf + idx;
+}
+
static inline int journal_state_count(union journal_res_state s, int idx)
{
switch (idx) {
@@ -154,6 +158,15 @@ static inline int journal_state_count(union journal_res_state s, int idx)
BUG();
}
+static inline int journal_state_seq_count(struct journal *j,
+ union journal_res_state s, u64 seq)
+{
+ if (journal_cur_seq(j) - seq <= JOURNAL_STATE_BUF_NR)
+ return journal_state_count(s, seq & JOURNAL_STATE_BUF_MASK);
+ else
+ return 0;
+}
+
static inline void journal_state_inc(union journal_res_state *s)
{
s->buf0_count += s->idx == 0;
@@ -269,7 +282,7 @@ void bch2_journal_buf_put_final(struct journal *, u64);
static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
{
- unsigned idx = seq & JOURNAL_BUF_MASK;
+ unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
union journal_res_state s;
s = journal_state_buf_put(j, idx);
@@ -279,7 +292,7 @@ static inline void __bch2_journal_buf_put(struct journal *j, u64 seq)
static inline void bch2_journal_buf_put(struct journal *j, u64 seq)
{
- unsigned idx = seq & JOURNAL_BUF_MASK;
+ unsigned idx = seq & JOURNAL_STATE_BUF_MASK;
union journal_res_state s;
s = journal_state_buf_put(j, idx);
@@ -365,9 +378,7 @@ static inline int journal_res_get_fast(struct journal *j,
res->ref = true;
res->offset = old.cur_entry_offset;
res->seq = journal_cur_seq(j);
- res->seq -= (res->seq - old.idx) & JOURNAL_BUF_MASK;
-
- EBUG_ON(res->seq != le64_to_cpu(j->buf[old.idx].data->seq));
+ res->seq -= (res->seq - old.idx) & JOURNAL_STATE_BUF_MASK;
return 1;
}
@@ -394,6 +405,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
(flags & JOURNAL_RES_GET_NONBLOCK) != 0,
NULL, _THIS_IP_);
EBUG_ON(!res->ref);
+ BUG_ON(!res->seq);
}
return 0;
}
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index cd8440868d38..060ec991dd2b 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -12,7 +12,11 @@
/* btree write buffer steals 8 bits for its own purposes: */
#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1)
-#define JOURNAL_BUF_BITS 2
+#define JOURNAL_STATE_BUF_BITS 2
+#define JOURNAL_STATE_BUF_NR (1U << JOURNAL_STATE_BUF_BITS)
+#define JOURNAL_STATE_BUF_MASK (JOURNAL_STATE_BUF_NR - 1)
+
+#define JOURNAL_BUF_BITS 4
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
@@ -150,6 +154,7 @@ enum journal_flags {
x(retry) \
x(blocked) \
x(max_in_flight) \
+ x(max_open) \
x(journal_full) \
x(journal_pin_full) \
x(journal_stuck) \
@@ -235,6 +240,7 @@ struct journal {
/* Sequence number of most recent journal entry (last entry in @pin) */
atomic64_t seq;
+ u64 seq_write_started;
/* seq, last_seq from the most recent journal entry successfully written */
u64 seq_ondisk;
u64 flushed_seq_ondisk;
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread