From: Li Dongyang <dongyangli@ddn.com>
To: linux-ext4@vger.kernel.org
Cc: Andreas Dilger <adilger@dilger.ca>, Alex Zhuravlev <bzzz@whamcloud.com>
Subject: [PATCH V2] jbd2: use rhashtable for revoke records during replay
Date: Tue, 5 Nov 2024 14:44:28 +1100 [thread overview]
Message-ID: <20241105034428.578701-1-dongyangli@ddn.com> (raw)
A resizable hashtable (rhashtable) should improve journal replay time when
we have millions of revoke records.
Note that the rhashtable is used during replay only. Regular processing
still uses the existing list-based hash table, since removal with
list_del() is less expensive there.
before:
1048576 records - 95 seconds
2097152 records - 580 seconds
after:
1048576 records - 2 seconds
2097152 records - 3 seconds
4194304 records - 7 seconds
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
v1->v2:
include rhashtable header in jbd2.h
---
fs/jbd2/recovery.c | 4 +++
fs/jbd2/revoke.c | 65 +++++++++++++++++++++++++++++++-------------
include/linux/jbd2.h | 7 +++++
3 files changed, 57 insertions(+), 19 deletions(-)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 667f67342c52..d9287439171c 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -294,6 +294,10 @@ int jbd2_journal_recover(journal_t *journal)
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
+ err = jbd2_journal_init_recovery_revoke(journal);
+ if (err)
+ return err;
+
/*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..d6e96099e9c9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -90,6 +90,7 @@
#include <linux/bio.h>
#include <linux/log2.h>
#include <linux/hash.h>
+#include <linux/rhashtable.h>
#endif
static struct kmem_cache *jbd2_revoke_record_cache;
@@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_table_cache;
struct jbd2_revoke_record_s
{
- struct list_head hash;
+ union {
+ struct list_head hash;
+ struct rhash_head linkage;
+ };
tid_t sequence; /* Used for recovery only */
unsigned long long blocknr;
};
@@ -680,13 +684,22 @@ static void flush_descriptor(journal_t *journal,
* single block.
*/
+static const struct rhashtable_params revoke_rhashtable_params = {
+ .key_len = sizeof(unsigned long long),
+ .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr),
+ .head_offset = offsetof(struct jbd2_revoke_record_s, linkage),
+};
+
int jbd2_journal_set_revoke(journal_t *journal,
unsigned long long blocknr,
tid_t sequence)
{
struct jbd2_revoke_record_s *record;
+ gfp_t gfp_mask = GFP_NOFS;
+ int err;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (record) {
/* If we have multiple occurrences, only record the
* latest sequence number in the hashed record */
@@ -694,7 +707,22 @@ int jbd2_journal_set_revoke(journal_t *journal,
record->sequence = sequence;
return 0;
}
- return insert_revoke_hash(journal, blocknr, sequence);
+
+ if (journal_oom_retry)
+ gfp_mask |= __GFP_NOFAIL;
+ record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
+ if (!record)
+ return -ENOMEM;
+
+ record->sequence = sequence;
+ record->blocknr = blocknr;
+ err = rhashtable_lookup_insert_fast(&journal->j_revoke_rhtable,
+ &record->linkage,
+ revoke_rhashtable_params);
+ if (err)
+ kmem_cache_free(jbd2_revoke_record_cache, record);
+
+ return err;
}
/*
@@ -710,7 +738,8 @@ int jbd2_journal_test_revoke(journal_t *journal,
{
struct jbd2_revoke_record_s *record;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (!record)
return 0;
if (tid_gt(sequence, record->sequence))
@@ -718,6 +747,17 @@ int jbd2_journal_test_revoke(journal_t *journal,
return 1;
}
+int jbd2_journal_init_recovery_revoke(journal_t *journal)
+{
+ return rhashtable_init(&journal->j_revoke_rhtable,
+ &revoke_rhashtable_params);
+}
+
+static void jbd2_revoke_record_free(void *ptr, void *arg)
+{
+ kmem_cache_free(jbd2_revoke_record_cache, ptr);
+}
+
/*
* Finally, once recovery is over, we need to clear the revoke table so
* that it can be reused by the running filesystem.
@@ -725,19 +765,6 @@ int jbd2_journal_test_revoke(journal_t *journal,
void jbd2_journal_clear_revoke(journal_t *journal)
{
- int i;
- struct list_head *hash_list;
- struct jbd2_revoke_record_s *record;
- struct jbd2_revoke_table_s *revoke;
-
- revoke = journal->j_revoke;
-
- for (i = 0; i < revoke->hash_size; i++) {
- hash_list = &revoke->hash_table[i];
- while (!list_empty(hash_list)) {
- record = (struct jbd2_revoke_record_s*) hash_list->next;
- list_del(&record->hash);
- kmem_cache_free(jbd2_revoke_record_cache, record);
- }
- }
+ rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
+ jbd2_revoke_record_free, NULL);
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..2b0aa1e159b8 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
#include <linux/blkdev.h>
+#include <linux/rhashtable-types.h>
#include <crypto/hash.h>
#endif
@@ -1122,6 +1123,11 @@ struct journal_s
*/
struct jbd2_revoke_table_s *j_revoke_table[2];
+ /**
+ * @j_revoke_rhtable: rhashtable for revoke records during recovery
+ */
+ struct rhashtable j_revoke_rhtable;
+
/**
* @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
*/
@@ -1644,6 +1650,7 @@ extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
+extern int jbd2_journal_init_recovery_revoke(journal_t *);
extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal);
extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
--
2.47.0
next reply other threads:[~2024-11-05 4:18 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-05 3:44 Li Dongyang [this message]
2024-11-08 10:33 ` [PATCH V2] jbd2: use rhashtable for revoke records during replay Jan Kara
2024-11-08 16:11 ` Theodore Ts'o
2024-11-12 18:44 ` Andreas Dilger
2024-11-13 14:47 ` Jan Kara
2025-01-16 0:08 ` Andreas Dilger
2025-01-16 18:04 ` Jan Kara
2024-11-09 3:12 ` Zhang Yi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241105034428.578701-1-dongyangli@ddn.com \
--to=dongyangli@ddn.com \
--cc=adilger@dilger.ca \
--cc=bzzz@whamcloud.com \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.