From: Li Dongyang <dongyangli@ddn.com>
To: linux-ext4@vger.kernel.org
Cc: Andreas Dilger <adilger@dilger.ca>, Alex Zhuravlev <bzzz@whamcloud.com>
Subject: [PATCH V2] jbd2: use rhashtable for revoke records during replay
Date: Tue, 5 Nov 2024 14:44:28 +1100 [thread overview]
Message-ID: <20241105034428.578701-1-dongyangli@ddn.com> (raw)
A resizable hashtable should improve journal replay time when
we have millions of revoke records.
Note that the rhashtable is used during replay only. Regular
processing still uses the existing list-based hash table, because
removal with list_del() is less expensive there.
before:
1048576 records - 95 seconds
2097152 records - 580 seconds
after:
1048576 records - 2 seconds
2097152 records - 3 seconds
4194304 records - 7 seconds
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
---
v1->v2:
include rhashtable header in jbd2.h
---
fs/jbd2/recovery.c | 4 +++
fs/jbd2/revoke.c | 65 +++++++++++++++++++++++++++++++-------------
include/linux/jbd2.h | 7 +++++
3 files changed, 57 insertions(+), 19 deletions(-)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 667f67342c52..d9287439171c 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -294,6 +294,10 @@ int jbd2_journal_recover(journal_t *journal)
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
+ err = jbd2_journal_init_recovery_revoke(journal);
+ if (err)
+ return err;
+
/*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..d6e96099e9c9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -90,6 +90,7 @@
#include <linux/bio.h>
#include <linux/log2.h>
#include <linux/hash.h>
+#include <linux/rhashtable.h>
#endif
static struct kmem_cache *jbd2_revoke_record_cache;
@@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_table_cache;
struct jbd2_revoke_record_s
{
- struct list_head hash;
+ union {
+ struct list_head hash;
+ struct rhash_head linkage;
+ };
tid_t sequence; /* Used for recovery only */
unsigned long long blocknr;
};
@@ -680,13 +684,22 @@ static void flush_descriptor(journal_t *journal,
* single block.
*/
+static const struct rhashtable_params revoke_rhashtable_params = {
+ .key_len = sizeof(unsigned long long),
+ .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr),
+ .head_offset = offsetof(struct jbd2_revoke_record_s, linkage),
+};
+
int jbd2_journal_set_revoke(journal_t *journal,
unsigned long long blocknr,
tid_t sequence)
{
struct jbd2_revoke_record_s *record;
+ gfp_t gfp_mask = GFP_NOFS;
+ int err;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (record) {
/* If we have multiple occurrences, only record the
* latest sequence number in the hashed record */
@@ -694,7 +707,22 @@ int jbd2_journal_set_revoke(journal_t *journal,
record->sequence = sequence;
return 0;
}
- return insert_revoke_hash(journal, blocknr, sequence);
+
+ if (journal_oom_retry)
+ gfp_mask |= __GFP_NOFAIL;
+ record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
+ if (!record)
+ return -ENOMEM;
+
+ record->sequence = sequence;
+ record->blocknr = blocknr;
+ err = rhashtable_lookup_insert_fast(&journal->j_revoke_rhtable,
+ &record->linkage,
+ revoke_rhashtable_params);
+ if (err)
+ kmem_cache_free(jbd2_revoke_record_cache, record);
+
+ return err;
}
/*
@@ -710,7 +738,8 @@ int jbd2_journal_test_revoke(journal_t *journal,
{
struct jbd2_revoke_record_s *record;
- record = find_revoke_record(journal, blocknr);
+ record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
+ revoke_rhashtable_params);
if (!record)
return 0;
if (tid_gt(sequence, record->sequence))
@@ -718,6 +747,17 @@ int jbd2_journal_test_revoke(journal_t *journal,
return 1;
}
+int jbd2_journal_init_recovery_revoke(journal_t *journal)
+{
+ return rhashtable_init(&journal->j_revoke_rhtable,
+ &revoke_rhashtable_params);
+}
+
+static void jbd2_revoke_record_free(void *ptr, void *arg)
+{
+ kmem_cache_free(jbd2_revoke_record_cache, ptr);
+}
+
/*
* Finally, once recovery is over, we need to clear the revoke table so
* that it can be reused by the running filesystem.
@@ -725,19 +765,6 @@ int jbd2_journal_test_revoke(journal_t *journal,
void jbd2_journal_clear_revoke(journal_t *journal)
{
- int i;
- struct list_head *hash_list;
- struct jbd2_revoke_record_s *record;
- struct jbd2_revoke_table_s *revoke;
-
- revoke = journal->j_revoke;
-
- for (i = 0; i < revoke->hash_size; i++) {
- hash_list = &revoke->hash_table[i];
- while (!list_empty(hash_list)) {
- record = (struct jbd2_revoke_record_s*) hash_list->next;
- list_del(&record->hash);
- kmem_cache_free(jbd2_revoke_record_cache, record);
- }
- }
+ rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
+ jbd2_revoke_record_free, NULL);
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..2b0aa1e159b8 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
#include <linux/blkdev.h>
+#include <linux/rhashtable-types.h>
#include <crypto/hash.h>
#endif
@@ -1122,6 +1123,11 @@ struct journal_s
*/
struct jbd2_revoke_table_s *j_revoke_table[2];
+ /**
+ * @j_revoke_rhtable: rhashtable for revoke records during recovery
+ */
+ struct rhashtable j_revoke_rhtable;
+
/**
* @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
*/
@@ -1644,6 +1650,7 @@ extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
+extern int jbd2_journal_init_recovery_revoke(journal_t *);
extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal);
extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
--
2.47.0
next reply other threads:[~2024-11-05 4:18 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-05 3:44 Li Dongyang [this message]
2024-11-08 10:33 ` [PATCH V2] jbd2: use rhashtable for revoke records during replay Jan Kara
2024-11-08 16:11 ` Theodore Ts'o
2024-11-12 18:44 ` Andreas Dilger
2024-11-13 14:47 ` Jan Kara
2025-01-16 0:08 ` Andreas Dilger
2025-01-16 18:04 ` Jan Kara
2024-11-09 3:12 ` Zhang Yi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241105034428.578701-1-dongyangli@ddn.com \
--to=dongyangli@ddn.com \
--cc=adilger@dilger.ca \
--cc=bzzz@whamcloud.com \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox