* [PATCH] Split JBD checkpoint list
@ 2005-05-24 19:03 Jan Kara
0 siblings, 0 replies; only message in thread
From: Jan Kara @ 2005-05-24 19:03 UTC (permalink / raw)
To: sct; +Cc: linux-fsdevel
[-- Attachment #1: Type: text/plain, Size: 591 bytes --]
Hello!
Attached patch add new transaction checkpoint list containing buffers
which were already submitted for IO as Stephen once suggested. The patch
should close some possible false assertion failures and maybe gain a bit
of performance under some load. I gave the patch some basic testing.
Stephen, can you please check the patch? I want to run some
more tests (any suggestions?) and also check that journalling
correctness was preserved (probably by running some IO load under UML,
killing UML processes randomly and veryfying that FS is OK after the
journal replay).
Honza
[-- Attachment #2: checkpoint-2.6.12-rc4.diff --]
[-- Type: text/plain, Size: 8719 bytes --]
diff -rupX /home/jack/.kerndiffexclude linux-2.6.12-rc4/fs/jbd/checkpoint.c linux-2.6.12-rc4-1-checkpointfix/fs/jbd/checkpoint.c
--- linux-2.6.12-rc4/fs/jbd/checkpoint.c 2005-03-03 18:58:29.000000000 +0100
+++ linux-2.6.12-rc4-1-checkpointfix/fs/jbd/checkpoint.c 2005-05-24 20:30:48.000000000 +0200
@@ -24,24 +24,68 @@
#include <linux/slab.h>
/*
- * Unlink a buffer from a transaction.
+ * Unlink a buffer from a transaction checkpoint list.
*
* Called with j_list_lock held.
*/
-static inline void __buffer_unlink(struct journal_head *jh)
+static inline void __buffer_unlink_first(struct journal_head *jh)
{
transaction_t *transaction;
transaction = jh->b_cp_transaction;
- jh->b_cp_transaction = NULL;
jh->b_cpnext->b_cpprev = jh->b_cpprev;
jh->b_cpprev->b_cpnext = jh->b_cpnext;
- if (transaction->t_checkpoint_list == jh)
+ if (transaction->t_checkpoint_list == jh) {
transaction->t_checkpoint_list = jh->b_cpnext;
- if (transaction->t_checkpoint_list == jh)
- transaction->t_checkpoint_list = NULL;
+ if (transaction->t_checkpoint_list == jh)
+ transaction->t_checkpoint_list = NULL;
+ }
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+ transaction_t *transaction;
+
+ transaction = jh->b_cp_transaction;
+
+ __buffer_unlink_first(jh);
+ if (transaction->t_checkpoint_io_list == jh) {
+ transaction->t_checkpoint_io_list = jh->b_cpnext;
+ if (transaction->t_checkpoint_io_list == jh)
+ transaction->t_checkpoint_io_list = NULL;
+ }
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+ transaction_t *transaction;
+
+ transaction = jh->b_cp_transaction;
+ __buffer_unlink_first(jh);
+
+ if (!transaction->t_checkpoint_io_list) {
+ jh->b_cpnext = jh->b_cpprev = jh;
+ } else {
+ jh->b_cpnext = transaction->t_checkpoint_io_list;
+ jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+ jh->b_cpprev->b_cpnext = jh;
+ jh->b_cpnext->b_cpprev = jh;
+ }
+ transaction->t_checkpoint_io_list = jh;
}
/*
@@ -117,7 +161,53 @@ static void jbd_sync_bh(journal_t *journ
}
/*
- * Clean up a transaction's checkpoint list.
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
+ *
+ * Called with j_list_lock held.
+ */
+
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+{
+ struct journal_head *jh, *next_jh, *last_jh;
+ struct buffer_head *bh;
+ tid_t this_tid;
+
+ this_tid = transaction->t_tid;
+restart:
+ /* Didn't somebody cleaned up the transaction in the meanwhile */
+ if (journal->j_checkpoint_transactions != transaction ||
+ transaction->t_tid != this_tid)
+ return;
+ jh = transaction->t_checkpoint_io_list;
+ if (!jh)
+ return;
+ last_jh = jh->b_cpprev;
+ next_jh = jh;
+ do {
+ jh = next_jh;
+ bh = jh2bh(jh);
+ if (buffer_locked(bh)) {
+ atomic_inc(&bh->b_count);
+ spin_unlock(&journal->j_list_lock);
+ wait_on_buffer(bh);
+ /* the journal_head may have gone by now */
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
+ spin_lock(&journal->j_list_lock);
+ goto restart;
+ }
+ next_jh = jh->b_cpnext;
+ /* Now in whatever state the buffer currently is, we know that
+ * it has been written out and so we can drop it from the list */
+ __journal_remove_checkpoint(jh);
+ } while (jh != last_jh);
+}
+
+/*
+ * Clean up a transaction's checkpoint lists.
*
* We wait for any pending IO to complete and make sure any clean
* buffers are removed from the transaction.
@@ -188,9 +278,13 @@ static int __cleanup_transaction(journal
} else {
jbd_unlock_bh_state(bh);
}
- jh = next_jh;
} while (jh != last_jh);
+ /* The 2nd condition is evaluated only if we didn't drop j_list_lock */
+ if (!ret && transaction->t_checkpoint_io_list != NULL)
+ ret = 1;
+ __wait_cp_io(journal, transaction);
+
return ret;
out_return_1:
spin_lock(&journal->j_list_lock);
@@ -247,6 +341,7 @@ static int __flush_buffer(journal_t *jou
J_ASSERT_BH(bh, !buffer_jwrite(bh));
set_buffer_jwrite(bh);
bhs[*batch_count] = bh;
+ __buffer_relink_io(jh);
jbd_unlock_bh_state(bh);
(*batch_count)++;
if (*batch_count == NR_BATCH) {
@@ -339,8 +434,10 @@ int log_do_checkpoint(journal_t *journal
}
} while (jh != last_jh && !retry);
- if (batch_count)
+ if (batch_count) {
__flush_batch(journal, bhs, &batch_count);
+ retry = 1;
+ }
/*
* If someone cleaned up this transaction while we slept, we're
@@ -455,6 +552,45 @@ int cleanup_journal_tail(journal_t *jour
/* Checkpoint list management */
/*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of bufers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh)
+{
+ struct journal_head *last_jh;
+ struct journal_head *next_jh = jh;
+ int ret = 0;
+
+ if (!jh)
+ return 0;
+
+ last_jh = jh->b_cpprev;
+ do {
+ jh = next_jh;
+ next_jh = jh->b_cpnext;
+ /* Use trylock because of the ranking */
+ if (jbd_trylock_bh_state(jh2bh(jh)))
+ ret += __try_to_free_cp_buf(jh);
+ /*
+ * This function only frees up some memory
+ * if possible so we dont have an obligation
+ * to finish processing. Bail out if preemption
+ * requested:
+ */
+ if (need_resched())
+ return ret;
+ } while (jh != last_jh);
+
+ return ret;
+}
+
+/*
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
@@ -476,31 +612,20 @@ int __journal_clean_checkpoint_list(jour
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
- struct journal_head *jh;
-
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
- jh = transaction->t_checkpoint_list;
- if (jh) {
- struct journal_head *last_jh = jh->b_cpprev;
- struct journal_head *next_jh = jh;
-
- do {
- jh = next_jh;
- next_jh = jh->b_cpnext;
- /* Use trylock because of the ranknig */
- if (jbd_trylock_bh_state(jh2bh(jh)))
- ret += __try_to_free_cp_buf(jh);
- /*
- * This function only frees up some memory
- * if possible so we dont have an obligation
- * to finish processing. Bail out if preemption
- * requested:
- */
- if (need_resched())
- goto out;
- } while (jh != last_jh);
- }
+ ret += journal_clean_one_cp_list(transaction->
+ t_checkpoint_list);
+ if (need_resched())
+ goto out;
+ /* It is essential that we are as careful as in the case of
+ t_checkpoint_list with removing the buffer from the list
+ as we can possibly see not yet submitted buffers on
+ io_list */
+ ret += journal_clean_one_cp_list(transaction->
+ t_checkpoint_io_list);
+ if (need_resched())
+ goto out;
} while (transaction != last_transaction);
out:
return ret;
@@ -537,8 +662,10 @@ void __journal_remove_checkpoint(struct
journal = transaction->t_journal;
__buffer_unlink(jh);
+ jh->b_cp_transaction = NULL;
- if (transaction->t_checkpoint_list != NULL)
+ if (transaction->t_checkpoint_list != NULL ||
+ transaction->t_checkpoint_io_list != NULL)
goto out;
JBUFFER_TRACE(jh, "transaction has no more buffers");
@@ -627,6 +754,7 @@ void __journal_drop_transaction(journal_
J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_log_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
+ J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(transaction->t_updates == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff -rupX /home/jack/.kerndiffexclude linux-2.6.12-rc4/include/linux/jbd.h linux-2.6.12-rc4-1-checkpointfix/include/linux/jbd.h
--- linux-2.6.12-rc4/include/linux/jbd.h 2005-05-18 15:11:07.000000000 +0200
+++ linux-2.6.12-rc4-1-checkpointfix/include/linux/jbd.h 2005-05-22 23:15:09.000000000 +0200
@@ -499,6 +499,12 @@ struct transaction_s
struct journal_head *t_checkpoint_list;
/*
+ * Doubly-linked circular list of all buffers submitted for IO while
+ * checkpointing. [j_list_lock]
+ */
+ struct journal_head *t_checkpoint_io_list;
+
+ /*
* Doubly-linked circular list of temporary buffers currently undergoing
* IO in the log [j_list_lock]
*/
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2005-05-24 19:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-24 19:03 [PATCH] Split JBD checkpoint list Jan Kara
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).