linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: sct@redhat.com
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH] Split JBD checkpoint list
Date: Tue, 24 May 2005 21:03:23 +0200	[thread overview]
Message-ID: <20050524190323.GA28720@atrey.karlin.mff.cuni.cz> (raw)

[-- Attachment #1: Type: text/plain, Size: 591 bytes --]

  Hello!

  Attached patch add new transaction checkpoint list containing buffers
which were already submitted for IO as Stephen once suggested. The patch
should close some possible false assertion failures and maybe gain a bit
of performance under some load. I gave the patch some basic testing.
Stephen, can you please check the patch? I want to run some
more tests (any suggestions?) and also check that journalling
correctness was preserved (probably by running some IO load under UML,
killing UML processes randomly and veryfying that FS is OK after the
journal replay).

								Honza

[-- Attachment #2: checkpoint-2.6.12-rc4.diff --]
[-- Type: text/plain, Size: 8719 bytes --]

diff -rupX /home/jack/.kerndiffexclude linux-2.6.12-rc4/fs/jbd/checkpoint.c linux-2.6.12-rc4-1-checkpointfix/fs/jbd/checkpoint.c
--- linux-2.6.12-rc4/fs/jbd/checkpoint.c	2005-03-03 18:58:29.000000000 +0100
+++ linux-2.6.12-rc4-1-checkpointfix/fs/jbd/checkpoint.c	2005-05-24 20:30:48.000000000 +0200
@@ -24,24 +24,68 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction. 
+ * Unlink a buffer from a transaction checkpoint list.
  *
  * Called with j_list_lock held.
  */
 
-static inline void __buffer_unlink(struct journal_head *jh)
+static inline void __buffer_unlink_first(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
-	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
+	if (transaction->t_checkpoint_list == jh) {
 		transaction->t_checkpoint_list = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
-		transaction->t_checkpoint_list = NULL;
+		if (transaction->t_checkpoint_list == jh)
+			transaction->t_checkpoint_list = NULL;
+	}
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+	if (transaction->t_checkpoint_io_list == jh) {
+		transaction->t_checkpoint_io_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_io_list == jh)
+			transaction->t_checkpoint_io_list = NULL;
+	}
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+	__buffer_unlink_first(jh);
+	
+	if (!transaction->t_checkpoint_io_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_io_list;
+		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_io_list = jh;
 }
 
 /*
@@ -117,7 +161,53 @@ static void jbd_sync_bh(journal_t *journ
 }
 
 /*
- * Clean up a transaction's checkpoint list.  
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
+ *
+ * Called with j_list_lock held.
+ */
+
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+{
+	struct journal_head *jh, *next_jh, *last_jh;
+	struct buffer_head *bh;
+	tid_t this_tid;
+
+	this_tid = transaction->t_tid;
+restart:
+	/* Didn't somebody cleaned up the transaction in the meanwhile */
+	if (journal->j_checkpoint_transactions != transaction ||
+		transaction->t_tid != this_tid)
+		return;
+	jh = transaction->t_checkpoint_io_list;
+	if (!jh)
+		return;
+	last_jh = jh->b_cpprev;
+	next_jh = jh;
+	do {
+		jh = next_jh;
+		bh = jh2bh(jh);
+		if (buffer_locked(bh)) {
+			atomic_inc(&bh->b_count);
+			spin_unlock(&journal->j_list_lock);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
+		next_jh = jh->b_cpnext;
+		/* Now in whatever state the buffer currently is, we know that
+		 * it has been written out and so we can drop it from the list */
+		__journal_remove_checkpoint(jh);
+	} while (jh != last_jh);
+}
+
+/*
+ * Clean up a transaction's checkpoint lists.
  *
  * We wait for any pending IO to complete and make sure any clean
  * buffers are removed from the transaction. 
@@ -188,9 +278,13 @@ static int __cleanup_transaction(journal
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
-		jh = next_jh;
 	} while (jh != last_jh);
 
+	/* The 2nd condition is evaluated only if we didn't drop j_list_lock */
+	if (!ret && transaction->t_checkpoint_io_list != NULL)
+		ret = 1;
+	__wait_cp_io(journal, transaction);
+
 	return ret;
 out_return_1:
 	spin_lock(&journal->j_list_lock);
@@ -247,6 +341,7 @@ static int __flush_buffer(journal_t *jou
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
+		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
@@ -339,8 +434,10 @@ int log_do_checkpoint(journal_t *journal
 			}
 		} while (jh != last_jh && !retry);
 
-		if (batch_count)
+		if (batch_count) {
 			__flush_batch(journal, bhs, &batch_count);
+			retry = 1;
+		}
 
 		/*
 		 * If someone cleaned up this transaction while we slept, we're
@@ -455,6 +552,45 @@ int cleanup_journal_tail(journal_t *jour
 /* Checkpoint list management */
 
 /*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of bufers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	int ret = 0;
+
+	if (!jh)
+		return 0;
+
+ 	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+		/* Use trylock because of the ranking */
+		if (jbd_trylock_bh_state(jh2bh(jh)))
+			ret += __try_to_free_cp_buf(jh);
+		/*
+		 * This function only frees up some memory
+		 * if possible so we dont have an obligation
+		 * to finish processing. Bail out if preemption
+		 * requested:
+		 */
+		if (need_resched())
+			return ret;
+	} while (jh != last_jh);
+
+	return ret;
+}
+
+/*
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
@@ -476,31 +612,20 @@ int __journal_clean_checkpoint_list(jour
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
-		struct journal_head *jh;
-
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		jh = transaction->t_checkpoint_list;
-		if (jh) {
-			struct journal_head *last_jh = jh->b_cpprev;
-			struct journal_head *next_jh = jh;
-
-			do {
-				jh = next_jh;
-				next_jh = jh->b_cpnext;
-				/* Use trylock because of the ranknig */
-				if (jbd_trylock_bh_state(jh2bh(jh)))
-					ret += __try_to_free_cp_buf(jh);
-				/*
-				 * This function only frees up some memory
-				 * if possible so we dont have an obligation
-				 * to finish processing. Bail out if preemption
-				 * requested:
-				 */
-				if (need_resched())
-					goto out;
-			} while (jh != last_jh);
-		}
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_list);
+		if (need_resched())
+			goto out;
+		/* It is essential that we are as careful as in the case of
+		   t_checkpoint_list with removing the buffer from the list
+		   as we can possibly see not yet submitted buffers on
+		   io_list */
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list);
+		if (need_resched())
+			goto out;
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -537,8 +662,10 @@ void __journal_remove_checkpoint(struct 
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL)
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -627,6 +754,7 @@ void __journal_drop_transaction(journal_
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff -rupX /home/jack/.kerndiffexclude linux-2.6.12-rc4/include/linux/jbd.h linux-2.6.12-rc4-1-checkpointfix/include/linux/jbd.h
--- linux-2.6.12-rc4/include/linux/jbd.h	2005-05-18 15:11:07.000000000 +0200
+++ linux-2.6.12-rc4-1-checkpointfix/include/linux/jbd.h	2005-05-22 23:15:09.000000000 +0200
@@ -499,6 +499,12 @@ struct transaction_s 
 	struct journal_head	*t_checkpoint_list;
 
 	/*
+	 * Doubly-linked circular list of all buffers submitted for IO while
+	 * checkpointing. [j_list_lock]
+	 */
+	struct journal_head	*t_checkpoint_io_list;
+
+	/*
 	 * Doubly-linked circular list of temporary buffers currently undergoing
 	 * IO in the log [j_list_lock]
 	 */

                 reply	other threads:[~2005-05-24 19:03 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050524190323.GA28720@atrey.karlin.mff.cuni.cz \
    --to=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sct@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).