All of lore.kernel.org
 help / color / mirror / Atom feed
* jbd2: avoid infinite loop when destroying aborted journal
@ 2015-10-05 15:54 Jan Kara
  2015-10-18  0:21 ` Greg KH
  2015-10-19 10:18 ` Luis Henriques
  0 siblings, 2 replies; 3+ messages in thread
From: Jan Kara @ 2015-10-05 15:54 UTC (permalink / raw)
  To: stable; +Cc: Jan, "Kara <jack", Theodore Ts'o

From: Jan Kara <jack@suse.com

Backport of commit 841df7df196237ea63233f0f9eaa41db53afd70f for
3.11, 3.12, 3.14 stable kernels.

Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal
superblock fails" changed jbd2_cleanup_journal_tail() to return EIO
when the journal is aborted. That makes logic in
jbd2_log_do_checkpoint() bail out which is fine, except that
jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make
a progress in cleaning the journal. Without it jbd2_journal_destroy()
just loops in an infinite loop.

Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of
jbd2_log_do_checkpoint() fails with error.

Reported-by: Eryu Guan <guaneryu@gmail.com>
Tested-by: Eryu Guan <guaneryu@gmail.com>
Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a
Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/checkpoint.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/checkpoint.c
--- linux-3.12-SLE12/fs/jbd2/checkpoint.c	2015-08-28 11:43:12.372739350 +0200
+++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/checkpoint.c	2015-10-02 21:19:36.569652149 +0200
@@ -475,14 +475,15 @@ int jbd2_cleanup_journal_tail(journal_t
  * journal_clean_one_cp_list
  *
  * Find all the written-back checkpoint buffers in the given list and
- * release them.
+ * release them. If 'destroy' is set, clean all buffers unconditionally.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
  * Returns number of buffers reaped (for debug)
  */
 
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy,
+				     int *released)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
@@ -496,7 +497,10 @@ static int journal_clean_one_cp_list(str
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
-		ret = __try_to_free_cp_buf(jh);
+		if (!destroy)
+			ret = __try_to_free_cp_buf(jh);
+		else
+			ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 		if (ret) {
 			freed++;
 			if (ret == 2) {
@@ -521,13 +525,14 @@ static int journal_clean_one_cp_list(str
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
+ * If 'destroy' is set, release all buffers unconditionally.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
  * Returns number of buffers reaped (for debug)
  */
 
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+int __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret = 0;
@@ -543,7 +548,7 @@ int __jbd2_journal_clean_checkpoint_list
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
 		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
+				t_checkpoint_list, destroy, &released);
 		/*
 		 * This function only frees up some memory if possible so we
 		 * dont have an obligation to finish processing. Bail out if
@@ -559,7 +564,7 @@ int __jbd2_journal_clean_checkpoint_list
 		 * we can possibly see not yet submitted buffers on io_list
 		 */
 		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
+				t_checkpoint_io_list, destroy, &released);
 		if (need_resched())
 			goto out;
 	} while (transaction != last_transaction);
@@ -568,6 +573,28 @@ out:
 }
 
 /*
+ * Remove buffers from all checkpoint lists as journal is aborted and we just
+ * need to free memory
+ */
+void jbd2_journal_destroy_checkpoint(journal_t *journal)
+{
+	/*
+	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
+	 * early due to a need of rescheduling.
+	 */
+	while (1) {
+		spin_lock(&journal->j_list_lock);
+		if (!journal->j_checkpoint_transactions) {
+			spin_unlock(&journal->j_list_lock);
+			break;
+		}
+		__jbd2_journal_clean_checkpoint_list(journal, true);
+		spin_unlock(&journal->j_list_lock);
+		cond_resched();
+	}
+}
+
+/*
  * journal_remove_checkpoint: called after a buffer has been committed
  * to disk (either by being write-back flushed to disk, or being
  * committed to the log).
diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/commit.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/commit.c
--- linux-3.12-SLE12/fs/jbd2/commit.c	2015-08-28 11:43:06.258721198 +0200
+++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/commit.c	2015-10-02 21:15:47.755275630 +0200
@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(jou
 	 * frees some memory
 	 */
 	spin_lock(&journal->j_list_lock);
-	__jbd2_journal_clean_checkpoint_list(journal);
+	__jbd2_journal_clean_checkpoint_list(journal, false);
 	spin_unlock(&journal->j_list_lock);
 
 	jbd_debug(3, "JBD2: commit phase 1\n");
diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/journal.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/journal.c
--- linux-3.12-SLE12/fs/jbd2/journal.c	2015-08-28 11:43:50.493852324 +0200
+++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/journal.c	2015-10-02 21:15:47.756275638 +0200
@@ -1710,8 +1710,17 @@ int jbd2_journal_destroy(journal_t *jour
 	while (journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_log_do_checkpoint(journal);
+		err = jbd2_log_do_checkpoint(journal);
 		mutex_unlock(&journal->j_checkpoint_mutex);
+		/*
+		 * If checkpointing failed, just free the buffers to avoid
+		 * looping forever
+		 */
+		if (err) {
+			jbd2_journal_destroy_checkpoint(journal);
+			spin_lock(&journal->j_list_lock);
+			break;
+		}
 		spin_lock(&journal->j_list_lock);
 	}
 
diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/include/linux/jbd2.h linux-3.12-SLE12-jbd2_loop/include/linux/jbd2.h
--- linux-3.12-SLE12/include/linux/jbd2.h	2015-08-28 11:43:12.404739445 +0200
+++ linux-3.12-SLE12-jbd2_loop/include/linux/jbd2.h	2015-10-02 21:16:33.938351449 +0200
@@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *jou
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+int __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
+void jbd2_journal_destroy_checkpoint(journal_t *journal);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: jbd2: avoid infinite loop when destroying aborted journal
  2015-10-05 15:54 jbd2: avoid infinite loop when destroying aborted journal Jan Kara
@ 2015-10-18  0:21 ` Greg KH
  2015-10-19 10:18 ` Luis Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Greg KH @ 2015-10-18  0:21 UTC (permalink / raw)
  To: Jan Kara; +Cc: stable, Jan, Theodore Ts'o

On Mon, Oct 05, 2015 at 05:54:06PM +0200, Jan Kara wrote:
> From: Jan Kara <jack@suse.com
> 
> Backport of commit 841df7df196237ea63233f0f9eaa41db53afd70f for
> 3.11, 3.12, 3.14 stable kernels.

Applied to 3.14-stable, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: jbd2: avoid infinite loop when destroying aborted journal
  2015-10-05 15:54 jbd2: avoid infinite loop when destroying aborted journal Jan Kara
  2015-10-18  0:21 ` Greg KH
@ 2015-10-19 10:18 ` Luis Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Luis Henriques @ 2015-10-19 10:18 UTC (permalink / raw)
  To: Jan Kara; +Cc: stable, Jan, Theodore Ts'o

On Mon, Oct 05, 2015 at 05:54:06PM +0200, Jan Kara wrote:
> From: Jan Kara <jack@suse.com
> 
> Backport of commit 841df7df196237ea63233f0f9eaa41db53afd70f for
> 3.11, 3.12, 3.14 stable kernels.
>

Thanks, using this for the 3.16 kernel as well.

Cheers,
--
Lu�s


> Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal
> superblock fails" changed jbd2_cleanup_journal_tail() to return EIO
> when the journal is aborted. That makes logic in
> jbd2_log_do_checkpoint() bail out which is fine, except that
> jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make
> a progress in cleaning the journal. Without it jbd2_journal_destroy()
> just loops in an infinite loop.
> 
> Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of
> jbd2_log_do_checkpoint() fails with error.
> 
> Reported-by: Eryu Guan <guaneryu@gmail.com>
> Tested-by: Eryu Guan <guaneryu@gmail.com>
> Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a
> Signed-off-by: Jan Kara <jack@suse.com>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> 
> diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/checkpoint.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/checkpoint.c
> --- linux-3.12-SLE12/fs/jbd2/checkpoint.c	2015-08-28 11:43:12.372739350 +0200
> +++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/checkpoint.c	2015-10-02 21:19:36.569652149 +0200
> @@ -475,14 +475,15 @@ int jbd2_cleanup_journal_tail(journal_t
>   * journal_clean_one_cp_list
>   *
>   * Find all the written-back checkpoint buffers in the given list and
> - * release them.
> + * release them. If 'destroy' is set, clean all buffers unconditionally.
>   *
>   * Called with the journal locked.
>   * Called with j_list_lock held.
>   * Returns number of buffers reaped (for debug)
>   */
>  
> -static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
> +static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy,
> +				     int *released)
>  {
>  	struct journal_head *last_jh;
>  	struct journal_head *next_jh = jh;
> @@ -496,7 +497,10 @@ static int journal_clean_one_cp_list(str
>  	do {
>  		jh = next_jh;
>  		next_jh = jh->b_cpnext;
> -		ret = __try_to_free_cp_buf(jh);
> +		if (!destroy)
> +			ret = __try_to_free_cp_buf(jh);
> +		else
> +			ret = __jbd2_journal_remove_checkpoint(jh) + 1;
>  		if (ret) {
>  			freed++;
>  			if (ret == 2) {
> @@ -521,13 +525,14 @@ static int journal_clean_one_cp_list(str
>   * journal_clean_checkpoint_list
>   *
>   * Find all the written-back checkpoint buffers in the journal and release them.
> + * If 'destroy' is set, release all buffers unconditionally.
>   *
>   * Called with the journal locked.
>   * Called with j_list_lock held.
>   * Returns number of buffers reaped (for debug)
>   */
>  
> -int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
> +int __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
>  {
>  	transaction_t *transaction, *last_transaction, *next_transaction;
>  	int ret = 0;
> @@ -543,7 +548,7 @@ int __jbd2_journal_clean_checkpoint_list
>  		transaction = next_transaction;
>  		next_transaction = transaction->t_cpnext;
>  		ret += journal_clean_one_cp_list(transaction->
> -				t_checkpoint_list, &released);
> +				t_checkpoint_list, destroy, &released);
>  		/*
>  		 * This function only frees up some memory if possible so we
>  		 * dont have an obligation to finish processing. Bail out if
> @@ -559,7 +564,7 @@ int __jbd2_journal_clean_checkpoint_list
>  		 * we can possibly see not yet submitted buffers on io_list
>  		 */
>  		ret += journal_clean_one_cp_list(transaction->
> -				t_checkpoint_io_list, &released);
> +				t_checkpoint_io_list, destroy, &released);
>  		if (need_resched())
>  			goto out;
>  	} while (transaction != last_transaction);
> @@ -568,6 +573,28 @@ out:
>  }
>  
>  /*
> + * Remove buffers from all checkpoint lists as journal is aborted and we just
> + * need to free memory
> + */
> +void jbd2_journal_destroy_checkpoint(journal_t *journal)
> +{
> +	/*
> +	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
> +	 * early due to a need of rescheduling.
> +	 */
> +	while (1) {
> +		spin_lock(&journal->j_list_lock);
> +		if (!journal->j_checkpoint_transactions) {
> +			spin_unlock(&journal->j_list_lock);
> +			break;
> +		}
> +		__jbd2_journal_clean_checkpoint_list(journal, true);
> +		spin_unlock(&journal->j_list_lock);
> +		cond_resched();
> +	}
> +}
> +
> +/*
>   * journal_remove_checkpoint: called after a buffer has been committed
>   * to disk (either by being write-back flushed to disk, or being
>   * committed to the log).
> diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/commit.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/commit.c
> --- linux-3.12-SLE12/fs/jbd2/commit.c	2015-08-28 11:43:06.258721198 +0200
> +++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/commit.c	2015-10-02 21:15:47.755275630 +0200
> @@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(jou
>  	 * frees some memory
>  	 */
>  	spin_lock(&journal->j_list_lock);
> -	__jbd2_journal_clean_checkpoint_list(journal);
> +	__jbd2_journal_clean_checkpoint_list(journal, false);
>  	spin_unlock(&journal->j_list_lock);
>  
>  	jbd_debug(3, "JBD2: commit phase 1\n");
> diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/fs/jbd2/journal.c linux-3.12-SLE12-jbd2_loop/fs/jbd2/journal.c
> --- linux-3.12-SLE12/fs/jbd2/journal.c	2015-08-28 11:43:50.493852324 +0200
> +++ linux-3.12-SLE12-jbd2_loop/fs/jbd2/journal.c	2015-10-02 21:15:47.756275638 +0200
> @@ -1710,8 +1710,17 @@ int jbd2_journal_destroy(journal_t *jour
>  	while (journal->j_checkpoint_transactions != NULL) {
>  		spin_unlock(&journal->j_list_lock);
>  		mutex_lock(&journal->j_checkpoint_mutex);
> -		jbd2_log_do_checkpoint(journal);
> +		err = jbd2_log_do_checkpoint(journal);
>  		mutex_unlock(&journal->j_checkpoint_mutex);
> +		/*
> +		 * If checkpointing failed, just free the buffers to avoid
> +		 * looping forever
> +		 */
> +		if (err) {
> +			jbd2_journal_destroy_checkpoint(journal);
> +			spin_lock(&journal->j_list_lock);
> +			break;
> +		}
>  		spin_lock(&journal->j_list_lock);
>  	}
>  
> diff -rupX /crypted/home/jack/.kerndiffexclude linux-3.12-SLE12/include/linux/jbd2.h linux-3.12-SLE12-jbd2_loop/include/linux/jbd2.h
> --- linux-3.12-SLE12/include/linux/jbd2.h	2015-08-28 11:43:12.404739445 +0200
> +++ linux-3.12-SLE12-jbd2_loop/include/linux/jbd2.h	2015-10-02 21:16:33.938351449 +0200
> @@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *jou
>  extern void jbd2_journal_commit_transaction(journal_t *);
>  
>  /* Checkpoint list management */
> -int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
> +int __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
>  int __jbd2_journal_remove_checkpoint(struct journal_head *);
> +void jbd2_journal_destroy_checkpoint(journal_t *journal);
>  void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
>  
>  
> --
> To unsubscribe from this list: send the line "unsubscribe stable" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-10-19 10:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-05 15:54 jbd2: avoid infinite loop when destroying aborted journal Jan Kara
2015-10-18  0:21 ` Greg KH
2015-10-19 10:18 ` Luis Henriques

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.