public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Jan Kara <jack@suse.cz>,
	Zhang Yi <yi.zhang@huawei.com>,
	Zhihao Cheng <chengzhihao1@huawei.com>,
	Theodore Tso <tytso@mit.edu>
Subject: [PATCH 5.15 76/78] jbd2: recheck chechpointing non-dirty buffer
Date: Tue, 25 Jul 2023 12:47:07 +0200	[thread overview]
Message-ID: <20230725104454.221311267@linuxfoundation.org> (raw)
In-Reply-To: <20230725104451.275227789@linuxfoundation.org>

From: Zhang Yi <yi.zhang@huawei.com>

commit c2d6fd9d6f35079f1669f0100f05b46708c74b7f upstream.

There is a long-standing metadata corruption issue that happens from
time to time, but it's very difficult to reproduce and analyse, benefit
from the JBD2_CYCLE_RECORD option, we found out that the problem is the
checkpointing process miss to write out some buffers which are raced by
another do_get_write_access(). Looks below for detail.

jbd2_log_do_checkpoint() //transaction X
 //buffer A is dirty and not belones to any transaction
 __buffer_relink_io() //move it to the IO list
 __flush_batch()
  write_dirty_buffer()
                             do_get_write_access()
                             clear_buffer_dirty
                             __jbd2_journal_file_buffer()
                             //add buffer A to a new transaction Y
   lock_buffer(bh)
   //doesn't write out
 __jbd2_journal_remove_checkpoint()
 //finish checkpoint except buffer A
 //filesystem corrupt if the new transaction Y isn't fully write out.

Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint()
have already handles waiting for buffers under IO and re-added new
transaction to complete commit, and it also removing cleaned buffers,
this makes sure the list will eventually get empty. So it's fine to
leave buffers on the t_checkpoint_list while flushing out and completely
stop using the t_checkpoint_io_list.

Cc: stable@vger.kernel.org
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Tested-by: Zhihao Cheng <chengzhihao1@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230606135928.434610-2-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/checkpoint.c |  102 ++++++++++++++-------------------------------------
 1 file changed, 29 insertions(+), 73 deletions(-)

--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -58,28 +58,6 @@ static inline void __buffer_unlink(struc
 }
 
 /*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
-}
-
-/*
  * Check a checkpoint buffer could be release or not.
  *
  * Requires j_list_lock
@@ -183,6 +161,7 @@ __flush_batch(journal_t *journal, int *b
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
 		BUFFER_TRACE(bh, "brelse");
 		__brelse(bh);
+		journal->j_chkpt_bhs[i] = NULL;
 	}
 	*batch_count = 0;
 }
@@ -242,6 +221,11 @@ restart:
 		jh = transaction->t_checkpoint_list;
 		bh = jh2bh(jh);
 
+		/*
+		 * The buffer may be writing back, or flushing out in the
+		 * last couple of cycles, or re-adding into a new transaction,
+		 * need to check it again until it's unlocked.
+		 */
 		if (buffer_locked(bh)) {
 			get_bh(bh);
 			spin_unlock(&journal->j_list_lock);
@@ -287,28 +271,32 @@ restart:
 		}
 		if (!buffer_dirty(bh)) {
 			BUFFER_TRACE(bh, "remove from checkpoint");
-			if (__jbd2_journal_remove_checkpoint(jh))
-				/* The transaction was released; we're done */
+			/*
+			 * If the transaction was released or the checkpoint
+			 * list was empty, we're done.
+			 */
+			if (__jbd2_journal_remove_checkpoint(jh) ||
+			    !transaction->t_checkpoint_list)
 				goto out;
-			continue;
+		} else {
+			/*
+			 * We are about to write the buffer, it could be
+			 * raced by some other transaction shrink or buffer
+			 * re-log logic once we release the j_list_lock,
+			 * leave it on the checkpoint list and check status
+			 * again to make sure it's clean.
+			 */
+			BUFFER_TRACE(bh, "queue");
+			get_bh(bh);
+			J_ASSERT_BH(bh, !buffer_jwrite(bh));
+			journal->j_chkpt_bhs[batch_count++] = bh;
+			transaction->t_chp_stats.cs_written++;
+			transaction->t_checkpoint_list = jh->b_cpnext;
 		}
-		/*
-		 * Important: we are about to write the buffer, and
-		 * possibly block, while still holding the journal
-		 * lock.  We cannot afford to let the transaction
-		 * logic start messing around with this buffer before
-		 * we write it to disk, as that would break
-		 * recoverability.
-		 */
-		BUFFER_TRACE(bh, "queue");
-		get_bh(bh);
-		J_ASSERT_BH(bh, !buffer_jwrite(bh));
-		journal->j_chkpt_bhs[batch_count++] = bh;
-		__buffer_relink_io(jh);
-		transaction->t_chp_stats.cs_written++;
+
 		if ((batch_count == JBD2_NR_BATCH) ||
-		    need_resched() ||
-		    spin_needbreak(&journal->j_list_lock))
+		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
+		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
 			goto unlock_and_flush;
 	}
 
@@ -322,38 +310,6 @@ restart:
 			goto restart;
 	}
 
-	/*
-	 * Now we issued all of the transaction's buffers, let's deal
-	 * with the buffers that are out for I/O.
-	 */
-restart2:
-	/* Did somebody clean up the transaction in the meanwhile? */
-	if (journal->j_checkpoint_transactions != transaction ||
-	    transaction->t_tid != this_tid)
-		goto out;
-
-	while (transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart2;
-		}
-
-		/*
-		 * Now in whatever state the buffer currently is, we
-		 * know that it has been written out and so we can
-		 * drop it from the list
-		 */
-		if (__jbd2_journal_remove_checkpoint(jh))
-			break;
-	}
 out:
 	spin_unlock(&journal->j_list_lock);
 	result = jbd2_cleanup_journal_tail(journal);



  parent reply	other threads:[~2023-07-25 11:11 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-25 10:45 [PATCH 5.15 00/78] 5.15.123-rc1 review Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 01/78] ALSA: hda/realtek - remove 3k pull low procedure Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 02/78] ALSA: hda/realtek: Add quirk for Clevo NS70AU Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 03/78] ALSA: hda/realtek: Enable Mute LED on HP Laptop 15s-eq2xxx Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 04/78] keys: Fix linking a duplicate key to a keyrings assoc_array Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 05/78] perf probe: Add test for regression introduced by switch to die_get_decl_file() Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 06/78] btrfs: fix warning when putting transaction with qgroups enabled after abort Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 07/78] fuse: revalidate: dont invalidate if interrupted Greg Kroah-Hartman
2023-07-25 10:45 ` [PATCH 5.15 08/78] btrfs: zoned: fix memory leak after finding block group with super blocks Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 09/78] fuse: ioctl: translate ENOSYS in outarg Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 10/78] selftests: tc: set timeout to 15 minutes Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 11/78] selftests: tc: add ct action kconfig dep Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 12/78] regmap: Drop initial version of maximum transfer length fixes Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 13/78] regmap: Account for register length in SMBus I/O limits Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 14/78] can: bcm: Fix UAF in bcm_proc_show() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 15/78] selftests: tc: add ConnTrack procfs kconfig Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 16/78] drm/client: Fix memory leak in drm_client_target_cloned Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 17/78] drm/client: Fix memory leak in drm_client_modeset_probe Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 18/78] drm/amd/display: Disable MPC split by default on special asic Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 19/78] drm/amd/display: Keep PHY active for DP displays on DCN31 Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 20/78] ASoC: fsl_sai: Disable bit clock with transmitter Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 21/78] ASoC: codecs: wcd938x: fix missing clsh ctrl error handling Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 22/78] ASoC: codecs: wcd-mbhc-v2: fix resource leaks on component remove Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 23/78] ASoC: codecs: wcd938x: " Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 24/78] ASoC: codecs: wcd938x: fix missing mbhc init error handling Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 25/78] ASoC: codecs: wcd934x: fix resource leaks on component remove Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 26/78] ASoC: codecs: wcd938x: fix codec initialisation race Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 27/78] ASoC: codecs: wcd938x: fix soundwire " Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 28/78] ext4: correct inline offset when handling xattrs in inode body Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 29/78] [PATCH AUTOSEL 4.14 1/9] drm/radeon: Fix integer overflow in radeon_cs_parser_init Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 30/78] [PATCH AUTOSEL 4.14 2/9] ALSA: emu10k1: roll up loops in DSP setup code for Audigy Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 31/78] [PATCH AUTOSEL 4.14 3/9] quota: Properly disable quotas when add_dquot_ref() fails Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 32/78] [PATCH AUTOSEL 4.14 4/9] quota: fix warning in dqgrab() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 33/78] [PATCH AUTOSEL 4.14 5/9] udf: Fix uninitialized array access for some pathnames Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 34/78] [PATCH AUTOSEL 4.14 6/9] fs: jfs: Fix UBSAN: array-index-out-of-bounds in dbAllocDmapLev Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 35/78] [PATCH AUTOSEL 4.14 7/9] MIPS: dec: prom: Address -Warray-bounds warning Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 36/78] [PATCH AUTOSEL 4.14 8/9] FS: JFS: Fix null-ptr-deref Read in txBegin Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 37/78] [PATCH AUTOSEL 4.14 9/9] FS: JFS: Check for read-only mounted filesystem " Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 38/78] spi: bcm63xx: fix max prepend length Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 39/78] fbdev: imxfb: warn about invalid left/right margin Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 40/78] perf build: Fix library not found error when using CSLIBS Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 41/78] pinctrl: amd: Use amd_pinconf_set() for all config options Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 42/78] net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 43/78] bridge: Add extack warning when enabling STP in netns Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 44/78] ethernet: use eth_hw_addr_set() instead of ether_addr_copy() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 45/78] of: net: add a helper for loading netdev->dev_addr Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 46/78] ethernet: use of_get_ethdev_address() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 47/78] net: ethernet: mtk_eth_soc: handle probe deferral Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 48/78] net: sched: cls_bpf: Undo tcf_bind_filter in case of an error Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 49/78] iavf: Fix use-after-free in free_netdev Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 50/78] iavf: Fix out-of-bounds when setting channels on remove Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 51/78] security: keys: Modify mismatched function name Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 52/78] octeontx2-pf: Dont allocate BPIDs for LBK interfaces Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 53/78] bpf: Fix subprog idx logic in check_max_stack_depth Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 54/78] igc: Prevent garbled TX queue with XDP ZEROCOPY Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 55/78] tcp: annotate data-races around tcp_rsk(req)->ts_recent Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 56/78] net: ipv4: Use kfree_sensitive instead of kfree Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 57/78] net:ipv6: check return value of pskb_trim() Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 58/78] Revert "tcp: avoid the lookup process failing to get sk in ehash table" Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 59/78] fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 60/78] llc: Dont drop packet from non-root netns Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 61/78] netfilter: nf_tables: fix spurious set element insertion failure Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 62/78] netfilter: nft_set_pipapo: fix improper element removal Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 63/78] netfilter: nf_tables: skip bound chain in netns release path Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 64/78] netfilter: nf_tables: skip bound chain on rule flush Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 65/78] tcp: annotate data-races around tp->tcp_tx_delay Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 66/78] tcp: annotate data-races around tp->keepalive_time Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 67/78] tcp: annotate data-races around tp->keepalive_intvl Greg Kroah-Hartman
2023-07-25 10:46 ` [PATCH 5.15 68/78] tcp: annotate data-races around tp->keepalive_probes Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 69/78] tcp: annotate data-races around icsk->icsk_syn_retries Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 70/78] tcp: annotate data-races around tp->linger2 Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 71/78] tcp: annotate data-races around rskq_defer_accept Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 72/78] tcp: annotate data-races around tp->notsent_lowat Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 73/78] tcp: annotate data-races around icsk->icsk_user_timeout Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 74/78] tcp: annotate data-races around fastopenq.max_qlen Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 75/78] net: phy: prevent stale pointer dereference in phy_init() Greg Kroah-Hartman
2023-07-25 10:47 ` Greg Kroah-Hartman [this message]
2023-07-25 10:47 ` [PATCH 5.15 77/78] tracing/histograms: Return an error if we fail to add histogram to hist_vars list Greg Kroah-Hartman
2023-07-25 10:47 ` [PATCH 5.15 78/78] nixge: fix mac address error handling again Greg Kroah-Hartman
2023-07-25 16:27 ` [PATCH 5.15 00/78] 5.15.123-rc1 review Jon Hunter
2023-07-25 18:08 ` SeongJae Park
2023-07-25 20:20 ` Shuah Khan
2023-07-25 21:52 ` Florian Fainelli
2023-07-26 14:19 ` Naresh Kamboju
2023-07-27  0:02 ` Guenter Roeck
2023-07-27  0:10 ` Ron Economos

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230725104454.221311267@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=chengzhihao1@huawei.com \
    --cc=jack@suse.cz \
    --cc=patches@lists.linux.dev \
    --cc=stable@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=yi.zhang@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox