public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Wengang Wang <wen.gang.wang@oracle.com>
To: linux-xfs@vger.kernel.org
Cc: wen.gang.wang@oracle.com
Subject: [PATCH 2/2] xfs: log recovery stage split EFIs with multiple extents
Date: Fri, 14 Apr 2023 15:58:36 -0700	[thread overview]
Message-ID: <20230414225836.8952-3-wen.gang.wang@oracle.com> (raw)
In-Reply-To: <20230414225836.8952-1-wen.gang.wang@oracle.com>

During log recovery, we need to split EFIs that contain multiple extents.
Each original multiple-extent EFI is split into new EFIs, each covering one
extent from the original EFI. This avoids a deadlock where a block allocation
for the AGFL waits for busy extents held by the current transaction to be
flushed.

 For the original EFI, the process is
 1. Create and log new EFIs each covering one extent from the
    original EFI.
 2. Don't free extent with the original EFI.
 3. Log EFD for the original EFI.
    Make sure we log the new EFIs and original EFD in this order:
      new EFI 1
      new EFI 2
      ...
      new EFI N
      original EFD
 The original extents are freed with the new EFIs.

The example log items:

 rbbn 41572 rec_lsn: 1638833,41568 Oper 18: tid: d746ea5d  len: 48 flags: None
 EFI  nextents:2 id:ffff8b10b5a13c28        --> original EFI
 EFI id=ffff8b10b5a13c28 (0x5de4c42, 256)
 EFI id=ffff8b10b5a13c28 (0x5de4942, 256)

 rbbn 39041 rec_lsn: 1638834,39040 Oper 2: tid: 4e651c99  len: 32 flags: None
 EFI  nextents:1 id:ffff9fef39f4c528	    --> new EFI 1
 EFI id=ffff9fef39f4c528 (0x5de4c42, 256)
 -----------------------------------------------------------------------------
 rbbn 39041 rec_lsn: 1638834,39040 Oper 3: tid: 4e651c99  len: 32 flags: None
 EFI  nextents:1 id:ffff9fef39f4f548	    --> new EFI 2
 EFI id=ffff9fef39f4f548 (0x5de4942, 256)
 -----------------------------------------------------------------------------
 rbbn 39041 rec_lsn: 1638834,39040 Oper 4: tid: 4e651c99  len: 48 flags: None
 EFD  nextents:2 id:ffff8b10b5a13c28	    --> EFD to original EFI
 EFD id=ffff8b10b5a13c28 (0x5de4c42, 256)
 EFD id=ffff8b10b5a13c28 (0x5de4942, 256)
 -----------------------------------------------------------------------------
 rbbn 39041 rec_lsn: 1638834,39040 Oper 5: tid: 4e651c99  len: 32 flags: None
 EFD  nextents:1 id:ffff9fef39f4c528	    --> EFD to new EFI 1
 EFD id=ffff9fef39f4c528 (0x5de4c42, 256)

 ......

 rbbn 39057 rec_lsn: 1638834,39056 Oper 2: tid: e3264681  len: 32 flags: None
 EFD  nextents:1 id:ffff9fef39f4f548	    --> EFD to new EFI 2
 EFD id=ffff9fef39f4f548 (0x5de4942, 256)

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/xfs/xfs_extfree_item.c | 104 ++++++++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 011b50469301..b00b44234397 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -595,7 +595,11 @@ xfs_efi_item_recover(
 	struct list_head		*capture_list)
 {
 	struct xfs_efi_log_item		*efip = EFI_ITEM(lip);
+	int				nr_ext = efip->efi_format.efi_nextents;
 	struct xfs_mount		*mp = lip->li_log->l_mp;
+	struct xfs_efi_log_item		**new_efis, *new_efip;
+	struct xfs_efd_log_item		*new_efdp;
+	struct xfs_extent_free_item	fake;
 	struct xfs_efd_log_item		*efdp;
 	struct xfs_trans		*tp;
 	int				i;
@@ -606,7 +610,7 @@ xfs_efi_item_recover(
 	 * EFI.  If any are bad, then assume that all are bad and
 	 * just toss the EFI.
 	 */
-	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+	for (i = 0; i < nr_ext; i++) {
 		if (!xfs_efi_validate_ext(mp,
 					&efip->efi_format.efi_extents[i])) {
 			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
@@ -619,28 +623,106 @@ xfs_efi_item_recover(
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error)
 		return error;
-	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
-		struct xfs_extent_free_item	fake = {
-			.xefi_owner		= XFS_RMAP_OWN_UNKNOWN,
-		};
+	memset(&fake, 0, sizeof(fake));
+	fake.xefi_owner = XFS_RMAP_OWN_UNKNOWN;
+
+	if (nr_ext <= 1) {
+		efdp = xfs_trans_get_efd(tp, efip,
+				efip->efi_format.efi_nextents);
+
+		for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+			struct xfs_extent		*extp;
+
+			extp = &efip->efi_format.efi_extents[i];
+
+			fake.xefi_startblock = extp->ext_start;
+			fake.xefi_blockcount = extp->ext_len;
+
+			error = xfs_trans_free_extent(tp, efdp, &fake);
+			if (error == -EFSCORRUPTED)
+				XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+						extp, sizeof(*extp));
+			if (error)
+				goto abort_error;
+
+		}
+
+		return xfs_defer_ops_capture_and_commit(tp, capture_list);
+	}
+
+	/*
+	 * During log recovery, we need to split an EFI into new EFIs if the
+	 * original EFI includes more than one extent. See the change to
+	 * XFS_EFI_MAX_FAST_EXTENTS for the reason.
+	 * For the original EFI, the process is
+	 * 1. Create and log new EFIs each covering one extent from the
+	 *    original EFI.
+	 * 2. Don't free extent with the original EFI.
+	 * 3. Log EFD for the original EFI.
+	 *    Make sure we log the new EFIs and original EFD in this order:
+	 *	new EFI 1
+	 *	new EFI 2
+	 *	...
+	 *	new EFI N
+	 *	original EFD
+	 * The original extents are freed with the new EFIs.
+	 */
+	new_efis = kmem_zalloc(sizeof(*new_efis) * nr_ext, 0);
+	if (!new_efis) {
+		error = -ENOMEM;
+		goto abort_error;
+	}
+	for (i = 0; i < nr_ext; i++) {
 		struct xfs_extent		*extp;
 
+		new_efip = xfs_efi_init(mp, 1);
 		extp = &efip->efi_format.efi_extents[i];
 
 		fake.xefi_startblock = extp->ext_start;
 		fake.xefi_blockcount = extp->ext_len;
+		xfs_trans_add_item(tp, &new_efip->efi_item);
+		xfs_extent_free_log_item(tp, new_efip, &fake);
+		new_efis[i] = new_efip;
+	}
+
+	/*
+	 * The new EFIs are in transaction now, add original EFD with
+	 * full extents.
+	 */
+	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+	set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+	efdp->efd_next_extent = nr_ext;
+	for (i = 0; i < nr_ext; i++)
+		efdp->efd_format.efd_extents[i] =
+			efip->efi_format.efi_extents[i];
 
-		error = xfs_trans_free_extent(tp, efdp, &fake);
+	/*
+	 * Now process the new EFIs.
+	 * The current transaction is a new one with no deferred work
+	 * attached, so it is safe to use the first xfs_trans_roll()
+	 * below to commit it.
+	 */
+	for (i = 0; i < nr_ext; i++) {
+		struct xfs_extent		*extp;
+
+		new_efip = new_efis[i];
+		new_efdp = xfs_trans_get_efd(tp, new_efip, 1);
+		extp = &new_efip->efi_format.efi_extents[0];
+		fake.xefi_startblock = extp->ext_start;
+		fake.xefi_blockcount = extp->ext_len;
+		error = xfs_trans_free_extent(tp, new_efdp, &fake);
 		if (error == -EFSCORRUPTED)
 			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
-					extp, sizeof(*extp));
-		if (error)
+						extp, sizeof(*extp));
+		if (!error)
+			error = xfs_trans_roll(&tp);
+		if (error) {
+			kmem_free(new_efis);
 			goto abort_error;
-
+		}
 	}
-
+	kmem_free(new_efis);
 	return xfs_defer_ops_capture_and_commit(tp, capture_list);
 
 abort_error:
-- 
2.21.0 (Apple Git-122.2)


  parent reply	other threads:[~2023-04-14 22:58 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-14 22:58 [PATCH 0/2] xfs: one extent per EFI Wengang Wang
2023-04-14 22:58 ` [PATCH 1/2] xfs: IO time " Wengang Wang
2023-04-19 23:55   ` Dave Chinner
2023-04-20 17:31     ` Wengang Wang
2023-04-20 23:22       ` Dave Chinner
2023-04-21  0:24         ` Wengang Wang
2023-04-21  9:34           ` Dave Chinner
2023-04-21 18:23             ` Wengang Wang
2023-04-22  3:22               ` Wengang Wang
2023-04-24 15:53                 ` Wengang Wang
2023-04-24 22:52                   ` Wengang Wang
2023-04-14 22:58 ` Wengang Wang [this message]
2023-04-20  0:30   ` [PATCH 2/2] xfs: log recovery stage split EFIs with multiple extents Dave Chinner
2023-04-20 17:10     ` Wengang Wang
2023-04-20 22:54       ` Dave Chinner
2023-04-21  0:32         ` Wengang Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230414225836.8952-3-wen.gang.wang@oracle.com \
    --to=wen.gang.wang@oracle.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox