* [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count
2023-12-31 19:32 [PATCHSET v29.0 27/28] xfs: inode-related repair fixes Darrick J. Wong
@ 2023-12-31 20:41 ` Darrick J. Wong
0 siblings, 0 replies; 12+ messages in thread
From: Darrick J. Wong @ 2023-12-31 20:41 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
The VFS inc_nlink function does not explicitly check for integer
overflows in the i_nlink field. Instead, it checks the link count
against s_max_links in the vfs_{link,create,rename} functions. XFS
sets the maximum link count to 2.1 billion, so integer overflows should
not be a problem.
However. It's possible that online repair could find that a file has
more than four billion links, particularly if the link count got
corrupted while creating hardlinks to the file. The di_nlinkv2 field is
not large enough to store a value larger than 2^32, so we ought to
define a magic pin value of ~0U which means that the inode never gets
deleted. This will prevent a UAF error if the repair finds this
situation and users begin deleting links to the file.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_format.h | 6 ++++++
fs/xfs/scrub/dir_repair.c | 11 +++--------
fs/xfs/scrub/nlinks.c | 4 +++-
fs/xfs/scrub/nlinks_repair.c | 8 ++------
fs/xfs/xfs_inode.c | 33 ++++++++++++++++++++++-----------
5 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 7861539ab8b68..ec25010b57797 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -912,6 +912,12 @@ static inline uint xfs_dinode_size(int version)
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
+/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
/*
* Values for di_format
*
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 141682e2477af..48e30a9baeae0 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1145,7 +1145,9 @@ xrep_dir_set_nlink(
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *dp = sc->ip;
struct xfs_perag *pag;
- unsigned int new_nlink = rd->subdirs + 2;
+ unsigned int new_nlink = min_t(unsigned long long,
+ rd->subdirs + 2,
+ XFS_NLINK_PINNED);
int error;
/*
@@ -1201,13 +1203,6 @@ xrep_dir_swap(
bool ip_local, temp_local;
int error = 0;
- /*
- * If we found enough subdirs to overflow this directory's link count,
- * bail out to userspace before we modify anything.
- */
- if (rd->subdirs + 2 > XFS_MAXLINK)
- return -EFSCORRUPTED;
-
/*
* If we never found the parent for this directory, temporarily assign
* the root dir as the parent; we'll move this to the orphanage after
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 7be2119ce283a..6f0b77da14dbb 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -603,9 +603,11 @@ xchk_nlinks_compare_inode(
* this as a corruption. The VFS won't let users increase the link
* count, but it will let them decrease it.
*/
- if (total_links > XFS_MAXLINK) {
+ if (total_links > XFS_NLINK_PINNED) {
xchk_ino_set_corrupt(sc, ip->i_ino);
goto out_corrupt;
+ } else if (total_links > XFS_MAXLINK) {
+ xchk_ino_set_warning(sc, ip->i_ino);
}
/* Link counts should match. */
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 1345c07a95c62..87cb3400ff948 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -239,14 +239,10 @@ xrep_nlinks_repair_inode(
/* Commit the new link count if it changed. */
if (total_links != actual_nlink) {
- if (total_links > XFS_MAXLINK) {
- trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
- goto out_trans;
- }
-
trace_xrep_nlinks_update_inode(mp, ip, &obs);
- set_nlink(VFS_I(ip), total_links);
+ set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
+ XFS_NLINK_PINNED));
dirty = true;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ea1b0bc9a3410..71640afc3a8ee 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -912,22 +912,25 @@ xfs_init_new_inode(
*/
static int /* error */
xfs_droplink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- if (VFS_I(ip)->i_nlink == 0) {
- xfs_alert(ip->i_mount,
- "%s: Attempt to drop inode (%llu) with nlink zero.",
- __func__, ip->i_ino);
- return -EFSCORRUPTED;
- }
+ struct inode *inode = VFS_I(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- drop_nlink(VFS_I(ip));
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (VFS_I(ip)->i_nlink)
+ if (inode->i_nlink)
return 0;
return xfs_iunlink(tp, ip);
@@ -941,9 +944,17 @@ xfs_bumplink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
+ struct inode *inode = VFS_I(ip);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- inc_nlink(VFS_I(ip));
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCHSET v29.4 13/13] xfs: inode-related repair fixes
@ 2024-02-27 2:18 Darrick J. Wong
2024-02-27 2:33 ` [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode Darrick J. Wong
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Darrick J. Wong @ 2024-02-27 2:18 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs, hch
Hi all,
While doing QA of the online fsck code, I made a few observations:
First, nobody was checking that the di_onlink field is actually zero;
Second, that allocating a temporary file for repairs can fail (and
thus bring down the entire fs) if the inode cluster is corrupt; and
Third, that file link counts do not pin at ~0U to prevent integer
overflows. Fourth, the x{chk,rep}_metadata_inode_fork functions
should be subclassing the main scrub context, not modifying the
parent's setup willy-nilly.
This scattered patchset fixes those three problems.
If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.
This has been running on the djcloud for months with no problems. Enjoy!
Comments and questions are, as always, welcome.
--D
kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=inode-repair-improvements
xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=inode-repair-improvements
---
Commits in this patchset:
* xfs: check unused nlink fields in the ondisk inode
* xfs: try to avoid allocating from sick inode clusters
* xfs: pin inodes that would otherwise overflow link count
* xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype
---
fs/xfs/libxfs/xfs_format.h | 6 ++++
fs/xfs/libxfs/xfs_ialloc.c | 40 ++++++++++++++++++++++++
fs/xfs/libxfs/xfs_inode_buf.c | 8 +++++
fs/xfs/scrub/common.c | 23 ++------------
fs/xfs/scrub/dir_repair.c | 11 ++-----
fs/xfs/scrub/inode_repair.c | 12 +++++++
fs/xfs/scrub/nlinks.c | 4 ++
fs/xfs/scrub/nlinks_repair.c | 8 +----
fs/xfs/scrub/repair.c | 67 ++++++++---------------------------------
fs/xfs/scrub/scrub.c | 63 +++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/scrub.h | 11 +++++++
fs/xfs/xfs_inode.c | 33 +++++++++++++-------
12 files changed, 187 insertions(+), 99 deletions(-)
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode
2024-02-27 2:18 [PATCHSET v29.4 13/13] xfs: inode-related repair fixes Darrick J. Wong
@ 2024-02-27 2:33 ` Darrick J. Wong
2024-02-28 17:42 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters Darrick J. Wong
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Darrick J. Wong @ 2024-02-27 2:33 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs, hch
From: Darrick J. Wong <djwong@kernel.org>
v2/v3 inodes use di_nlink and not di_onlink; and v1 inodes use di_onlink
and not di_nlink. Whichever field is not in use, make sure its contents
are zero, and teach xfs_scrub to fix that if it is.
This clears a bunch of missing scrub failure errors in xfs/385 for
core.onlink.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++
fs/xfs/scrub/inode_repair.c | 12 ++++++++++++
2 files changed, 20 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d0dcce462bf42..d79002343d0b6 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -491,6 +491,14 @@ xfs_dinode_verify(
return __this_address;
}
+ if (dip->di_version > 1) {
+ if (dip->di_onlink)
+ return __this_address;
+ } else {
+ if (dip->di_nlink)
+ return __this_address;
+ }
+
/* don't allow invalid i_size */
di_size = be64_to_cpu(dip->di_size);
if (di_size & (1ULL << 63))
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 00d08eef32525..90893b423cf13 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -468,6 +468,17 @@ xrep_dinode_mode(
return 0;
}
+/* Fix unused link count fields having nonzero values. */
+STATIC void
+xrep_dinode_nlinks(
+ struct xfs_dinode *dip)
+{
+ if (dip->di_version > 1)
+ dip->di_onlink = 0;
+ else
+ dip->di_nlink = 0;
+}
+
/* Fix any conflicting flags that the verifiers complain about. */
STATIC void
xrep_dinode_flags(
@@ -1329,6 +1340,7 @@ xrep_dinode_core(
iget_error = xrep_dinode_mode(ri, dip);
if (iget_error)
goto write;
+ xrep_dinode_nlinks(dip);
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
xrep_dinode_size(ri, dip);
xrep_dinode_extsize_hints(sc, dip);
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters
2024-02-27 2:18 [PATCHSET v29.4 13/13] xfs: inode-related repair fixes Darrick J. Wong
2024-02-27 2:33 ` [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode Darrick J. Wong
@ 2024-02-27 2:34 ` Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
2024-02-27 2:34 ` [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype Darrick J. Wong
3 siblings, 1 reply; 12+ messages in thread
From: Darrick J. Wong @ 2024-02-27 2:34 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs, hch
From: Darrick J. Wong <djwong@kernel.org>
I noticed that xfs/413 and xfs/375 occasionally failed while fuzzing
core.mode of an inode. The root cause of these problems is that the
field we fuzzed (core.mode or core.magic, typically) causes the entire
inode cluster buffer verification to fail, which affects several inodes
at once. The repair process tries to create either a /lost+found or a
temporary repair file, but regrettably it picks the same inode cluster
that we just corrupted, with the result that repair triggers the demise
of the filesystem.
Try avoid this by making the inode allocation path detect when the perag
health status indicates that someone has found bad inode cluster
buffers, and try to read the inode cluster buffer. If the cluster
buffer fails the verifiers, try another AG. This isn't foolproof and
can result in premature ENOSPC, but that might be better than shutting
down.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_ialloc.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index e5ac3e5430c4e..8279d90da7e7b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1057,6 +1057,33 @@ xfs_inobt_first_free_inode(
return xfs_lowbit64(realfree);
}
+/*
+ * If this AG has corrupt inodes, check if allocating this inode would fail
+ * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
+ * somewhere else.
+ */
+static int
+xfs_dialloc_check_ino(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ xfs_ino_t ino)
+{
+ struct xfs_imap imap;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xfs_imap(pag, tp, ino, &imap, 0);
+ if (error)
+ return -EAGAIN;
+
+ error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
+ if (error)
+ return -EAGAIN;
+
+ xfs_trans_brelse(tp, bp);
+ return 0;
+}
+
/*
* Allocate an inode using the inobt-only algorithm.
*/
@@ -1309,6 +1336,13 @@ xfs_dialloc_ag_inobt(
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error0;
+ }
+
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
error = xfs_inobt_update(cur, &rec);
@@ -1584,6 +1618,12 @@ xfs_dialloc_ag(
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error_cur;
+ }
+
/*
* Modify or remove the finobt record.
*/
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count
2024-02-27 2:18 [PATCHSET v29.4 13/13] xfs: inode-related repair fixes Darrick J. Wong
2024-02-27 2:33 ` [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode Darrick J. Wong
2024-02-27 2:34 ` [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters Darrick J. Wong
@ 2024-02-27 2:34 ` Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype Darrick J. Wong
3 siblings, 1 reply; 12+ messages in thread
From: Darrick J. Wong @ 2024-02-27 2:34 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs, hch
From: Darrick J. Wong <djwong@kernel.org>
The VFS inc_nlink function does not explicitly check for integer
overflows in the i_nlink field. Instead, it checks the link count
against s_max_links in the vfs_{link,create,rename} functions. XFS
sets the maximum link count to 2.1 billion, so integer overflows should
not be a problem.
However. It's possible that online repair could find that a file has
more than four billion links, particularly if the link count got
corrupted while creating hardlinks to the file. The di_nlinkv2 field is
not large enough to store a value larger than 2^32, so we ought to
define a magic pin value of ~0U which means that the inode never gets
deleted. This will prevent a UAF error if the repair finds this
situation and users begin deleting links to the file.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_format.h | 6 ++++++
fs/xfs/scrub/dir_repair.c | 11 +++--------
fs/xfs/scrub/nlinks.c | 4 +++-
fs/xfs/scrub/nlinks_repair.c | 8 ++------
fs/xfs/xfs_inode.c | 33 ++++++++++++++++++++++-----------
5 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index aa2ad7e04202b..de90dae8b1a63 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -911,6 +911,12 @@ static inline uint xfs_dinode_size(int version)
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
+/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
/*
* Values for di_format
*
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 0bf73e9c63fba..edc02e1a210b3 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1147,7 +1147,9 @@ xrep_dir_set_nlink(
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *dp = sc->ip;
struct xfs_perag *pag;
- unsigned int new_nlink = rd->subdirs + 2;
+ unsigned int new_nlink = min_t(unsigned long long,
+ rd->subdirs + 2,
+ XFS_NLINK_PINNED);
int error;
/*
@@ -1203,13 +1205,6 @@ xrep_dir_swap(
bool ip_local, temp_local;
int error = 0;
- /*
- * If we found enough subdirs to overflow this directory's link count,
- * bail out to userspace before we modify anything.
- */
- if (rd->subdirs + 2 > XFS_MAXLINK)
- return -EFSCORRUPTED;
-
/*
* If we never found the parent for this directory, temporarily assign
* the root dir as the parent; we'll move this to the orphanage after
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index b13bbbdf2ad32..15ea06db31545 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -603,9 +603,11 @@ xchk_nlinks_compare_inode(
* this as a corruption. The VFS won't let users increase the link
* count, but it will let them decrease it.
*/
- if (total_links > XFS_MAXLINK) {
+ if (total_links > XFS_NLINK_PINNED) {
xchk_ino_set_corrupt(sc, ip->i_ino);
goto out_corrupt;
+ } else if (total_links > XFS_MAXLINK) {
+ xchk_ino_set_warning(sc, ip->i_ino);
}
/* Link counts should match. */
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 1345c07a95c62..87cb3400ff948 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -239,14 +239,10 @@ xrep_nlinks_repair_inode(
/* Commit the new link count if it changed. */
if (total_links != actual_nlink) {
- if (total_links > XFS_MAXLINK) {
- trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
- goto out_trans;
- }
-
trace_xrep_nlinks_update_inode(mp, ip, &obs);
- set_nlink(VFS_I(ip), total_links);
+ set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
+ XFS_NLINK_PINNED));
dirty = true;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b2c287dca611b..5326166352fed 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -912,22 +912,25 @@ xfs_init_new_inode(
*/
static int /* error */
xfs_droplink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- if (VFS_I(ip)->i_nlink == 0) {
- xfs_alert(ip->i_mount,
- "%s: Attempt to drop inode (%llu) with nlink zero.",
- __func__, ip->i_ino);
- return -EFSCORRUPTED;
- }
+ struct inode *inode = VFS_I(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- drop_nlink(VFS_I(ip));
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (VFS_I(ip)->i_nlink)
+ if (inode->i_nlink)
return 0;
return xfs_iunlink(tp, ip);
@@ -941,9 +944,17 @@ xfs_bumplink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
+ struct inode *inode = VFS_I(ip);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- inc_nlink(VFS_I(ip));
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype
2024-02-27 2:18 [PATCHSET v29.4 13/13] xfs: inode-related repair fixes Darrick J. Wong
` (2 preceding siblings ...)
2024-02-27 2:34 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
@ 2024-02-27 2:34 ` Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
3 siblings, 1 reply; 12+ messages in thread
From: Darrick J. Wong @ 2024-02-27 2:34 UTC (permalink / raw)
To: djwong; +Cc: linux-xfs, hch
From: Darrick J. Wong <djwong@kernel.org>
When a file-based metadata structure is being scrubbed in
xchk_metadata_inode_subtype, we should create an entirely new scrub
context so that each scrubber doesn't trip over another's buffers.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/scrub/common.c | 23 +++--------------
fs/xfs/scrub/repair.c | 67 ++++++++++---------------------------------------
fs/xfs/scrub/scrub.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/scrub.h | 11 ++++++++
4 files changed, 91 insertions(+), 73 deletions(-)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 4afaa0a0760c6..599de8690f335 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1205,27 +1205,12 @@ xchk_metadata_inode_subtype(
struct xfs_scrub *sc,
unsigned int scrub_type)
{
- __u32 smtype = sc->sm->sm_type;
- unsigned int sick_mask = sc->sick_mask;
+ struct xfs_scrub_subord *sub;
int error;
- sc->sm->sm_type = scrub_type;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- break;
- }
-
- sc->sick_mask = sick_mask;
- sc->sm->sm_type = smtype;
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
+ xchk_scrub_free_subord(sub);
return error;
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 9cf0014ecd3e0..445ff3f76b339 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1009,55 +1009,27 @@ xrep_metadata_inode_subtype(
struct xfs_scrub *sc,
unsigned int scrub_type)
{
- __u32 smtype = sc->sm->sm_type;
- __u32 smflags = sc->sm->sm_flags;
- unsigned int sick_mask = sc->sick_mask;
+ struct xfs_scrub_subord *sub;
int error;
/*
- * Let's see if the inode needs repair. We're going to open-code calls
- * to the scrub and repair functions so that we can hang on to the
+ * Let's see if the inode needs repair. Use a subordinate scrub context
+ * to call the scrub and repair functions so that we can hang on to the
* resources that we already acquired instead of using the standard
* setup/teardown routines.
*/
- sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
- sc->sm->sm_type = scrub_type;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xchk_bmap_attr(sc);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- }
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
if (error)
goto out;
-
- if (!xrep_will_attempt(sc))
+ if (!xrep_will_attempt(&sub->sc))
goto out;
/*
* Repair some part of the inode. This will potentially join the inode
* to the transaction.
*/
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xrep_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xrep_bmap(sc, XFS_DATA_FORK, false);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xrep_bmap(sc, XFS_ATTR_FORK, false);
- break;
- }
+ error = sub->sc.ops->repair(&sub->sc);
if (error)
goto out;
@@ -1066,10 +1038,10 @@ xrep_metadata_inode_subtype(
* that the inode will not be joined to the transaction when we exit
* the function.
*/
- error = xfs_defer_finish(&sc->tp);
+ error = xfs_defer_finish(&sub->sc.tp);
if (error)
goto out;
- error = xfs_trans_roll(&sc->tp);
+ error = xfs_trans_roll(&sub->sc.tp);
if (error)
goto out;
@@ -1077,31 +1049,18 @@ xrep_metadata_inode_subtype(
* Clear the corruption flags and re-check the metadata that we just
* repaired.
*/
- sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
-
- switch (scrub_type) {
- case XFS_SCRUB_TYPE_INODE:
- error = xchk_inode(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTD:
- error = xchk_bmap_data(sc);
- break;
- case XFS_SCRUB_TYPE_BMBTA:
- error = xchk_bmap_attr(sc);
- break;
- }
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = sub->sc.ops->scrub(&sub->sc);
if (error)
goto out;
/* If corruption persists, the repair has failed. */
- if (xchk_needs_repair(sc->sm)) {
+ if (xchk_needs_repair(sub->sc.sm)) {
error = -EFSCORRUPTED;
goto out;
}
out:
- sc->sick_mask = sick_mask;
- sc->sm->sm_type = smtype;
- sc->sm->sm_flags = smflags;
+ xchk_scrub_free_subord(sub);
return error;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 301d5b753fdd5..ebb06838c31be 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -177,6 +177,39 @@ xchk_fsgates_disable(
}
#undef FSGATES_MASK
+/* Free the resources associated with a scrub subtype. */
+void
+xchk_scrub_free_subord(
+ struct xfs_scrub_subord *sub)
+{
+ struct xfs_scrub *sc = sub->parent_sc;
+
+ ASSERT(sc->ip == sub->sc.ip);
+ ASSERT(sc->orphanage == sub->sc.orphanage);
+ ASSERT(sc->tempip == sub->sc.tempip);
+
+ sc->sm->sm_type = sub->old_smtype;
+ sc->sm->sm_flags = sub->old_smflags |
+ (sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT);
+ sc->tp = sub->sc.tp;
+
+ if (sub->sc.buf) {
+ if (sub->sc.buf_cleanup)
+ sub->sc.buf_cleanup(sub->sc.buf);
+ kvfree(sub->sc.buf);
+ }
+ if (sub->sc.xmbtp)
+ xmbuf_free(sub->sc.xmbtp);
+ if (sub->sc.xfile)
+ xfile_destroy(sub->sc.xfile);
+
+ sc->ilock_flags = sub->sc.ilock_flags;
+ sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags;
+ sc->temp_ilock_flags = sub->sc.temp_ilock_flags;
+
+ kfree(sub);
+}
+
/* Free all the resources and finish the transactions. */
STATIC int
xchk_teardown(
@@ -505,6 +538,36 @@ static inline void xchk_postmortem(struct xfs_scrub *sc)
}
#endif /* CONFIG_XFS_ONLINE_REPAIR */
+/*
+ * Create a new scrub context from an existing one, but with a different scrub
+ * type.
+ */
+struct xfs_scrub_subord *
+xchk_scrub_create_subord(
+ struct xfs_scrub *sc,
+ unsigned int subtype)
+{
+ struct xfs_scrub_subord *sub;
+
+ sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
+ if (!sub)
+ return ERR_PTR(-ENOMEM);
+
+ sub->old_smtype = sc->sm->sm_type;
+ sub->old_smflags = sc->sm->sm_flags;
+ sub->parent_sc = sc;
+ memcpy(&sub->sc, sc, sizeof(struct xfs_scrub));
+ sub->sc.ops = &meta_scrub_ops[subtype];
+ sub->sc.sm->sm_type = subtype;
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ sub->sc.buf = NULL;
+ sub->sc.buf_cleanup = NULL;
+ sub->sc.xfile = NULL;
+ sub->sc.xmbtp = NULL;
+
+ return sub;
+}
+
/* Dispatch metadata scrubbing. */
int
xfs_scrub_metadata(
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 456bb181399f4..2661874b01ab2 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -156,6 +156,17 @@ struct xfs_scrub {
*/
#define XREP_FSGATES_ALL (XREP_FSGATES_EXCHMAPS)
+struct xfs_scrub_subord {
+ struct xfs_scrub sc;
+ struct xfs_scrub *parent_sc;
+ unsigned int old_smtype;
+ unsigned int old_smflags;
+};
+
+struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
+ unsigned int subtype);
+void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
+
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode
2024-02-27 2:33 ` [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode Darrick J. Wong
@ 2024-02-28 17:42 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2024-02-28 17:42 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, hch
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters
2024-02-27 2:34 ` [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters Darrick J. Wong
@ 2024-02-28 17:43 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2024-02-28 17:43 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, hch
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count
2024-02-27 2:34 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
@ 2024-02-28 17:43 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2024-02-28 17:43 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, hch
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype
2024-02-27 2:34 ` [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype Darrick J. Wong
@ 2024-02-28 17:43 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2024-02-28 17:43 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: linux-xfs, hch
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count
2024-03-27 1:49 [PATCHSET v30.1 14/15] xfs: inode-related repair fixes Darrick J. Wong
@ 2024-03-27 2:07 ` Darrick J. Wong
0 siblings, 0 replies; 12+ messages in thread
From: Darrick J. Wong @ 2024-03-27 2:07 UTC (permalink / raw)
To: djwong; +Cc: Christoph Hellwig, hch, linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
The VFS inc_nlink function does not explicitly check for integer
overflows in the i_nlink field. Instead, it checks the link count
against s_max_links in the vfs_{link,create,rename} functions. XFS
sets the maximum link count to 2.1 billion, so integer overflows should
not be a problem.
However. It's possible that online repair could find that a file has
more than four billion links, particularly if the link count got
corrupted while creating hardlinks to the file. The di_nlinkv2 field is
not large enough to store a value larger than 2^32, so we ought to
define a magic pin value of ~0U which means that the inode never gets
deleted. This will prevent a UAF error if the repair finds this
situation and users begin deleting links to the file.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_format.h | 6 ++++++
fs/xfs/scrub/dir_repair.c | 11 +++--------
fs/xfs/scrub/nlinks.c | 4 +++-
fs/xfs/scrub/nlinks_repair.c | 8 ++------
fs/xfs/xfs_inode.c | 33 ++++++++++++++++++++++-----------
5 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index aa2ad7e04202b..de90dae8b1a63 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -911,6 +911,12 @@ static inline uint xfs_dinode_size(int version)
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
+/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
/*
* Values for di_format
*
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 983d6519dc3f6..01893f25454b2 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1147,7 +1147,9 @@ xrep_dir_set_nlink(
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *dp = sc->ip;
struct xfs_perag *pag;
- unsigned int new_nlink = rd->subdirs + 2;
+ unsigned int new_nlink = min_t(unsigned long long,
+ rd->subdirs + 2,
+ XFS_NLINK_PINNED);
int error;
/*
@@ -1203,13 +1205,6 @@ xrep_dir_swap(
bool ip_local, temp_local;
int error = 0;
- /*
- * If we found enough subdirs to overflow this directory's link count,
- * bail out to userspace before we modify anything.
- */
- if (rd->subdirs + 2 > XFS_MAXLINK)
- return -EFSCORRUPTED;
-
/*
* If we never found the parent for this directory, temporarily assign
* the root dir as the parent; we'll move this to the orphanage after
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index b13bbbdf2ad32..15ea06db31545 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -603,9 +603,11 @@ xchk_nlinks_compare_inode(
* this as a corruption. The VFS won't let users increase the link
* count, but it will let them decrease it.
*/
- if (total_links > XFS_MAXLINK) {
+ if (total_links > XFS_NLINK_PINNED) {
xchk_ino_set_corrupt(sc, ip->i_ino);
goto out_corrupt;
+ } else if (total_links > XFS_MAXLINK) {
+ xchk_ino_set_warning(sc, ip->i_ino);
}
/* Link counts should match. */
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 1345c07a95c62..87cb3400ff948 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -239,14 +239,10 @@ xrep_nlinks_repair_inode(
/* Commit the new link count if it changed. */
if (total_links != actual_nlink) {
- if (total_links > XFS_MAXLINK) {
- trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
- goto out_trans;
- }
-
trace_xrep_nlinks_update_inode(mp, ip, &obs);
- set_nlink(VFS_I(ip), total_links);
+ set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
+ XFS_NLINK_PINNED));
dirty = true;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 666bd03cf05c3..90865af01cd35 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -890,22 +890,25 @@ xfs_init_new_inode(
*/
static int /* error */
xfs_droplink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- if (VFS_I(ip)->i_nlink == 0) {
- xfs_alert(ip->i_mount,
- "%s: Attempt to drop inode (%llu) with nlink zero.",
- __func__, ip->i_ino);
- return -EFSCORRUPTED;
- }
+ struct inode *inode = VFS_I(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- drop_nlink(VFS_I(ip));
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (VFS_I(ip)->i_nlink)
+ if (inode->i_nlink)
return 0;
return xfs_iunlink(tp, ip);
@@ -919,9 +922,17 @@ xfs_bumplink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
+ struct inode *inode = VFS_I(ip);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- inc_nlink(VFS_I(ip));
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count
2024-04-15 23:36 [PATCHSET v30.3 13/16] xfs: inode-related repair fixes Darrick J. Wong
@ 2024-04-15 23:55 ` Darrick J. Wong
0 siblings, 0 replies; 12+ messages in thread
From: Darrick J. Wong @ 2024-04-15 23:55 UTC (permalink / raw)
To: chandanbabu, djwong; +Cc: Christoph Hellwig, hch, linux-xfs
From: Darrick J. Wong <djwong@kernel.org>
The VFS inc_nlink function does not explicitly check for integer
overflows in the i_nlink field. Instead, it checks the link count
against s_max_links in the vfs_{link,create,rename} functions. XFS
sets the maximum link count to 2.1 billion, so integer overflows should
not be a problem.
However. It's possible that online repair could find that a file has
more than four billion links, particularly if the link count got
corrupted while creating hardlinks to the file. The di_nlinkv2 field is
not large enough to store a value larger than 2^32, so we ought to
define a magic pin value of ~0U which means that the inode never gets
deleted. This will prevent a UAF error if the repair finds this
situation and users begin deleting links to the file.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_format.h | 6 ++++++
fs/xfs/scrub/dir_repair.c | 11 +++--------
fs/xfs/scrub/nlinks.c | 4 +++-
fs/xfs/scrub/nlinks_repair.c | 8 ++------
fs/xfs/xfs_inode.c | 33 ++++++++++++++++++++++-----------
5 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 10153ce116d4..f1818c54af6f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -899,6 +899,12 @@ static inline uint xfs_dinode_size(int version)
*/
#define XFS_MAXLINK ((1U << 31) - 1U)
+/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
/*
* Values for di_format
*
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index c150b2efa2c2..38957da26b94 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1145,7 +1145,9 @@ xrep_dir_set_nlink(
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *dp = sc->ip;
struct xfs_perag *pag;
- unsigned int new_nlink = rd->subdirs + 2;
+ unsigned int new_nlink = min_t(unsigned long long,
+ rd->subdirs + 2,
+ XFS_NLINK_PINNED);
int error;
/*
@@ -1201,13 +1203,6 @@ xrep_dir_swap(
bool ip_local, temp_local;
int error = 0;
- /*
- * If we found enough subdirs to overflow this directory's link count,
- * bail out to userspace before we modify anything.
- */
- if (rd->subdirs + 2 > XFS_MAXLINK)
- return -EFSCORRUPTED;
-
/*
* If we never found the parent for this directory, temporarily assign
* the root dir as the parent; we'll move this to the orphanage after
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index c456523fac9c..fcb9c473f372 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -607,9 +607,11 @@ xchk_nlinks_compare_inode(
* this as a corruption. The VFS won't let users increase the link
* count, but it will let them decrease it.
*/
- if (total_links > XFS_MAXLINK) {
+ if (total_links > XFS_NLINK_PINNED) {
xchk_ino_set_corrupt(sc, ip->i_ino);
goto out_corrupt;
+ } else if (total_links > XFS_MAXLINK) {
+ xchk_ino_set_warning(sc, ip->i_ino);
}
/* Link counts should match. */
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 0cb67339eac8..83f8637bb08f 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -238,14 +238,10 @@ xrep_nlinks_repair_inode(
/* Commit the new link count if it changed. */
if (total_links != actual_nlink) {
- if (total_links > XFS_MAXLINK) {
- trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
- goto out_trans;
- }
-
trace_xrep_nlinks_update_inode(mp, ip, &obs);
- set_nlink(VFS_I(ip), total_links);
+ set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
+ XFS_NLINK_PINNED));
dirty = true;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index fed0cd6bffdf..03dcb4ac0431 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -890,22 +890,25 @@ xfs_init_new_inode(
*/
static int /* error */
xfs_droplink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- if (VFS_I(ip)->i_nlink == 0) {
- xfs_alert(ip->i_mount,
- "%s: Attempt to drop inode (%llu) with nlink zero.",
- __func__, ip->i_ino);
- return -EFSCORRUPTED;
- }
+ struct inode *inode = VFS_I(ip);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- drop_nlink(VFS_I(ip));
+ if (inode->i_nlink == 0) {
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count dropped below zero. Pinning link count.",
+ ip->i_ino);
+ set_nlink(inode, XFS_NLINK_PINNED);
+ }
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ drop_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (VFS_I(ip)->i_nlink)
+ if (inode->i_nlink)
return 0;
return xfs_iunlink(tp, ip);
@@ -919,9 +922,17 @@ xfs_bumplink(
struct xfs_trans *tp,
struct xfs_inode *ip)
{
+ struct inode *inode = VFS_I(ip);
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- inc_nlink(VFS_I(ip));
+ if (inode->i_nlink == XFS_NLINK_PINNED - 1)
+ xfs_info_ratelimited(tp->t_mountp,
+ "Inode 0x%llx link count exceeded maximum. Pinning link count.",
+ ip->i_ino);
+ if (inode->i_nlink != XFS_NLINK_PINNED)
+ inc_nlink(inode);
+
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2024-04-15 23:55 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-02-27 2:18 [PATCHSET v29.4 13/13] xfs: inode-related repair fixes Darrick J. Wong
2024-02-27 2:33 ` [PATCH 1/4] xfs: check unused nlink fields in the ondisk inode Darrick J. Wong
2024-02-28 17:42 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 2/4] xfs: try to avoid allocating from sick inode clusters Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
2024-02-27 2:34 ` [PATCH 4/4] xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype Darrick J. Wong
2024-02-28 17:43 ` Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2024-04-15 23:36 [PATCHSET v30.3 13/16] xfs: inode-related repair fixes Darrick J. Wong
2024-04-15 23:55 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
2024-03-27 1:49 [PATCHSET v30.1 14/15] xfs: inode-related repair fixes Darrick J. Wong
2024-03-27 2:07 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
2023-12-31 19:32 [PATCHSET v29.0 27/28] xfs: inode-related repair fixes Darrick J. Wong
2023-12-31 20:41 ` [PATCH 3/4] xfs: pin inodes that would otherwise overflow link count Darrick J. Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox