linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jian Wen <wenjianhn@gmail.com>
To: linux-xfs@vger.kernel.org
Cc: Jian Wen <wenjianhn@gmail.com>,
	djwong@kernel.org, hch@lst.de, dchinner@redhat.com,
	Dave Chinner <david@fromorbit.com>,
	Jian Wen <wenjian1@xiaomi.com>
Subject: [PATCH v3] xfs: improve handling of prjquot ENOSPC
Date: Sat, 23 Dec 2023 18:56:32 +0800	[thread overview]
Message-ID: <20231223105632.85286-1-wenjianhn@gmail.com> (raw)
In-Reply-To: <20231214150708.77586-1-wenjianhn@gmail.com>

Currently, xfs_trans_dqresv() return -ENOSPC when the project quota
limit is reached. As a result, xfs_file_buffered_write() will flush
the whole filesystem instead of the project quota.

Fix the issue by make xfs_trans_dqresv() return -EDQUOT rather than
-ENOSPC. Add a helper, xfs_blockgc_nospace_flush(), to make flushing
for both EDQUOT and ENOSPC consistent.

Changes since v2:
  - completely rewrote based on the suggestions from Dave

Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Jian Wen <wenjian1@xiaomi.com>
---
 fs/xfs/xfs_dquot.h       | 13 +++++++++++
 fs/xfs/xfs_file.c        | 40 +++++++++++---------------------
 fs/xfs/xfs_icache.c      | 50 +++++++++++++++++++++++++++++-----------
 fs/xfs/xfs_icache.h      |  7 +++---
 fs/xfs/xfs_inode.c       | 19 ++++++++-------
 fs/xfs/xfs_reflink.c     |  2 ++
 fs/xfs/xfs_trans.c       | 39 +++++++++++++++++++++++--------
 fs/xfs/xfs_trans_dquot.c |  3 ---
 8 files changed, 109 insertions(+), 64 deletions(-)

diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 80c8f851a2f3..c5f4a170eef1 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -183,6 +183,19 @@ xfs_dquot_is_enforced(
 	return false;
 }
 
+static inline bool
+xfs_dquot_is_enospc(
+	struct xfs_dquot	*dqp)
+{
+	if (!dqp)
+		return false;
+	if (!xfs_dquot_is_enforced(dqp))
+		return false;
+	if (dqp->q_blk.hardlimit - dqp->q_blk.reserved > 0)
+		return false;
+	return true;
+}
+
 /*
  * Check whether a dquot is under low free space conditions. We assume the quota
  * is enabled and enforced.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e33e5e13b95f..4b6e90bb1c59 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -24,6 +24,9 @@
 #include "xfs_pnfs.h"
 #include "xfs_iomap.h"
 #include "xfs_reflink.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 
 #include <linux/dax.h>
 #include <linux/falloc.h>
@@ -785,32 +788,17 @@ xfs_file_buffered_write(
 	trace_xfs_file_buffered_write(iocb, from);
 	ret = iomap_file_buffered_write(iocb, from,
 			&xfs_buffered_write_iomap_ops);
-
-	/*
-	 * If we hit a space limit, try to free up some lingering preallocated
-	 * space before returning an error. In the case of ENOSPC, first try to
-	 * write back all dirty inodes to free up some of the excess reserved
-	 * metadata space. This reduces the chances that the eofblocks scan
-	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
-	 * also behaves as a filter to prevent too many eofblocks scans from
-	 * running at the same time.  Use a synchronous scan to increase the
-	 * effectiveness of the scan.
-	 */
-	if (ret == -EDQUOT && !cleared_space) {
-		xfs_iunlock(ip, iolock);
-		xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
-		cleared_space = true;
-		goto write_retry;
-	} else if (ret == -ENOSPC && !cleared_space) {
-		struct xfs_icwalk	icw = {0};
-
-		cleared_space = true;
-		xfs_flush_inodes(ip->i_mount);
-
-		xfs_iunlock(ip, iolock);
-		icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
-		xfs_blockgc_free_space(ip->i_mount, &icw);
-		goto write_retry;
+	if (ret == -EDQUOT || ret == -ENOSPC) {
+		if (!cleared_space) {
+			xfs_iunlock(ip, iolock);
+			xfs_blockgc_nospace_flush(ip->i_mount, ip->i_udquot,
+						ip->i_gdquot, ip->i_pdquot,
+						XFS_ICWALK_FLAG_SYNC, ret);
+			cleared_space = true;
+			goto write_retry;
+		}
+		if (ret == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
+			ret = -ENOSPC;
 	}
 
 out:
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index dba514a2c84d..d2dcb653befc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -64,6 +64,10 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 					 XFS_ICWALK_FLAG_RECLAIM_SICK | \
 					 XFS_ICWALK_FLAG_UNION)
 
+static int xfs_blockgc_free_dquots(struct xfs_mount *mp,
+		struct xfs_dquot *udqp, struct xfs_dquot *gdqp,
+		struct xfs_dquot *pdqp, unsigned int iwalk_flags);
+
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -1477,6 +1481,38 @@ xfs_blockgc_free_space(
 	return xfs_inodegc_flush(mp);
 }
 
+/*
+ * If we hit a space limit, try to free up some lingering preallocated
+ * space before returning an error. In the case of ENOSPC, first try to
+ * write back all dirty inodes to free up some of the excess reserved
+ * metadata space. This reduces the chances that the eofblocks scan
+ * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+ * also behaves as a filter to prevent too many eofblocks scans from
+ * running at the same time.  Use a synchronous scan to increase the
+ * effectiveness of the scan.
+ */
+void
+xfs_blockgc_nospace_flush(
+	struct xfs_mount	*mp,
+	struct xfs_dquot	*udqp,
+	struct xfs_dquot	*gdqp,
+	struct xfs_dquot	*pdqp,
+	unsigned int		iwalk_flags,
+	int			what)
+{
+	ASSERT(what == -EDQUOT || what == -ENOSPC);
+
+	if (what == -EDQUOT) {
+		xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, iwalk_flags);
+	} else if (what == -ENOSPC) {
+		struct xfs_icwalk	icw = {0};
+
+		xfs_flush_inodes(mp);
+		icw.icw_flags = iwalk_flags;
+		xfs_blockgc_free_space(mp, &icw);
+	}
+}
+
 /*
  * Reclaim all the free space that we can by scheduling the background blockgc
  * and inodegc workers immediately and waiting for them all to clear.
@@ -1515,7 +1551,7 @@ xfs_blockgc_flush_all(
  * (XFS_ICWALK_FLAG_SYNC), the caller also must not hold any inode's IOLOCK or
  * MMAPLOCK.
  */
-int
+static int
 xfs_blockgc_free_dquots(
 	struct xfs_mount	*mp,
 	struct xfs_dquot	*udqp,
@@ -1559,18 +1595,6 @@ xfs_blockgc_free_dquots(
 	return xfs_blockgc_free_space(mp, &icw);
 }
 
-/* Run cow/eofblocks scans on the quotas attached to the inode. */
-int
-xfs_blockgc_free_quota(
-	struct xfs_inode	*ip,
-	unsigned int		iwalk_flags)
-{
-	return xfs_blockgc_free_dquots(ip->i_mount,
-			xfs_inode_dquot(ip, XFS_DQTYPE_USER),
-			xfs_inode_dquot(ip, XFS_DQTYPE_GROUP),
-			xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), iwalk_flags);
-}
-
 /* XFS Inode Cache Walking Code */
 
 /*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 905944dafbe5..c0833450969d 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -57,11 +57,10 @@ long xfs_reclaim_inodes_nr(struct xfs_mount *mp, unsigned long nr_to_scan);
 
 void xfs_inode_mark_reclaimable(struct xfs_inode *ip);
 
-int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
-		struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
-		unsigned int iwalk_flags);
-int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
 int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
+void xfs_blockgc_nospace_flush(struct xfs_mount *mp, struct xfs_dquot *udqp,
+			struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
+			unsigned int iwalk_flags, int what);
 int xfs_blockgc_flush_all(struct xfs_mount *mp);
 
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0f1c89786c2..e99ffa17d3d0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,6 +27,8 @@
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 #include "xfs_filestream.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
@@ -1007,12 +1009,6 @@ xfs_create(
 	 */
 	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
 			&tp);
-	if (error == -ENOSPC) {
-		/* flush outstanding delalloc blocks and retry */
-		xfs_flush_inodes(mp);
-		error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
-				resblks, &tp);
-	}
 	if (error)
 		goto out_release_dquots;
 
@@ -2951,14 +2947,21 @@ xfs_rename(
 	if (spaceres != 0) {
 		error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
 				0, false);
-		if (error == -EDQUOT || error == -ENOSPC) {
+		if (error == -EDQUOT) {
 			if (!retried) {
 				xfs_trans_cancel(tp);
-				xfs_blockgc_free_quota(target_dp, 0);
+				xfs_blockgc_nospace_flush(target_dp->i_mount,
+							target_dp->i_udquot,
+							target_dp->i_gdquot,
+							target_dp->i_pdquot,
+							0, error);
 				retried = true;
 				goto retry;
 			}
 
+			if (xfs_dquot_is_enospc(target_dp->i_pdquot))
+				error = -ENOSPC;
+
 			nospace_error = error;
 			spaceres = 0;
 			error = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e5b62dc28466..cb036e1173ae 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -25,6 +25,8 @@
 #include "xfs_bit.h"
 #include "xfs_alloc.h"
 #include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 #include "xfs_reflink.h"
 #include "xfs_iomap.h"
 #include "xfs_ag.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 305c9d07bf1b..1574d7aa49c4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1217,15 +1217,21 @@ xfs_trans_alloc_inode(
 	}
 
 	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
-	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+	if (error == -EDQUOT && !retried) {
 		xfs_trans_cancel(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		xfs_blockgc_free_quota(ip, 0);
+		xfs_blockgc_nospace_flush(ip->i_mount, ip->i_udquot,
+					ip->i_gdquot, ip->i_pdquot,
+					0, error);
 		retried = true;
 		goto retry;
 	}
-	if (error)
+	if (error) {
+		if (error == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
+			error = -ENOSPC;
+
 		goto out_cancel;
+	}
 
 	*tpp = tp;
 	return 0;
@@ -1260,13 +1266,16 @@ xfs_trans_alloc_icreate(
 		return error;
 
 	error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
-	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+	if (error == -EDQUOT && !retried) {
 		xfs_trans_cancel(tp);
-		xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+		xfs_blockgc_nospace_flush(mp, udqp, gdqp, pdqp, 0, error);
 		retried = true;
 		goto retry;
 	}
 	if (error) {
+		if (error == -EDQUOT && xfs_dquot_is_enospc(pdqp))
+			error = -ENOSPC;
+
 		xfs_trans_cancel(tp);
 		return error;
 	}
@@ -1340,14 +1349,19 @@ xfs_trans_alloc_ichange(
 		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
 				pdqp, ip->i_nblocks + ip->i_delayed_blks,
 				1, qflags);
-		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+		if (error == -EDQUOT && !retried) {
 			xfs_trans_cancel(tp);
-			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+			xfs_blockgc_nospace_flush(mp, udqp, gdqp, pdqp, 0,
+						error);
 			retried = true;
 			goto retry;
 		}
-		if (error)
+		if (error) {
+			if (error == -EDQUOT && xfs_dquot_is_enospc(pdqp))
+				error = -ENOSPC;
+
 			goto out_cancel;
+		}
 	}
 
 	*tpp = tp;
@@ -1419,14 +1433,19 @@ xfs_trans_alloc_dir(
 		goto done;
 
 	error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
-	if (error == -EDQUOT || error == -ENOSPC) {
+	if (error == -EDQUOT) {
 		if (!retried) {
 			xfs_trans_cancel(tp);
-			xfs_blockgc_free_quota(dp, 0);
+			xfs_blockgc_nospace_flush(dp->i_mount, ip->i_udquot,
+						ip->i_gdquot, ip->i_pdquot,
+						0, error);
 			retried = true;
 			goto retry;
 		}
 
+		if (xfs_dquot_is_enospc(dp->i_pdquot))
+			error = -ENOSPC;
+
 		*nospace_error = error;
 		resblks = 0;
 		error = 0;
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index aa00cf67ad72..7201b86ef2c2 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -700,8 +700,6 @@ xfs_trans_dqresv(
 
 error_return:
 	xfs_dqunlock(dqp);
-	if (xfs_dquot_type(dqp) == XFS_DQTYPE_PROJ)
-		return -ENOSPC;
 	return -EDQUOT;
 error_corrupt:
 	xfs_dqunlock(dqp);
@@ -717,7 +715,6 @@ xfs_trans_dqresv(
  * approach.
  *
  * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *	   XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
  *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
  *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
  * dquots are unlocked on return, if they were not locked by caller.
-- 
2.34.1


  parent reply	other threads:[~2023-12-23 10:57 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-14 15:07 [PATCH] xfs: improve handling of prjquot ENOSPC Jian Wen
2023-12-14 15:29 ` Christoph Hellwig
2023-12-14 17:06 ` Darrick J. Wong
2023-12-14 21:13 ` Dave Chinner
2023-12-16 15:49   ` Jian Wen
2023-12-16 15:35 ` [PATCH v2] " Jian Wen
2023-12-18 22:00   ` Dave Chinner
2023-12-19  5:47     ` Christoph Hellwig
2023-12-19 13:50     ` Jian Wen
2023-12-23 11:00     ` Jian Wen
2024-01-04  6:22   ` [PATCH v4] " Jian Wen
2024-01-08  0:35     ` Dave Chinner
2024-01-09  6:14       ` Darrick J. Wong
2024-01-09  6:38         ` Dave Chinner
2024-01-11  1:42           ` Darrick J. Wong
2024-01-11  7:24             ` Dave Chinner
2024-01-10 14:08     ` kernel test robot
2023-12-23 10:56 ` Jian Wen [this message]
2024-01-03  1:42   ` [PATCH v3] " Darrick J. Wong
2024-01-03  3:45     ` Jian Wen
2024-01-04  1:46       ` Darrick J. Wong
2024-01-04  3:36         ` Jian Wen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231223105632.85286-1-wenjianhn@gmail.com \
    --to=wenjianhn@gmail.com \
    --cc=david@fromorbit.com \
    --cc=dchinner@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-xfs@vger.kernel.org \
    --cc=wenjian1@xiaomi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).