From: Jian Wen <wenjianhn@gmail.com>
To: linux-xfs@vger.kernel.org
Cc: Jian Wen <wenjianhn@gmail.com>,
djwong@kernel.org, hch@lst.de, dchinner@redhat.com,
Dave Chinner <david@fromorbit.com>,
Jian Wen <wenjian1@xiaomi.com>
Subject: [PATCH v3] xfs: improve handling of prjquot ENOSPC
Date: Sat, 23 Dec 2023 18:56:32 +0800 [thread overview]
Message-ID: <20231223105632.85286-1-wenjianhn@gmail.com> (raw)
In-Reply-To: <20231214150708.77586-1-wenjianhn@gmail.com>
Currently, xfs_trans_dqresv() return -ENOSPC when the project quota
limit is reached. As a result, xfs_file_buffered_write() will flush
the whole filesystem instead of the project quota.
Fix the issue by make xfs_trans_dqresv() return -EDQUOT rather than
-ENOSPC. Add a helper, xfs_blockgc_nospace_flush(), to make flushing
for both EDQUOT and ENOSPC consistent.
Changes since v2:
- completely rewrote based on the suggestions from Dave
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Jian Wen <wenjian1@xiaomi.com>
---
fs/xfs/xfs_dquot.h | 13 +++++++++++
fs/xfs/xfs_file.c | 40 +++++++++++---------------------
fs/xfs/xfs_icache.c | 50 +++++++++++++++++++++++++++++-----------
fs/xfs/xfs_icache.h | 7 +++---
fs/xfs/xfs_inode.c | 19 ++++++++-------
fs/xfs/xfs_reflink.c | 2 ++
fs/xfs/xfs_trans.c | 39 +++++++++++++++++++++++--------
fs/xfs/xfs_trans_dquot.c | 3 ---
8 files changed, 109 insertions(+), 64 deletions(-)
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 80c8f851a2f3..c5f4a170eef1 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -183,6 +183,19 @@ xfs_dquot_is_enforced(
return false;
}
+static inline bool
+xfs_dquot_is_enospc(
+ struct xfs_dquot *dqp)
+{
+ if (!dqp)
+ return false;
+ if (!xfs_dquot_is_enforced(dqp))
+ return false;
+ if (dqp->q_blk.hardlimit - dqp->q_blk.reserved > 0)
+ return false;
+ return true;
+}
+
/*
* Check whether a dquot is under low free space conditions. We assume the quota
* is enabled and enforced.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e33e5e13b95f..4b6e90bb1c59 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -24,6 +24,9 @@
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include <linux/dax.h>
#include <linux/falloc.h>
@@ -785,32 +788,17 @@ xfs_file_buffered_write(
trace_xfs_file_buffered_write(iocb, from);
ret = iomap_file_buffered_write(iocb, from,
&xfs_buffered_write_iomap_ops);
-
- /*
- * If we hit a space limit, try to free up some lingering preallocated
- * space before returning an error. In the case of ENOSPC, first try to
- * write back all dirty inodes to free up some of the excess reserved
- * metadata space. This reduces the chances that the eofblocks scan
- * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
- * also behaves as a filter to prevent too many eofblocks scans from
- * running at the same time. Use a synchronous scan to increase the
- * effectiveness of the scan.
- */
- if (ret == -EDQUOT && !cleared_space) {
- xfs_iunlock(ip, iolock);
- xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
- cleared_space = true;
- goto write_retry;
- } else if (ret == -ENOSPC && !cleared_space) {
- struct xfs_icwalk icw = {0};
-
- cleared_space = true;
- xfs_flush_inodes(ip->i_mount);
-
- xfs_iunlock(ip, iolock);
- icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
- xfs_blockgc_free_space(ip->i_mount, &icw);
- goto write_retry;
+ if (ret == -EDQUOT || ret == -ENOSPC) {
+ if (!cleared_space) {
+ xfs_iunlock(ip, iolock);
+ xfs_blockgc_nospace_flush(ip->i_mount, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ XFS_ICWALK_FLAG_SYNC, ret);
+ cleared_space = true;
+ goto write_retry;
+ }
+ if (ret == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
+ ret = -ENOSPC;
}
out:
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index dba514a2c84d..d2dcb653befc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -64,6 +64,10 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
XFS_ICWALK_FLAG_RECLAIM_SICK | \
XFS_ICWALK_FLAG_UNION)
+static int xfs_blockgc_free_dquots(struct xfs_mount *mp,
+ struct xfs_dquot *udqp, struct xfs_dquot *gdqp,
+ struct xfs_dquot *pdqp, unsigned int iwalk_flags);
+
/*
* Allocate and initialise an xfs_inode.
*/
@@ -1477,6 +1481,38 @@ xfs_blockgc_free_space(
return xfs_inodegc_flush(mp);
}
+/*
+ * If we hit a space limit, try to free up some lingering preallocated
+ * space before returning an error. In the case of ENOSPC, first try to
+ * write back all dirty inodes to free up some of the excess reserved
+ * metadata space. This reduces the chances that the eofblocks scan
+ * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+ * also behaves as a filter to prevent too many eofblocks scans from
+ * running at the same time. Use a synchronous scan to increase the
+ * effectiveness of the scan.
+ */
+void
+xfs_blockgc_nospace_flush(
+ struct xfs_mount *mp,
+ struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp,
+ struct xfs_dquot *pdqp,
+ unsigned int iwalk_flags,
+ int what)
+{
+ ASSERT(what == -EDQUOT || what == -ENOSPC);
+
+ if (what == -EDQUOT) {
+ xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, iwalk_flags);
+ } else if (what == -ENOSPC) {
+ struct xfs_icwalk icw = {0};
+
+ xfs_flush_inodes(mp);
+ icw.icw_flags = iwalk_flags;
+ xfs_blockgc_free_space(mp, &icw);
+ }
+}
+
/*
* Reclaim all the free space that we can by scheduling the background blockgc
* and inodegc workers immediately and waiting for them all to clear.
@@ -1515,7 +1551,7 @@ xfs_blockgc_flush_all(
* (XFS_ICWALK_FLAG_SYNC), the caller also must not hold any inode's IOLOCK or
* MMAPLOCK.
*/
-int
+static int
xfs_blockgc_free_dquots(
struct xfs_mount *mp,
struct xfs_dquot *udqp,
@@ -1559,18 +1595,6 @@ xfs_blockgc_free_dquots(
return xfs_blockgc_free_space(mp, &icw);
}
-/* Run cow/eofblocks scans on the quotas attached to the inode. */
-int
-xfs_blockgc_free_quota(
- struct xfs_inode *ip,
- unsigned int iwalk_flags)
-{
- return xfs_blockgc_free_dquots(ip->i_mount,
- xfs_inode_dquot(ip, XFS_DQTYPE_USER),
- xfs_inode_dquot(ip, XFS_DQTYPE_GROUP),
- xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), iwalk_flags);
-}
-
/* XFS Inode Cache Walking Code */
/*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 905944dafbe5..c0833450969d 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -57,11 +57,10 @@ long xfs_reclaim_inodes_nr(struct xfs_mount *mp, unsigned long nr_to_scan);
void xfs_inode_mark_reclaimable(struct xfs_inode *ip);
-int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
- struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
- unsigned int iwalk_flags);
-int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
+void xfs_blockgc_nospace_flush(struct xfs_mount *mp, struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp, struct xfs_dquot *pdqp,
+ unsigned int iwalk_flags, int what);
int xfs_blockgc_flush_all(struct xfs_mount *mp);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0f1c89786c2..e99ffa17d3d0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -27,6 +27,8 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include "xfs_filestream.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
@@ -1007,12 +1009,6 @@ xfs_create(
*/
error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
&tp);
- if (error == -ENOSPC) {
- /* flush outstanding delalloc blocks and retry */
- xfs_flush_inodes(mp);
- error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
- resblks, &tp);
- }
if (error)
goto out_release_dquots;
@@ -2951,14 +2947,21 @@ xfs_rename(
if (spaceres != 0) {
error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
0, false);
- if (error == -EDQUOT || error == -ENOSPC) {
+ if (error == -EDQUOT) {
if (!retried) {
xfs_trans_cancel(tp);
- xfs_blockgc_free_quota(target_dp, 0);
+ xfs_blockgc_nospace_flush(target_dp->i_mount,
+ target_dp->i_udquot,
+ target_dp->i_gdquot,
+ target_dp->i_pdquot,
+ 0, error);
retried = true;
goto retry;
}
+ if (xfs_dquot_is_enospc(target_dp->i_pdquot))
+ error = -ENOSPC;
+
nospace_error = error;
spaceres = 0;
error = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e5b62dc28466..cb036e1173ae 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -25,6 +25,8 @@
#include "xfs_bit.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
#include "xfs_reflink.h"
#include "xfs_iomap.h"
#include "xfs_ag.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 305c9d07bf1b..1574d7aa49c4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1217,15 +1217,21 @@ xfs_trans_alloc_inode(
}
error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
- if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ if (error == -EDQUOT && !retried) {
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_blockgc_free_quota(ip, 0);
+ xfs_blockgc_nospace_flush(ip->i_mount, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ 0, error);
retried = true;
goto retry;
}
- if (error)
+ if (error) {
+ if (error == -EDQUOT && xfs_dquot_is_enospc(ip->i_pdquot))
+ error = -ENOSPC;
+
goto out_cancel;
+ }
*tpp = tp;
return 0;
@@ -1260,13 +1266,16 @@ xfs_trans_alloc_icreate(
return error;
error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
- if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ if (error == -EDQUOT && !retried) {
xfs_trans_cancel(tp);
- xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+ xfs_blockgc_nospace_flush(mp, udqp, gdqp, pdqp, 0, error);
retried = true;
goto retry;
}
if (error) {
+ if (error == -EDQUOT && xfs_dquot_is_enospc(pdqp))
+ error = -ENOSPC;
+
xfs_trans_cancel(tp);
return error;
}
@@ -1340,14 +1349,19 @@ xfs_trans_alloc_ichange(
error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
pdqp, ip->i_nblocks + ip->i_delayed_blks,
1, qflags);
- if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ if (error == -EDQUOT && !retried) {
xfs_trans_cancel(tp);
- xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+ xfs_blockgc_nospace_flush(mp, udqp, gdqp, pdqp, 0,
+ error);
retried = true;
goto retry;
}
- if (error)
+ if (error) {
+ if (error == -EDQUOT && xfs_dquot_is_enospc(pdqp))
+ error = -ENOSPC;
+
goto out_cancel;
+ }
}
*tpp = tp;
@@ -1419,14 +1433,19 @@ xfs_trans_alloc_dir(
goto done;
error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
- if (error == -EDQUOT || error == -ENOSPC) {
+ if (error == -EDQUOT) {
if (!retried) {
xfs_trans_cancel(tp);
- xfs_blockgc_free_quota(dp, 0);
+ xfs_blockgc_nospace_flush(dp->i_mount, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ 0, error);
retried = true;
goto retry;
}
+ if (xfs_dquot_is_enospc(dp->i_pdquot))
+ error = -ENOSPC;
+
*nospace_error = error;
resblks = 0;
error = 0;
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index aa00cf67ad72..7201b86ef2c2 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -700,8 +700,6 @@ xfs_trans_dqresv(
error_return:
xfs_dqunlock(dqp);
- if (xfs_dquot_type(dqp) == XFS_DQTYPE_PROJ)
- return -ENOSPC;
return -EDQUOT;
error_corrupt:
xfs_dqunlock(dqp);
@@ -717,7 +715,6 @@ xfs_trans_dqresv(
* approach.
*
* flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota.
* XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
* XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
* dquots are unlocked on return, if they were not locked by caller.
--
2.34.1
next prev parent reply other threads:[~2023-12-23 10:57 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-14 15:07 [PATCH] xfs: improve handling of prjquot ENOSPC Jian Wen
2023-12-14 15:29 ` Christoph Hellwig
2023-12-14 17:06 ` Darrick J. Wong
2023-12-14 21:13 ` Dave Chinner
2023-12-16 15:49 ` Jian Wen
2023-12-16 15:35 ` [PATCH v2] " Jian Wen
2023-12-18 22:00 ` Dave Chinner
2023-12-19 5:47 ` Christoph Hellwig
2023-12-19 13:50 ` Jian Wen
2023-12-23 11:00 ` Jian Wen
2024-01-04 6:22 ` [PATCH v4] " Jian Wen
2024-01-08 0:35 ` Dave Chinner
2024-01-09 6:14 ` Darrick J. Wong
2024-01-09 6:38 ` Dave Chinner
2024-01-11 1:42 ` Darrick J. Wong
2024-01-11 7:24 ` Dave Chinner
2024-01-10 14:08 ` kernel test robot
2023-12-23 10:56 ` Jian Wen [this message]
2024-01-03 1:42 ` [PATCH v3] " Darrick J. Wong
2024-01-03 3:45 ` Jian Wen
2024-01-04 1:46 ` Darrick J. Wong
2024-01-04 3:36 ` Jian Wen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231223105632.85286-1-wenjianhn@gmail.com \
--to=wenjianhn@gmail.com \
--cc=david@fromorbit.com \
--cc=dchinner@redhat.com \
--cc=djwong@kernel.org \
--cc=hch@lst.de \
--cc=linux-xfs@vger.kernel.org \
--cc=wenjian1@xiaomi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).