From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 3/3] xfs: xfs_trans_dqresv() can be made lockless
Date: Thu, 12 Dec 2013 20:40:58 +1100 [thread overview]
Message-ID: <1386841258-22183-4-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1386841258-22183-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
xfs_trans_dqresv() serialises dquot modifications by taking the
dquot lock while it is doing reservations. The thing is, nothing it
does really requires exclusive access to the dquot except for the
reservation accounting. We can do that locklessly with cmpxchg.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_trans_dquot.c | 372 +++++++++++++++++++++++++++--------------------
1 file changed, 213 insertions(+), 159 deletions(-)
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4117286..fa89d21 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -33,6 +33,97 @@
STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
+STATIC void
+xfs_quota_warn(
+ struct xfs_mount *mp,
+ struct xfs_dquot *dqp,
+ int type)
+{
+ /* no warnings for project quotas - we just return ENOSPC later */
+ if (dqp->dq_flags & XFS_DQ_PROJ)
+ return;
+ quota_send_warning(make_kqid(&init_user_ns,
+ (dqp->dq_flags & XFS_DQ_USER) ?
+ USRQUOTA : GRPQUOTA,
+ be32_to_cpu(dqp->q_core.d_id)),
+ mp->m_super->s_dev, type);
+}
+
+/*
+ * See if we'd go over the hardlimit or exceed the timelimit if we allocate
+ * nblks.
+ */
+static bool
+xfs_dqlimits_exceeded(
+ struct xfs_mount *mp,
+ struct xfs_dquot *dqp,
+ bool blklimit,
+ xfs_qcnt_t total_count,
+ xfs_qcnt_t hardlimit,
+ xfs_qcnt_t softlimit,
+ time_t timer,
+ xfs_qwarncnt_t warns,
+ xfs_qwarncnt_t warnlimit)
+{
+ if (hardlimit && total_count > hardlimit) {
+ xfs_quota_warn(mp, dqp, blklimit ? QUOTA_NL_BHARDWARN
+ : QUOTA_NL_IHARDWARN);
+ return true;
+ }
+
+ if (softlimit && total_count > softlimit) {
+ if ((timer && get_seconds() > timer) ||
+ (warns && warns >= warnlimit)) {
+ xfs_quota_warn(mp, dqp, blklimit
+ ? QUOTA_NL_BSOFTLONGWARN
+ : QUOTA_NL_ISOFTLONGWARN);
+ return true;
+ }
+ xfs_quota_warn(mp, dqp, blklimit ? QUOTA_NL_BSOFTWARN
+ : QUOTA_NL_ISOFTWARN);
+ }
+ return false;
+}
+
+/*
+ * Make the required reservation, first checking the limits provided (if
+ * required) to see if we'd exceed the quota limits.
+ */
+static xfs_qcnt_t
+xfs_dqresv_cmpxchg(
+ struct xfs_mount *mp,
+ struct xfs_dquot *dqp,
+ xfs_qcnt_t *cntp,
+ xfs_qcnt_t diff,
+ bool blklimit,
+ bool enforce,
+ xfs_qcnt_t hardlimit,
+ xfs_qcnt_t softlimit,
+ time_t timer,
+ xfs_qwarncnt_t warns,
+ xfs_qwarncnt_t warnlimit)
+{
+ xfs_qcnt_t count;
+ xfs_qcnt_t old;
+
+ do {
+ xfs_qcnt_t total_count;
+
+ count = ACCESS_ONCE(*cntp);
+ total_count = count + diff;
+ if (enforce &&
+ xfs_dqlimits_exceeded(mp, dqp, blklimit, total_count,
+ hardlimit, softlimit, timer, warns,
+ warnlimit))
+ return -1ULL;
+
+ old = count;
+ count = cmpxchg64(cntp, old, total_count);
+ } while (count != old);
+
+ return old;
+}
+
/*
* Add the locked dquot to the transaction.
* The dquot must be locked, and it cannot be associated with any
@@ -315,6 +406,18 @@ xfs_trans_dqlockedjoin(
}
}
+static int64_t
+xfs_dqresv_return(
+ xfs_qcnt_t resv,
+ xfs_qcnt_t resv_used,
+ xfs_qcnt_t delta)
+{
+ if (!resv)
+ return delta;
+ if (resv > resv_used)
+ return resv_used - resv;
+ return resv - resv_used;
+}
/*
* Called by xfs_trans_commit() and similar in spirit to
@@ -334,6 +437,7 @@ xfs_trans_apply_dquot_deltas(
struct xfs_disk_dquot *d;
long totalbdelta;
long totalrtbdelta;
+ struct xfs_mount *mp = tp->t_mountp;
if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
@@ -350,6 +454,7 @@ xfs_trans_apply_dquot_deltas(
xfs_trans_dqlockedjoin(tp, qa);
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+ int64_t diff;
qtrx = &qa[i];
/*
* The array of dquots is filled
@@ -419,73 +524,46 @@ xfs_trans_apply_dquot_deltas(
* add this to the list of items to get logged
*/
xfs_trans_log_dquot(tp, dqp);
+
/*
* Take off what's left of the original reservation.
* In case of delayed allocations, there's no
* reservation that a transaction structure knows of.
*/
- if (qtrx->qt_blk_res != 0) {
- if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
- if (qtrx->qt_blk_res >
- qtrx->qt_blk_res_used)
- dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res -
- qtrx->qt_blk_res_used);
- else
- dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res_used -
- qtrx->qt_blk_res);
- }
- } else {
- /*
- * These blks were never reserved, either inside
- * a transaction or outside one (in a delayed
- * allocation). Also, this isn't always a
- * negative number since we sometimes
- * deliberately skip quota reservations.
- */
- if (qtrx->qt_bcount_delta) {
- dqp->q_res_bcount +=
- (xfs_qcnt_t)qtrx->qt_bcount_delta;
- }
- }
+ diff = xfs_dqresv_return(qtrx->qt_blk_res,
+ qtrx->qt_blk_res_used,
+ qtrx->qt_bcount_delta);
+ if (diff)
+ xfs_dqresv_cmpxchg(mp, dqp, &dqp->q_res_bcount,
+ diff, true, false, 0, 0, 0,
+ 0, 0);
/*
* Adjust the RT reservation.
*/
- if (qtrx->qt_rtblk_res != 0) {
- if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
- if (qtrx->qt_rtblk_res >
- qtrx->qt_rtblk_res_used)
- dqp->q_res_rtbcount -= (xfs_qcnt_t)
- (qtrx->qt_rtblk_res -
- qtrx->qt_rtblk_res_used);
- else
- dqp->q_res_rtbcount -= (xfs_qcnt_t)
- (qtrx->qt_rtblk_res_used -
- qtrx->qt_rtblk_res);
- }
- } else {
- if (qtrx->qt_rtbcount_delta)
- dqp->q_res_rtbcount +=
- (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
- }
+ diff = xfs_dqresv_return(qtrx->qt_rtblk_res,
+ qtrx->qt_rtblk_res_used,
+ qtrx->qt_rtbcount_delta);
+ if (diff)
+ xfs_dqresv_cmpxchg(mp, dqp, &dqp->q_res_rtbcount,
+ diff, true, false, 0, 0, 0,
+ 0, 0);
/*
* Adjust the inode reservation.
*/
- if (qtrx->qt_ino_res != 0) {
+ if (qtrx->qt_ino_res == 0)
+ diff = qtrx->qt_icount_delta;
+ else {
ASSERT(qtrx->qt_ino_res >=
qtrx->qt_ino_res_used);
- if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
- dqp->q_res_icount -= (xfs_qcnt_t)
- (qtrx->qt_ino_res -
- qtrx->qt_ino_res_used);
- } else {
- if (qtrx->qt_icount_delta)
- dqp->q_res_icount +=
- (xfs_qcnt_t)qtrx->qt_icount_delta;
+ diff = qtrx->qt_ino_res - qtrx->qt_ino_res_used;
+ if (diff < 0)
+ diff = 0;
}
-
+ if (diff)
+ xfs_dqresv_cmpxchg(mp, dqp, &dqp->q_res_icount,
+ diff, true, false, 0, 0, 0,
+ 0, 0);
ASSERT(dqp->q_res_bcount >=
be64_to_cpu(dqp->q_core.d_bcount));
ASSERT(dqp->q_res_icount >=
@@ -562,22 +640,6 @@ xfs_trans_unreserve_and_mod_dquots(
}
}
-STATIC void
-xfs_quota_warn(
- struct xfs_mount *mp,
- struct xfs_dquot *dqp,
- int type)
-{
- /* no warnings for project quotas - we just return ENOSPC later */
- if (dqp->dq_flags & XFS_DQ_PROJ)
- return;
- quota_send_warning(make_kqid(&init_user_ns,
- (dqp->dq_flags & XFS_DQ_USER) ?
- USRQUOTA : GRPQUOTA,
- be32_to_cpu(dqp->q_core.d_id)),
- mp->m_super->s_dev, type);
-}
-
/*
* This reserves disk blocks and inodes against a dquot.
* Flags indicate if the dquot is to be locked here and also
@@ -591,20 +653,35 @@ xfs_trans_dqresv(
xfs_dquot_t *dqp,
long nblks,
long ninos,
- uint flags)
+ uint flags,
+ bool enforce)
{
xfs_qcnt_t hardlimit;
xfs_qcnt_t softlimit;
time_t timer;
xfs_qwarncnt_t warns;
xfs_qwarncnt_t warnlimit;
- xfs_qcnt_t total_count;
+ xfs_qcnt_t oldcnt;
xfs_qcnt_t *resbcountp;
xfs_quotainfo_t *q = mp->m_quotainfo;
+ /*
+ * Lockless reservation algorithm:
+ *
+ * sample block count, inode count, timers and limits
+ * cmpxchg loop to modify block reservation
+ * check limits:
+ * if over, check limits have not changed
+ * no change, fail
+ * cmpxchg block reservation
+ *
+ * if transaction, modify transaction context w/ change deltas.
+ * no locks required for this as context is private to transaction.
+ */
+ if (nblks == 0)
+ goto do_ninos;
- xfs_dqlock(dqp);
-
+ smp_mb();
if (flags & XFS_TRANS_DQ_RES_BLKS) {
hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
if (!hardlimit)
@@ -630,69 +707,35 @@ xfs_trans_dqresv(
resbcountp = &dqp->q_res_rtbcount;
}
- if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
- dqp->q_core.d_id &&
- ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
- (XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) ||
- (XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) {
- if (nblks > 0) {
- /*
- * dquot is locked already. See if we'd go over the
- * hardlimit or exceed the timelimit if we allocate
- * nblks.
- */
- total_count = *resbcountp + nblks;
- if (hardlimit && total_count > hardlimit) {
- xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
- goto error_return;
- }
- if (softlimit && total_count > softlimit) {
- if ((timer != 0 && get_seconds() > timer) ||
- (warns != 0 && warns >= warnlimit)) {
- xfs_quota_warn(mp, dqp,
- QUOTA_NL_BSOFTLONGWARN);
- goto error_return;
- }
-
- xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
- }
- }
- if (ninos > 0) {
- total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
- timer = be32_to_cpu(dqp->q_core.d_itimer);
- warns = be16_to_cpu(dqp->q_core.d_iwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
- hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
- if (!hardlimit)
- hardlimit = q->qi_ihardlimit;
- softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
- if (!softlimit)
- softlimit = q->qi_isoftlimit;
-
- if (hardlimit && total_count > hardlimit) {
- xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
- goto error_return;
- }
- if (softlimit && total_count > softlimit) {
- if ((timer != 0 && get_seconds() > timer) ||
- (warns != 0 && warns >= warnlimit)) {
- xfs_quota_warn(mp, dqp,
- QUOTA_NL_ISOFTLONGWARN);
- goto error_return;
- }
- xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
- }
- }
- }
-
- /*
- * Change the reservation, but not the actual usage.
- * Note that q_res_bcount = q_core.d_bcount + resv
- */
- (*resbcountp) += (xfs_qcnt_t)nblks;
- if (ninos != 0)
- dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
+ oldcnt = xfs_dqresv_cmpxchg(mp, dqp, resbcountp, nblks, true, enforce,
+ hardlimit, softlimit, timer, warns,
+ warnlimit);
+ if (oldcnt == (xfs_qcnt_t)-1ULL)
+ goto error_return;
+
+do_ninos:
+ if (ninos == 0)
+ goto do_trans;
+
+ smp_mb();
+ timer = be32_to_cpu(dqp->q_core.d_itimer);
+ warns = be16_to_cpu(dqp->q_core.d_iwarns);
+ warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+ hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+ if (!hardlimit)
+ hardlimit = q->qi_ihardlimit;
+ softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+ if (!softlimit)
+ softlimit = q->qi_isoftlimit;
+ resbcountp = &dqp->q_res_icount;
+
+ oldcnt = xfs_dqresv_cmpxchg(mp, dqp, resbcountp, ninos, false, enforce,
+ hardlimit, softlimit, timer, warns,
+ warnlimit);
+ if (oldcnt == (xfs_qcnt_t)-1ULL)
+ goto error_undo_nblks;
+
+do_trans:
/*
* note the reservation amt in the trans struct too,
* so that the transaction knows how much was reserved by
@@ -700,27 +743,30 @@ xfs_trans_dqresv(
* We don't do this when we are reserving for a delayed allocation,
* because we don't have the luxury of a transaction envelope then.
*/
- if (tp) {
- ASSERT(tp->t_dqinfo);
- ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
- if (nblks != 0)
- xfs_trans_mod_dquot(tp, dqp,
- flags & XFS_QMOPT_RESBLK_MASK,
- nblks);
- if (ninos != 0)
- xfs_trans_mod_dquot(tp, dqp,
- XFS_TRANS_DQ_RES_INOS,
- ninos);
- }
- ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
- ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
- ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
+ if (!tp)
+ return 0;
- xfs_dqunlock(dqp);
+ ASSERT(tp->t_dqinfo);
+ ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+ if (nblks)
+ xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK,
+ nblks);
+ if (ninos != 0)
+ xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos);
return 0;
+error_undo_nblks:
+ /* ninos reservation failed, so if we changed nblks, undo that. */
+ if (nblks) {
+ if (flags & XFS_TRANS_DQ_RES_BLKS)
+ resbcountp = &dqp->q_res_bcount;
+ else
+ resbcountp = &dqp->q_res_rtbcount;
+ xfs_dqresv_cmpxchg(mp, dqp, resbcountp, -nblks, true, false,
+ 0, 0, 0, 0, 0);
+ }
+
error_return:
- xfs_dqunlock(dqp);
if (flags & XFS_QMOPT_ENOSPC)
return ENOSPC;
return EDQUOT;
@@ -751,6 +797,7 @@ xfs_trans_reserve_quota_bydquots(
uint flags)
{
int error;
+ bool enforce;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
@@ -761,20 +808,28 @@ xfs_trans_reserve_quota_bydquots(
ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
if (udqp) {
+ enforce = !(flags & XFS_QMOPT_FORCE_RES) &&
+ udqp->q_core.d_id && XFS_IS_UQUOTA_ENFORCED(mp);
error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
- (flags & ~XFS_QMOPT_ENOSPC));
+ (flags & ~XFS_QMOPT_ENOSPC), enforce);
if (error)
return error;
}
if (gdqp) {
- error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
+ enforce = !(flags & XFS_QMOPT_FORCE_RES) &&
+ gdqp->q_core.d_id && XFS_IS_GQUOTA_ENFORCED(mp);
+ error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags,
+ enforce);
if (error)
goto unwind_usr;
}
if (pdqp) {
- error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags);
+ enforce = !(flags & XFS_QMOPT_FORCE_RES) &&
+ pdqp->q_core.d_id && XFS_IS_PQUOTA_ENFORCED(mp);
+ error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags,
+ enforce);
if (error)
goto unwind_grp;
}
@@ -784,14 +839,13 @@ xfs_trans_reserve_quota_bydquots(
*/
return 0;
+ /* unwinding does not require limit enforcement. */
unwind_grp:
- flags |= XFS_QMOPT_FORCE_RES;
if (gdqp)
- xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags);
+ xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags, false);
unwind_usr:
- flags |= XFS_QMOPT_FORCE_RES;
if (udqp)
- xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags);
+ xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags, false);
return error;
}
--
1.8.4.rc3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2013-12-12 9:41 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-12 9:40 [PATCH 0/3] xfs: dquot modification scalability Dave Chinner
2013-12-12 9:40 ` [PATCH 1/3] xfs: remote dquot hints Dave Chinner
2013-12-12 18:33 ` Christoph Hellwig
2013-12-12 9:40 ` [PATCH 2/3] xfs: dquot refcounting by atomics Dave Chinner
2013-12-13 13:23 ` Christoph Hellwig
2013-12-12 9:40 ` Dave Chinner [this message]
2013-12-13 13:37 ` [PATCH 3/3] xfs: xfs_trans_dqresv() can be made lockless Christoph Hellwig
2013-12-16 0:11 ` Dave Chinner
2013-12-12 10:25 ` [PATCH 4/3] xfs: xfs_qm_dqrele mostly doesn't need locking Dave Chinner
2013-12-13 13:28 ` Christoph Hellwig
2013-12-13 21:30 ` Dave Chinner
2013-12-16 18:21 ` Christoph Hellwig
2013-12-13 16:30 ` [PATCH 5/3] xfs: return unlocked dquots from xfs_qm_dqqet Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1386841258-22183-4-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox