linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dmitry Monakhov <dmonakhov@openvz.org>
To: Jan Kara <jack@suse.cz>
Cc: linux-fsdevel@vger.kernel.org, Dmitry Monakhov <dmonakhov@openvz.org>
Subject: [PATCH] quota: quota initialization improvements
Date: Thu, 17 Dec 2009 04:14:37 +0300	[thread overview]
Message-ID: <1261012477-14526-1-git-send-email-dmonakhov@openvz.org> (raw)
In-Reply-To: <0KUR00I4YVKZG820@mail.2ka.mipt.ru>

vfs_dq_init() is called from many places (almost from all generic
vfs functions). What's why this function is so performance critical.
Current dquot_initialize() implementation is far from optimal because:

Usually it called for inode which already has quota initialized
 This fast path may be easily optimized. Just check i_dquot[].
 In fact, no locking is needed here.

In case of slow path(where inode has no quota yet) it requires to hold
dqptr_sem for write. This assumtions comes from the the rule what states:
"Any changes with i_dquot[] must be protected by sem locked for write".
In fact changing i_dquot from NULL => !NULL  (is what vfs_dq_init does)
is differ from changing it from !NULL => NULL (vfs_dq_drop, and others)

The difference is what if we allow i_dquot[] to change from
NULL => !NULL under dqptr_sem locked for read this will not break usual
charging functions (alloc/claim/free, etc.) because pointer assignment
is atomic for all arch. The only changes we have to do to prevent races
is just atomically copy i_dquot[] to local variable at the beginning of
each charging function.
Race between concurrent init() functions is solved by cmpxchg().

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/quota/dquot.c |   87 ++++++++++++++++++++++++++++++++---------------------
 1 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index de7e7de..6979224 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1246,6 +1246,9 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
  *	Initialize quota pointers in inode
  *	We do things in a bit complicated way but by that we avoid calling
  *	dqget() and thus filesystem callbacks under dqptr_sem.
+ *	Lock dqptr_sem for read here, is acceptable, because i_dquot it is
+ *	acceptable to change the value from NULL => !NULL, opposite change
+ *	must be protected by dqptr_sem for write.
  */
 int dquot_initialize(struct inode *inode, int type)
 {
@@ -1258,6 +1261,9 @@ int dquot_initialize(struct inode *inode, int type)
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return 0;
+	/* Check whenever quota was already initialized */
+	if (!dqinit_needed(inode, type))
+		return 0;
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1274,7 +1280,7 @@ int dquot_initialize(struct inode *inode, int type)
 		got[cnt] = dqget(sb, id, cnt);
 	}
 
-	down_write(&sb_dqopt(sb)->dqptr_sem);
+	down_read(&sb_dqopt(sb)->dqptr_sem);
 	/* Having dqptr_sem we know NOQUOTA flags can't be altered... */
 	if (IS_NOQUOTA(inode))
 		goto out_err;
@@ -1284,13 +1290,11 @@ int dquot_initialize(struct inode *inode, int type)
 		/* Avoid races with quotaoff() */
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
-		if (!inode->i_dquot[cnt]) {
-			inode->i_dquot[cnt] = got[cnt];
+		if (!cmpxchg(&inode->i_dquot[cnt], NULL, got[cnt]))
 			got[cnt] = NULL;
-		}
 	}
 out_err:
-	up_write(&sb_dqopt(sb)->dqptr_sem);
+	up_read(&sb_dqopt(sb)->dqptr_sem);
 	/* Drop unused references */
 	dqput_all(got);
 	return ret;
@@ -1416,6 +1420,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 {
 	int cnt, ret = QUOTA_OK;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
@@ -1432,14 +1437,16 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		goto out_unlock;
 	}
 
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		dquot[cnt] = inode->i_dquot[cnt];
 		warntype[cnt] = QUOTA_NL_NOWARN;
+	}
 
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
+		if (check_bdq(dquot[cnt], number, warn, warntype+cnt)
 		    == NO_QUOTA) {
 			ret = NO_QUOTA;
 			spin_unlock(&dq_data_lock);
@@ -1447,21 +1454,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
 		if (reserve)
-			dquot_resv_space(inode->i_dquot[cnt], number);
+			dquot_resv_space(dquot[cnt], number);
 		else
-			dquot_incr_space(inode->i_dquot[cnt], number);
+			dquot_incr_space(dquot[cnt], number);
 	}
 	inode_incr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_flush_warn;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 out_flush_warn:
-	flush_warnings(inode->i_dquot, warntype);
+	flush_warnings(dquot, warntype);
 out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
@@ -1487,13 +1494,16 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 {
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		dquot[cnt] = inode->i_dquot[cnt];
 		warntype[cnt] = QUOTA_NL_NOWARN;
+	}
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {
 		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1501,24 +1511,24 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt)
+		if (check_idq(dquot[cnt], number, warntype+cnt)
 		    == NO_QUOTA)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		dquot_incr_inodes(inode->i_dquot[cnt], number);
+		dquot_incr_inodes(dquot[cnt], number);
 	}
 	ret = QUOTA_OK;
 warn_put_all:
 	spin_unlock(&dq_data_lock);
 	if (ret == QUOTA_OK)
-		mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+		mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return ret;
 }
@@ -1528,6 +1538,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 {
 	int cnt;
 	int ret = QUOTA_OK;
+	struct dquot *dquot[MAXQUOTAS];
 
 	if (IS_NOQUOTA(inode)) {
 		inode_claim_rsv_space(inode, number);
@@ -1544,14 +1555,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_claim_reserved_space(inode->i_dquot[cnt],
-							number);
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (dquot[cnt])
+			dquot_claim_reserved_space(dquot[cnt], number);
 	}
 	/* Update inode bytes */
 	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
 	return ret;
@@ -1565,6 +1576,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
+
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1582,22 +1595,23 @@ out_sub:
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (!dquot[cnt])
 			continue;
-		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_bdq_free(dquot[cnt], number);
 		if (reserve)
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+			dquot_free_reserved_space(dquot[cnt], number);
 		else
-			dquot_decr_space(inode->i_dquot[cnt], number);
+			dquot_decr_space(dquot[cnt], number);
 	}
 	inode_decr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_unlock;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 out_unlock:
-	flush_warnings(inode->i_dquot, warntype);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1625,6 +1639,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
+
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1639,14 +1655,15 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (!dquot[cnt])
 			continue;
-		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
-		dquot_decr_inodes(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_idq_free(dquot[cnt], number);
+		dquot_decr_inodes(dquot[cnt], number);
 	}
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+	mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
-- 
1.6.0.4


       reply	other threads:[~2009-12-17  1:14 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <0KUR00I4YVKZG820@mail.2ka.mipt.ru>
2009-12-17  1:14 ` Dmitry Monakhov [this message]
2009-12-17  1:15 ` [PATCH] ext4: journalled quota seed-up Dmitry Monakhov
2009-12-17  1:33   ` Dmitry Monakhov
2009-12-17 10:07     ` Dmitry Monakhov
2009-12-17 16:06   ` Jan Kara
2009-12-17 16:37     ` Dmitry Monakhov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1261012477-14526-1-git-send-email-dmonakhov@openvz.org \
    --to=dmonakhov@openvz.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).