From: Dmitry Monakhov <dmonakhov@openvz.org>
To: Jan Kara <jack@suse.cz>
Cc: linux-fsdevel@vger.kernel.org, Dmitry Monakhov <dmonakhov@openvz.org>
Subject: [PATCH] quota: quota initialization improvements
Date: Thu, 17 Dec 2009 04:14:37 +0300 [thread overview]
Message-ID: <1261012477-14526-1-git-send-email-dmonakhov@openvz.org> (raw)
In-Reply-To: <0KUR00I4YVKZG820@mail.2ka.mipt.ru>
vfs_dq_init() is called from many places (almost from all generic
vfs functions). What's why this function is so performance critical.
Current dquot_initialize() implementation is far from optimal because:
Usually it called for inode which already has quota initialized
This fast path may be easily optimized. Just check i_dquot[].
In fact, no locking is needed here.
In case of slow path(where inode has no quota yet) it requires to hold
dqptr_sem for write. This assumtions comes from the the rule what states:
"Any changes with i_dquot[] must be protected by sem locked for write".
In fact changing i_dquot from NULL => !NULL (is what vfs_dq_init does)
is differ from changing it from !NULL => NULL (vfs_dq_drop, and others)
The difference is what if we allow i_dquot[] to change from
NULL => !NULL under dqptr_sem locked for read this will not break usual
charging functions (alloc/claim/free, etc.) because pointer assignment
is atomic for all arch. The only changes we have to do to prevent races
is just atomically copy i_dquot[] to local variable at the beginning of
each charging function.
Race between concurrent init() functions is solved by cmpxchg().
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
fs/quota/dquot.c | 87 ++++++++++++++++++++++++++++++++---------------------
1 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index de7e7de..6979224 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1246,6 +1246,9 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
* Initialize quota pointers in inode
* We do things in a bit complicated way but by that we avoid calling
* dqget() and thus filesystem callbacks under dqptr_sem.
+ * Lock dqptr_sem for read here, is acceptable, because i_dquot it is
+ * acceptable to change the value from NULL => !NULL, opposite change
+ * must be protected by dqptr_sem for write.
*/
int dquot_initialize(struct inode *inode, int type)
{
@@ -1258,6 +1261,9 @@ int dquot_initialize(struct inode *inode, int type)
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return 0;
+ /* Check whenever quota was already initialized */
+ if (!dqinit_needed(inode, type))
+ return 0;
/* First get references to structures we might need. */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1274,7 +1280,7 @@ int dquot_initialize(struct inode *inode, int type)
got[cnt] = dqget(sb, id, cnt);
}
- down_write(&sb_dqopt(sb)->dqptr_sem);
+ down_read(&sb_dqopt(sb)->dqptr_sem);
/* Having dqptr_sem we know NOQUOTA flags can't be altered... */
if (IS_NOQUOTA(inode))
goto out_err;
@@ -1284,13 +1290,11 @@ int dquot_initialize(struct inode *inode, int type)
/* Avoid races with quotaoff() */
if (!sb_has_quota_active(sb, cnt))
continue;
- if (!inode->i_dquot[cnt]) {
- inode->i_dquot[cnt] = got[cnt];
+ if (!cmpxchg(&inode->i_dquot[cnt], NULL, got[cnt]))
got[cnt] = NULL;
- }
}
out_err:
- up_write(&sb_dqopt(sb)->dqptr_sem);
+ up_read(&sb_dqopt(sb)->dqptr_sem);
/* Drop unused references */
dqput_all(got);
return ret;
@@ -1416,6 +1420,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
{
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
/*
* First test before acquiring mutex - solves deadlocks when we
@@ -1432,14 +1437,16 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
goto out_unlock;
}
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ dquot[cnt] = inode->i_dquot[cnt];
warntype[cnt] = QUOTA_NL_NOWARN;
+ }
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
+ if (check_bdq(dquot[cnt], number, warn, warntype+cnt)
== NO_QUOTA) {
ret = NO_QUOTA;
spin_unlock(&dq_data_lock);
@@ -1447,21 +1454,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
if (reserve)
- dquot_resv_space(inode->i_dquot[cnt], number);
+ dquot_resv_space(dquot[cnt], number);
else
- dquot_incr_space(inode->i_dquot[cnt], number);
+ dquot_incr_space(dquot[cnt], number);
}
inode_incr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
if (reserve)
goto out_flush_warn;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
out_flush_warn:
- flush_warnings(inode->i_dquot, warntype);
+ flush_warnings(dquot, warntype);
out_unlock:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
@@ -1487,13 +1494,16 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
{
int cnt, ret = NO_QUOTA;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return QUOTA_OK;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ dquot[cnt] = inode->i_dquot[cnt];
warntype[cnt] = QUOTA_NL_NOWARN;
+ }
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) {
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1501,24 +1511,24 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
}
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- if (check_idq(inode->i_dquot[cnt], number, warntype+cnt)
+ if (check_idq(dquot[cnt], number, warntype+cnt)
== NO_QUOTA)
goto warn_put_all;
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- dquot_incr_inodes(inode->i_dquot[cnt], number);
+ dquot_incr_inodes(dquot[cnt], number);
}
ret = QUOTA_OK;
warn_put_all:
spin_unlock(&dq_data_lock);
if (ret == QUOTA_OK)
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquot);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
}
@@ -1528,6 +1538,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
{
int cnt;
int ret = QUOTA_OK;
+ struct dquot *dquot[MAXQUOTAS];
if (IS_NOQUOTA(inode)) {
inode_claim_rsv_space(inode, number);
@@ -1544,14 +1555,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_claim_reserved_space(inode->i_dquot[cnt],
- number);
+ dquot[cnt] = inode->i_dquot[cnt];
+ if (dquot[cnt])
+ dquot_claim_reserved_space(dquot[cnt], number);
}
/* Update inode bytes */
inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
@@ -1565,6 +1576,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
+
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1582,22 +1595,23 @@ out_sub:
}
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ dquot[cnt] = inode->i_dquot[cnt];
+ if (!dquot[cnt])
continue;
- warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+ warntype[cnt] = info_bdq_free(dquot[cnt], number);
if (reserve)
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ dquot_free_reserved_space(dquot[cnt], number);
else
- dquot_decr_space(inode->i_dquot[cnt], number);
+ dquot_decr_space(dquot[cnt], number);
}
inode_decr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
if (reserve)
goto out_unlock;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
out_unlock:
- flush_warnings(inode->i_dquot, warntype);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
@@ -1625,6 +1639,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
+
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1639,14 +1655,15 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
}
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ dquot[cnt] = inode->i_dquot[cnt];
+ if (!dquot[cnt])
continue;
- warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
- dquot_decr_inodes(inode->i_dquot[cnt], number);
+ warntype[cnt] = info_idq_free(dquot[cnt], number);
+ dquot_decr_inodes(dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquot);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
--
1.6.0.4
next parent reply other threads:[~2009-12-17 1:14 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <0KUR00I4YVKZG820@mail.2ka.mipt.ru>
2009-12-17 1:14 ` Dmitry Monakhov [this message]
2009-12-17 1:15 ` [PATCH] ext4: journalled quota seed-up Dmitry Monakhov
2009-12-17 1:33 ` Dmitry Monakhov
2009-12-17 10:07 ` Dmitry Monakhov
2009-12-17 16:06 ` Jan Kara
2009-12-17 16:37 ` Dmitry Monakhov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1261012477-14526-1-git-send-email-dmonakhov@openvz.org \
--to=dmonakhov@openvz.org \
--cc=jack@suse.cz \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).