* [PATCH 01/11] quota: add wrapper function
2010-10-05 18:20 (unknown), Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 8:56 ` Christoph Hellwig
2010-10-06 10:01 ` Jan Kara
2010-10-05 18:20 ` [PATCH 02/11] quota: Convert dq_state_lock to per-sb dq_state_lock Dmitry Monakhov
` (10 subsequent siblings)
11 siblings, 2 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
This helps us to make code more readable.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/ext3/super.c | 2 +-
fs/ext4/super.c | 2 +-
fs/ocfs2/quota_global.c | 4 ++--
fs/quota/dquot.c | 18 +++++++++---------
fs/quota/quota_tree.c | 2 +-
fs/quota/quota_v1.c | 8 ++++----
include/linux/quota.h | 1 +
include/linux/quotaops.h | 4 ++++
8 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 15f63d3..c5b0c1d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2751,7 +2751,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
- return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+ return dq_opt(dquot)->files[dquot->dq_type];
}
static int ext3_write_dquot(struct dquot *dquot)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bcf86b3..301462a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3968,7 +3968,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
- return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+ return dq_opt(dquot)->files[dquot->dq_type];
}
static int ext4_write_dquot(struct dquot *dquot)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923..2c88a87 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -657,9 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
mlog_errno(status);
goto out;
}
- mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
+ mutex_lock(&dq_opt(dquot)->dqio_mutex);
status = ocfs2_local_write_dquot(dquot);
- mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
+ mutex_unlock(&dq_opt(dquot)->dqio_mutex);
ocfs2_commit_trans(osb, handle);
out:
mlog_exit(status);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 686c2b7..ce542e2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -346,7 +346,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
spin_lock(&dq_list_lock);
if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
- list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
+ list_add(&dquot->dq_dirty, &dq_opt(dquot)->
info[dquot->dq_type].dqi_dirty_list);
ret = 0;
}
@@ -401,7 +401,7 @@ EXPORT_SYMBOL(mark_info_dirty);
int dquot_acquire(struct dquot *dquot)
{
int ret = 0, ret2 = 0;
- struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ struct quota_info *dqopt = dq_opt(dquot);
mutex_lock(&dquot->dq_lock);
mutex_lock(&dqopt->dqio_mutex);
@@ -439,7 +439,7 @@ EXPORT_SYMBOL(dquot_acquire);
int dquot_commit(struct dquot *dquot)
{
int ret = 0, ret2 = 0;
- struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ struct quota_info *dqopt = dq_opt(dquot);
mutex_lock(&dqopt->dqio_mutex);
spin_lock(&dq_list_lock);
@@ -471,7 +471,7 @@ EXPORT_SYMBOL(dquot_commit);
int dquot_release(struct dquot *dquot)
{
int ret = 0, ret2 = 0;
- struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ struct quota_info *dqopt = dq_opt(dquot);
mutex_lock(&dquot->dq_lock);
/* Check whether we are not racing with some other dqget() */
@@ -1081,7 +1081,7 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
- if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ if (dq_opt(dquot)->flags & DQUOT_NEGATIVE_USAGE ||
dquot->dq_dqb.dqb_curinodes >= number)
dquot->dq_dqb.dqb_curinodes -= number;
else
@@ -1093,7 +1093,7 @@ static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
static void dquot_decr_space(struct dquot *dquot, qsize_t number)
{
- if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ if (dq_opt(dquot)->flags & DQUOT_NEGATIVE_USAGE ||
dquot->dq_dqb.dqb_curspace >= number)
dquot->dq_dqb.dqb_curspace -= number;
else
@@ -1203,7 +1203,7 @@ static void flush_warnings(struct dquot *const *dquots, char *warntype)
static int ignore_hardlimit(struct dquot *dquot)
{
- struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
+ struct mem_dqinfo *info = &dq_opt(dquot)->info[dquot->dq_type];
return capable(CAP_SYS_RESOURCE) &&
(info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
@@ -1241,7 +1241,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
dquot->dq_dqb.dqb_itime == 0) {
*warntype = QUOTA_NL_ISOFTWARN;
dquot->dq_dqb.dqb_itime = get_seconds() +
- sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
+ dq_opt(dquot)->info[dquot->dq_type].dqi_igrace;
}
return 0;
@@ -2331,7 +2331,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
{
struct mem_dqblk *dm = &dquot->dq_dqb;
int check_blim = 0, check_ilim = 0;
- struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
+ struct mem_dqinfo *dqi = &dq_opt(dquot)->info[dquot->dq_type];
if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
return -EINVAL;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 9e48874..3eb95ec 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -596,7 +596,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
#ifdef __QUOTA_QT_PARANOIA
/* Invalidated quota? */
- if (!sb_dqopt(dquot->dq_sb)->files[type]) {
+ if (!dq_opt(dquot)->files[type]) {
quota_error(sb, "Quota invalidated while reading!");
return -EIO;
}
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 34b37a6..c7ef325 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -57,7 +57,7 @@ static int v1_read_dqblk(struct dquot *dquot)
int type = dquot->dq_type;
struct v1_disk_dqblk dqblk;
- if (!sb_dqopt(dquot->dq_sb)->files[type])
+ if (!dq_opt(dquot)->files[type])
return -EINVAL;
/* Set structure to 0s in case read fails/is after end of file */
@@ -85,12 +85,12 @@ static int v1_commit_dqblk(struct dquot *dquot)
v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
if (dquot->dq_id == 0) {
dqblk.dqb_btime =
- sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
+ dq_opt(dquot)->info[type].dqi_bgrace;
dqblk.dqb_itime =
- sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
+ dq_opt(dquot)->info[type].dqi_igrace;
}
ret = 0;
- if (sb_dqopt(dquot->dq_sb)->files[type])
+ if (dq_opt(dquot)->files[type])
ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
(char *)&dqblk, sizeof(struct v1_disk_dqblk),
v1_dqoff(dquot->dq_id));
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9a85412..00e1b3d 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -182,6 +182,7 @@ enum {
#include <asm/atomic.h>
+
typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
typedef long long qsize_t; /* Type in which we store sizes */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 9e09c9a..d81cbba 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -17,6 +17,10 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
{
return &sb->s_dquot;
}
+static inline struct quota_info* dq_opt(struct dquot *dq)
+{
+ return sb_dqopt(dq->dq_sb);
+}
/* i_mutex must being held */
static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 01/11] quota: add wrapper function
2010-10-05 18:20 ` [PATCH 01/11] quota: add wrapper function Dmitry Monakhov
@ 2010-10-06 8:56 ` Christoph Hellwig
2010-10-06 10:01 ` Jan Kara
1 sibling, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2010-10-06 8:56 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue, Oct 05, 2010 at 10:20:17PM +0400, Dmitry Monakhov wrote:
> This helps us to make code more readable.
It might be worth mentioning in the subject what wrapper you add..
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 01/11] quota: add wrapper function
2010-10-05 18:20 ` [PATCH 01/11] quota: add wrapper function Dmitry Monakhov
2010-10-06 8:56 ` Christoph Hellwig
@ 2010-10-06 10:01 ` Jan Kara
1 sibling, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 10:01 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:17, Dmitry Monakhov wrote:
> This helps us to make code more readable.
Well, wrappers aren't bad but if there's too many of them, things become
too obscured... But in this case I agree that the expression is common
enough to warrant a wrapper. It's just that the name "dq_opt" suggests to me
to me some property of dquot itself, rather than a per-sb structure. A
better naming to me would be for example:
dqopt(sb) - returns quota_info structure of the superblock
sb_dqopt(dquot) - returns quota_info structure of the superblock dquot
belongs to...
Also with this naming it's clearer that these two functions return the same
thing, just for different types of objects.
Honza
> Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
> ---
> fs/ext3/super.c | 2 +-
> fs/ext4/super.c | 2 +-
> fs/ocfs2/quota_global.c | 4 ++--
> fs/quota/dquot.c | 18 +++++++++---------
> fs/quota/quota_tree.c | 2 +-
> fs/quota/quota_v1.c | 8 ++++----
> include/linux/quota.h | 1 +
> include/linux/quotaops.h | 4 ++++
> 8 files changed, 23 insertions(+), 18 deletions(-)
>
> diff --git a/fs/ext3/super.c b/fs/ext3/super.c
> index 15f63d3..c5b0c1d 100644
> --- a/fs/ext3/super.c
> +++ b/fs/ext3/super.c
> @@ -2751,7 +2751,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
>
> static inline struct inode *dquot_to_inode(struct dquot *dquot)
> {
> - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
> + return dq_opt(dquot)->files[dquot->dq_type];
> }
>
> static int ext3_write_dquot(struct dquot *dquot)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index bcf86b3..301462a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -3968,7 +3968,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
>
> static inline struct inode *dquot_to_inode(struct dquot *dquot)
> {
> - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
> + return dq_opt(dquot)->files[dquot->dq_type];
> }
>
> static int ext4_write_dquot(struct dquot *dquot)
> diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
> index 4607923..2c88a87 100644
> --- a/fs/ocfs2/quota_global.c
> +++ b/fs/ocfs2/quota_global.c
> @@ -657,9 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
> mlog_errno(status);
> goto out;
> }
> - mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
> + mutex_lock(&dq_opt(dquot)->dqio_mutex);
> status = ocfs2_local_write_dquot(dquot);
> - mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
> + mutex_unlock(&dq_opt(dquot)->dqio_mutex);
> ocfs2_commit_trans(osb, handle);
> out:
> mlog_exit(status);
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 686c2b7..ce542e2 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -346,7 +346,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
>
> spin_lock(&dq_list_lock);
> if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
> - list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
> + list_add(&dquot->dq_dirty, &dq_opt(dquot)->
> info[dquot->dq_type].dqi_dirty_list);
> ret = 0;
> }
> @@ -401,7 +401,7 @@ EXPORT_SYMBOL(mark_info_dirty);
> int dquot_acquire(struct dquot *dquot)
> {
> int ret = 0, ret2 = 0;
> - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
> + struct quota_info *dqopt = dq_opt(dquot);
>
> mutex_lock(&dquot->dq_lock);
> mutex_lock(&dqopt->dqio_mutex);
> @@ -439,7 +439,7 @@ EXPORT_SYMBOL(dquot_acquire);
> int dquot_commit(struct dquot *dquot)
> {
> int ret = 0, ret2 = 0;
> - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
> + struct quota_info *dqopt = dq_opt(dquot);
>
> mutex_lock(&dqopt->dqio_mutex);
> spin_lock(&dq_list_lock);
> @@ -471,7 +471,7 @@ EXPORT_SYMBOL(dquot_commit);
> int dquot_release(struct dquot *dquot)
> {
> int ret = 0, ret2 = 0;
> - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
> + struct quota_info *dqopt = dq_opt(dquot);
>
> mutex_lock(&dquot->dq_lock);
> /* Check whether we are not racing with some other dqget() */
> @@ -1081,7 +1081,7 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
>
> static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
> {
> - if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
> + if (dq_opt(dquot)->flags & DQUOT_NEGATIVE_USAGE ||
> dquot->dq_dqb.dqb_curinodes >= number)
> dquot->dq_dqb.dqb_curinodes -= number;
> else
> @@ -1093,7 +1093,7 @@ static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
>
> static void dquot_decr_space(struct dquot *dquot, qsize_t number)
> {
> - if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
> + if (dq_opt(dquot)->flags & DQUOT_NEGATIVE_USAGE ||
> dquot->dq_dqb.dqb_curspace >= number)
> dquot->dq_dqb.dqb_curspace -= number;
> else
> @@ -1203,7 +1203,7 @@ static void flush_warnings(struct dquot *const *dquots, char *warntype)
>
> static int ignore_hardlimit(struct dquot *dquot)
> {
> - struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
> + struct mem_dqinfo *info = &dq_opt(dquot)->info[dquot->dq_type];
>
> return capable(CAP_SYS_RESOURCE) &&
> (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
> @@ -1241,7 +1241,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
> dquot->dq_dqb.dqb_itime == 0) {
> *warntype = QUOTA_NL_ISOFTWARN;
> dquot->dq_dqb.dqb_itime = get_seconds() +
> - sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
> + dq_opt(dquot)->info[dquot->dq_type].dqi_igrace;
> }
>
> return 0;
> @@ -2331,7 +2331,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> {
> struct mem_dqblk *dm = &dquot->dq_dqb;
> int check_blim = 0, check_ilim = 0;
> - struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
> + struct mem_dqinfo *dqi = &dq_opt(dquot)->info[dquot->dq_type];
>
> if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
> return -EINVAL;
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index 9e48874..3eb95ec 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -596,7 +596,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
>
> #ifdef __QUOTA_QT_PARANOIA
> /* Invalidated quota? */
> - if (!sb_dqopt(dquot->dq_sb)->files[type]) {
> + if (!dq_opt(dquot)->files[type]) {
> quota_error(sb, "Quota invalidated while reading!");
> return -EIO;
> }
> diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
> index 34b37a6..c7ef325 100644
> --- a/fs/quota/quota_v1.c
> +++ b/fs/quota/quota_v1.c
> @@ -57,7 +57,7 @@ static int v1_read_dqblk(struct dquot *dquot)
> int type = dquot->dq_type;
> struct v1_disk_dqblk dqblk;
>
> - if (!sb_dqopt(dquot->dq_sb)->files[type])
> + if (!dq_opt(dquot)->files[type])
> return -EINVAL;
>
> /* Set structure to 0s in case read fails/is after end of file */
> @@ -85,12 +85,12 @@ static int v1_commit_dqblk(struct dquot *dquot)
> v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
> if (dquot->dq_id == 0) {
> dqblk.dqb_btime =
> - sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
> + dq_opt(dquot)->info[type].dqi_bgrace;
> dqblk.dqb_itime =
> - sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
> + dq_opt(dquot)->info[type].dqi_igrace;
> }
> ret = 0;
> - if (sb_dqopt(dquot->dq_sb)->files[type])
> + if (dq_opt(dquot)->files[type])
> ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
> (char *)&dqblk, sizeof(struct v1_disk_dqblk),
> v1_dqoff(dquot->dq_id));
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index 9a85412..00e1b3d 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -182,6 +182,7 @@ enum {
>
> #include <asm/atomic.h>
>
> +
> typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
> typedef long long qsize_t; /* Type in which we store sizes */
>
> diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
> index 9e09c9a..d81cbba 100644
> --- a/include/linux/quotaops.h
> +++ b/include/linux/quotaops.h
> @@ -17,6 +17,10 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
> {
> return &sb->s_dquot;
> }
> +static inline struct quota_info* dq_opt(struct dquot *dq)
> +{
> + return sb_dqopt(dq->dq_sb);
> +}
>
> /* i_mutex must being held */
> static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
> --
> 1.6.6.1
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 02/11] quota: Convert dq_state_lock to per-sb dq_state_lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 01/11] quota: add wrapper function Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 10:04 ` Jan Kara
2010-10-05 18:20 ` [PATCH 03/11] quota: add quota format lock Dmitry Monakhov
` (9 subsequent siblings)
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Currently dq_state_lock is global, which is bad for scalability.
In fact, different super_blocks have no shared quota data.
So we may simply convert global lock to per-sb lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 31 +++++++++++++++----------------
fs/super.c | 1 +
include/linux/quota.h | 1 +
3 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ce542e2..eddf5cf 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -128,7 +128,6 @@
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
@@ -819,13 +818,13 @@ struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
return NULL;
we_slept:
spin_lock(&dq_list_lock);
- spin_lock(&dq_state_lock);
+ spin_lock(&sb_dqopt(sb)->dq_state_lock);
if (!sb_has_quota_active(sb, type)) {
- spin_unlock(&dq_state_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_state_lock);
spin_unlock(&dq_list_lock);
goto out;
}
- spin_unlock(&dq_state_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_state_lock);
dquot = find_dquot(hashent, sb, id, type);
if (!dquot) {
@@ -1918,24 +1917,24 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
continue;
if (flags & DQUOT_SUSPENDED) {
- spin_lock(&dq_state_lock);
+ spin_lock(&dqopt->dq_state_lock);
dqopt->flags |=
dquot_state_flag(DQUOT_SUSPENDED, cnt);
- spin_unlock(&dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
} else {
- spin_lock(&dq_state_lock);
+ spin_lock(&dqopt->dq_state_lock);
dqopt->flags &= ~dquot_state_flag(flags, cnt);
/* Turning off suspended quotas? */
if (!sb_has_quota_loaded(sb, cnt) &&
sb_has_quota_suspended(sb, cnt)) {
dqopt->flags &= ~dquot_state_flag(
DQUOT_SUSPENDED, cnt);
- spin_unlock(&dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
iput(dqopt->files[cnt]);
dqopt->files[cnt] = NULL;
continue;
}
- spin_unlock(&dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
}
/* We still have to keep quota loaded? */
@@ -2112,9 +2111,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
goto out_file_init;
}
mutex_unlock(&dqopt->dqio_mutex);
- spin_lock(&dq_state_lock);
+ spin_lock(&dqopt->dq_state_lock);
dqopt->flags |= dquot_state_flag(flags, type);
- spin_unlock(&dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
add_dquot_ref(sb, type);
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2159,12 +2158,12 @@ int dquot_resume(struct super_block *sb, int type)
}
inode = dqopt->files[cnt];
dqopt->files[cnt] = NULL;
- spin_lock(&dq_state_lock);
+ spin_lock(&dqopt->dq_state_lock);
flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED,
cnt);
dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt);
- spin_unlock(&dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
mutex_unlock(&dqopt->dqonoff_mutex);
flags = dquot_generic_flag(flags, cnt);
@@ -2228,9 +2227,9 @@ int dquot_enable(struct inode *inode, int type, int format_id,
ret = -EBUSY;
goto out_lock;
}
- spin_lock(&dq_state_lock);
- sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
- spin_unlock(&dq_state_lock);
+ spin_lock(&dqopt->dq_state_lock);
+ dqopt->flags |= dquot_state_flag(flags, type);
+ spin_unlock(&dqopt->dq_state_lock);
out_lock:
mutex_unlock(&dqopt->dqonoff_mutex);
return ret;
diff --git a/fs/super.c b/fs/super.c
index 8819e3a..b54cb8b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -105,6 +105,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
mutex_init(&s->s_dquot.dqio_mutex);
mutex_init(&s->s_dquot.dqonoff_mutex);
+ spin_lock_init(&s->s_dquot.dq_state_lock);
init_rwsem(&s->s_dquot.dqptr_sem);
init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 00e1b3d..e39b01c 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -398,6 +398,7 @@ struct quota_info {
struct mutex dqio_mutex; /* lock device while I/O in progress */
struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
+ spinlock_t dq_state_lock; /* serialize quota state changes*/
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* [PATCH 03/11] quota: add quota format lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 01/11] quota: add wrapper function Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 02/11] quota: Convert dq_state_lock to per-sb dq_state_lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 10:05 ` Jan Kara
2010-10-05 18:20 ` [PATCH 04/11] quota: make dquot lists per-sb Dmitry Monakhov
` (8 subsequent siblings)
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Currently dq_list_lock is responsible for quota format protection
which is counterproductive. Introduce a dedicated lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 18 +++++++++---------
1 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index eddf5cf..5e0b099 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,6 @@
/*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats.
* dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
* also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -128,6 +127,7 @@
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_fmt_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
@@ -158,10 +158,10 @@ static struct kmem_cache *dquot_cachep;
int register_quota_format(struct quota_format_type *fmt)
{
- spin_lock(&dq_list_lock);
+ spin_lock(&dq_fmt_lock);
fmt->qf_next = quota_formats;
quota_formats = fmt;
- spin_unlock(&dq_list_lock);
+ spin_unlock(&dq_fmt_lock);
return 0;
}
EXPORT_SYMBOL(register_quota_format);
@@ -170,13 +170,13 @@ void unregister_quota_format(struct quota_format_type *fmt)
{
struct quota_format_type **actqf;
- spin_lock(&dq_list_lock);
+ spin_lock(&dq_fmt_lock);
for (actqf = &quota_formats; *actqf && *actqf != fmt;
actqf = &(*actqf)->qf_next)
;
if (*actqf)
*actqf = (*actqf)->qf_next;
- spin_unlock(&dq_list_lock);
+ spin_unlock(&dq_fmt_lock);
}
EXPORT_SYMBOL(unregister_quota_format);
@@ -184,14 +184,14 @@ static struct quota_format_type *find_quota_format(int id)
{
struct quota_format_type *actqf;
- spin_lock(&dq_list_lock);
+ spin_lock(&dq_fmt_lock);
for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id;
actqf = actqf->qf_next)
;
if (!actqf || !try_module_get(actqf->qf_owner)) {
int qm;
- spin_unlock(&dq_list_lock);
+ spin_unlock(&dq_fmt_lock);
for (qm = 0; module_names[qm].qm_fmt_id &&
module_names[qm].qm_fmt_id != id; qm++)
@@ -200,14 +200,14 @@ static struct quota_format_type *find_quota_format(int id)
request_module(module_names[qm].qm_mod_name))
return NULL;
- spin_lock(&dq_list_lock);
+ spin_lock(&dq_fmt_lock);
for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id;
actqf = actqf->qf_next)
;
if (actqf && !try_module_get(actqf->qf_owner))
actqf = NULL;
}
- spin_unlock(&dq_list_lock);
+ spin_unlock(&dq_fmt_lock);
return actqf;
}
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* [PATCH 04/11] quota: make dquot lists per-sb
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (2 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 03/11] quota: add quota format lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 8:57 ` Christoph Hellwig
2010-10-06 10:22 ` Jan Kara
2010-10-05 18:20 ` [PATCH 05/11] quota: make per-sb hash array Dmitry Monakhov
` (7 subsequent siblings)
11 siblings, 2 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Currently quota lists are global which is very bad for scalability.
* inuse_lists -> sb->s_dquot->dq_inuse_list
* free_lists -> sb->s_dquot->dq_free_lists
* Add per sb lock for quota's lists protection
Do not remove dq_list_lock; it is now used only for protecting quota_hash.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 80 ++++++++++++++++++++++++++++++++++++++-----------
fs/super.c | 3 ++
include/linux/quota.h | 4 ++
3 files changed, 69 insertions(+), 18 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5e0b099..f2092d1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -90,7 +90,8 @@
* about latest values take it as well.
*
* The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
- * dq_list_lock > dq_state_lock
+ * dq_list_lock > sb->s_dquot->dq_state_lock
+ * dq_list_lock > sb->s_dquot->dq_list_lock
*
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
@@ -236,8 +237,6 @@ static void put_quota_format(struct quota_format_type *fmt)
* mechanism to locate a specific dquot.
*/
-static LIST_HEAD(inuse_list);
-static LIST_HEAD(free_dquots);
static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;
@@ -289,7 +288,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
/* Add a dquot to the tail of the free list */
static inline void put_dquot_last(struct dquot *dquot)
{
- list_add_tail(&dquot->dq_free, &free_dquots);
+ list_add_tail(&dquot->dq_free, &dq_opt(dquot)->dq_free_list);
dqstats_inc(DQST_FREE_DQUOTS);
}
@@ -305,7 +304,7 @@ static inline void put_inuse(struct dquot *dquot)
{
/* We add to the back of inuse list so we don't have to restart
* when traversing this list and we block */
- list_add_tail(&dquot->dq_inuse, &inuse_list);
+ list_add_tail(&dquot->dq_inuse, &dq_opt(dquot)->dq_inuse_list);
dqstats_inc(DQST_ALLOC_DQUOTS);
}
@@ -338,17 +337,20 @@ static inline int mark_dquot_dirty(struct dquot *dquot)
int dquot_mark_dquot_dirty(struct dquot *dquot)
{
int ret = 1;
+ struct quota_info *dqopt = dq_opt(dquot);
/* If quota is dirty already, we don't have to acquire dq_list_lock */
if (test_bit(DQ_MOD_B, &dquot->dq_flags))
return 1;
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
- list_add(&dquot->dq_dirty, &dq_opt(dquot)->
- info[dquot->dq_type].dqi_dirty_list);
+ list_add(&dquot->dq_dirty,
+ &dqopt->info[dquot->dq_type].dqi_dirty_list);
ret = 0;
}
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
return ret;
}
@@ -442,10 +444,13 @@ int dquot_commit(struct dquot *dquot)
mutex_lock(&dqopt->dqio_mutex);
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
if (!clear_dquot_dirty(dquot)) {
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
goto out_sem;
}
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
/* Inactive dquot can be only if there was error during read/init
* => we have better not writing it */
@@ -515,10 +520,12 @@ static inline void do_destroy_dquot(struct dquot *dquot)
static void invalidate_dquots(struct super_block *sb, int type)
{
struct dquot *dquot, *tmp;
+ struct quota_info *dqopt = sb_dqopt(sb);
restart:
spin_lock(&dq_list_lock);
- list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
+ spin_lock(&dqopt->dq_list_lock);
+ list_for_each_entry_safe(dquot, tmp, &dqopt->dq_inuse_list, dq_inuse) {
if (dquot->dq_sb != sb)
continue;
if (dquot->dq_type != type)
@@ -530,6 +537,7 @@ restart:
atomic_inc(&dquot->dq_count);
prepare_to_wait(&dquot->dq_wait_unused, &wait,
TASK_UNINTERRUPTIBLE);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
/* Once dqput() wakes us up, we know it's time to free
* the dquot.
@@ -556,6 +564,7 @@ restart:
remove_inuse(dquot);
do_destroy_dquot(dquot);
}
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
}
@@ -565,17 +574,20 @@ int dquot_scan_active(struct super_block *sb,
unsigned long priv)
{
struct dquot *dquot, *old_dquot = NULL;
+ struct quota_info *dqopt = sb_dqopt(sb);
int ret = 0;
- mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+ mutex_lock(&dqopt->dqonoff_mutex);
spin_lock(&dq_list_lock);
- list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+ spin_lock(&dqopt->dq_list_lock);
+ list_for_each_entry(dquot, &dqopt->dq_inuse_list, dq_inuse) {
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
continue;
if (dquot->dq_sb != sb)
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
@@ -584,13 +596,15 @@ int dquot_scan_active(struct super_block *sb,
if (ret < 0)
goto out;
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
}
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
out:
dqput(old_dquot);
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ mutex_unlock(&dqopt->dqonoff_mutex);
return ret;
}
EXPORT_SYMBOL(dquot_scan_active);
@@ -609,6 +623,7 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
if (!sb_has_quota_active(sb, cnt))
continue;
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
dirty = &dqopt->info[cnt].dqi_dirty_list;
while (!list_empty(dirty)) {
dquot = list_first_entry(dirty, struct dquot,
@@ -622,12 +637,16 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
* holding reference so we can safely just increase
* use count */
atomic_inc(&dquot->dq_count);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
sb->dq_op->write_dquot(dquot);
dqput(dquot);
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
+
}
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
}
@@ -670,23 +689,30 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
EXPORT_SYMBOL(dquot_quota_sync);
/* Free unused dquots from cache */
-static void prune_dqcache(int count)
+static void prune_one_sb_dqcache(struct super_block *sb, void *arg)
{
struct list_head *head;
struct dquot *dquot;
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int count = *(int*) arg;
- head = free_dquots.prev;
- while (head != &free_dquots && count) {
+ spin_lock(&dqopt->dq_list_lock);
+ head = dqopt->dq_free_list.prev;
+ while (head != &dqopt->dq_free_list && count) {
dquot = list_entry(head, struct dquot, dq_free);
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
do_destroy_dquot(dquot);
count--;
- head = free_dquots.prev;
+ head = dqopt->dq_free_list.prev;
}
+ spin_unlock(&dqopt->dq_list_lock);
+}
+static void prune_dqcache(int count)
+{
+ iterate_supers(prune_one_sb_dqcache, &count);
}
-
/*
* This is called from kswapd when we think we need some
* more memory
@@ -715,6 +741,7 @@ static struct shrinker dqcache_shrinker = {
void dqput(struct dquot *dquot)
{
int ret;
+ struct quota_info *dqopt;
if (!dquot)
return;
@@ -725,9 +752,11 @@ void dqput(struct dquot *dquot)
BUG();
}
#endif
+ dqopt = dq_opt(dquot);
dqstats_inc(DQST_DROPS);
we_slept:
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
/* We have more than one user... nothing to do */
atomic_dec(&dquot->dq_count);
@@ -735,11 +764,13 @@ we_slept:
if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
atomic_read(&dquot->dq_count) == 1)
wake_up(&dquot->dq_wait_unused);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
return;
}
/* Need to release dquot? */
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
ret = dquot->dq_sb->dq_op->write_dquot(dquot);
@@ -752,7 +783,9 @@ we_slept:
* infinite loop here
*/
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
clear_dquot_dirty(dquot);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
}
goto we_slept;
@@ -760,6 +793,7 @@ we_slept:
/* Clear flag in case dquot was inactive (something bad happened) */
clear_dquot_dirty(dquot);
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
dquot->dq_sb->dq_op->release_dquot(dquot);
goto we_slept;
@@ -770,6 +804,7 @@ we_slept:
BUG_ON(!list_empty(&dquot->dq_free));
#endif
put_dquot_last(dquot);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
}
EXPORT_SYMBOL(dqput);
@@ -813,14 +848,17 @@ struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
struct dquot *dquot = NULL, *empty = NULL;
+ struct quota_info *dqopt = sb_dqopt(sb);
if (!sb_has_quota_active(sb, type))
return NULL;
we_slept:
spin_lock(&dq_list_lock);
- spin_lock(&sb_dqopt(sb)->dq_state_lock);
+ spin_lock(&dqopt->dq_list_lock);
+ spin_lock(&dqopt->dq_state_lock);
if (!sb_has_quota_active(sb, type)) {
- spin_unlock(&sb_dqopt(sb)->dq_state_lock);
+ spin_unlock(&dqopt->dq_state_lock);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
goto out;
}
@@ -829,6 +867,7 @@ we_slept:
dquot = find_dquot(hashent, sb, id, type);
if (!dquot) {
if (!empty) {
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
empty = get_empty_dquot(sb, type);
if (!empty)
@@ -842,12 +881,14 @@ we_slept:
put_inuse(dquot);
/* hash it first so it can be found */
insert_dquot_hash(dquot);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
} else {
if (!atomic_read(&dquot->dq_count))
remove_free_dquot(dquot);
atomic_inc(&dquot->dq_count);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
dqstats_inc(DQST_CACHE_HITS);
dqstats_inc(DQST_LOOKUPS);
@@ -953,6 +994,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
struct list_head *tofree_head)
{
struct dquot *dquot = inode->i_dquot[type];
+ struct quota_info *dqopt = sb_dqopt(inode->i_sb);
inode->i_dquot[type] = NULL;
if (dquot) {
@@ -964,9 +1006,11 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
atomic_read(&dquot->dq_count));
#endif
spin_lock(&dq_list_lock);
+ spin_lock(&dqopt->dq_list_lock);
/* As dquot must have currently users it can't be on
* the free list... */
list_add(&dquot->dq_free, tofree_head);
+ spin_unlock(&dqopt->dq_list_lock);
spin_unlock(&dq_list_lock);
return 1;
}
diff --git a/fs/super.c b/fs/super.c
index b54cb8b..852866b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -107,6 +107,9 @@ static struct super_block *alloc_super(struct file_system_type *type)
mutex_init(&s->s_dquot.dqonoff_mutex);
spin_lock_init(&s->s_dquot.dq_state_lock);
init_rwsem(&s->s_dquot.dqptr_sem);
+ spin_lock_init(&s->s_dquot.dq_list_lock);
+ INIT_LIST_HEAD(&s->s_dquot.dq_inuse_list);
+ INIT_LIST_HEAD(&s->s_dquot.dq_free_list);
init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e39b01c..134c18d 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -399,6 +399,10 @@ struct quota_info {
struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
spinlock_t dq_state_lock; /* serialize quota state changes*/
+ spinlock_t dq_list_lock; /* protect lists */
+ struct list_head dq_inuse_list; /* list of inused dquotas */
+ struct list_head dq_free_list; /* list of free dquotas */
+
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 04/11] quota: make dquot lists per-sb
2010-10-05 18:20 ` [PATCH 04/11] quota: make dquot lists per-sb Dmitry Monakhov
@ 2010-10-06 8:57 ` Christoph Hellwig
2010-10-06 9:39 ` Dmitry
2010-10-06 10:22 ` Jan Kara
1 sibling, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2010-10-06 8:57 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue, Oct 05, 2010 at 10:20:20PM +0400, Dmitry Monakhov wrote:
> Currently quota lists are global which is very bad for scalability.
> * inuse_lists -> sb->s_dquot->dq_inuse_list
> * free_lists -> sb->s_dquot->dq_free_lists
> * Add per sb lock for quota's lists protection
>
> Do not remove dq_lists_lock is used now only for protecting quota_hash
I'm not a big fan of adding tons of new fields to the generic superblock
for a handfull filesystems actually using it. Can you look into
introducing a quota_sb structure that the few filesystems that use
it can embedd into their superblock?
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 04/11] quota: make dquot lists per-sb
2010-10-06 8:57 ` Christoph Hellwig
@ 2010-10-06 9:39 ` Dmitry
0 siblings, 0 replies; 35+ messages in thread
From: Dmitry @ 2010-10-06 9:39 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Wed, 6 Oct 2010 04:57:44 -0400, Christoph Hellwig <hch@lst.de> wrote:
> On Tue, Oct 05, 2010 at 10:20:20PM +0400, Dmitry Monakhov wrote:
> > Currently quota lists are global which is very bad for scalability.
> > * inuse_lists -> sb->s_dquot->dq_inuse_list
> > * free_lists -> sb->s_dquot->dq_free_lists
> > * Add per sb lock for quota's lists protection
> >
> > Do not remove dq_lists_lock is used now only for protecting quota_hash
>
> I'm not a big fan of adding tons of new fields to the generic superblock
> for a handfull filesystems actually using it. Can you look into
> introducing a quota_sb structure that the few filesystems that use
> it can embedd into their superblock?
Yep, seems reasonable. Will handle that in next version.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 04/11] quota: make dquot lists per-sb
2010-10-05 18:20 ` [PATCH 04/11] quota: make dquot lists per-sb Dmitry Monakhov
2010-10-06 8:57 ` Christoph Hellwig
@ 2010-10-06 10:22 ` Jan Kara
2010-10-06 10:40 ` Dmitry
1 sibling, 1 reply; 35+ messages in thread
From: Jan Kara @ 2010-10-06 10:22 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:20, Dmitry Monakhov wrote:
> Currently quota lists are global which is very bad for scalability.
> * inuse_lists -> sb->s_dquot->dq_inuse_list
> * free_lists -> sb->s_dquot->dq_free_lists
> * Add per sb lock for quota's lists protection
>
> Do not remove dq_lists_lock is used now only for protecting quota_hash
Hmm, wouldn't be it easier to just convert lists to per-sb ones in this
patch, convert hash in the next patch, and convert the spinlock in the last
one. You wouldn't have to have both spinlocks in parallel... But that's
just a nit ;).
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 04/11] quota: make dquot lists per-sb
2010-10-06 10:22 ` Jan Kara
@ 2010-10-06 10:40 ` Dmitry
2010-10-06 10:54 ` Jan Kara
0 siblings, 1 reply; 35+ messages in thread
From: Dmitry @ 2010-10-06 10:40 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Wed, 6 Oct 2010 12:22:24 +0200, Jan Kara <jack@suse.cz> wrote:
> On Tue 05-10-10 22:20:20, Dmitry Monakhov wrote:
> > Currently quota lists are global which is very bad for scalability.
> > * inuse_lists -> sb->s_dquot->dq_inuse_list
> > * free_lists -> sb->s_dquot->dq_free_lists
> > * Add per sb lock for quota's lists protection
> >
> > Do not remove dq_lists_lock is used now only for protecting quota_hash
> Hmm, wouldn't be it easier to just convert lists to per-sb ones in this
> patch, convert hash in the next patch, and convert the spinlock in the last
> one. You wouldn't have to have both spinlocks in parallel... But that's
> just a nit ;).
I had hoped that this would be easier to read and review.
IMHO, with this approach one may understand my ideas just by looking
at the diff. But if you want I can destroy and reassemble the locking
logic in one patch :)
>
> Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 04/11] quota: make dquot lists per-sb
2010-10-06 10:40 ` Dmitry
@ 2010-10-06 10:54 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 10:54 UTC (permalink / raw)
To: Dmitry; +Cc: Jan Kara, linux-fsdevel, hch, Dmitry Monakhov
On Wed 06-10-10 14:40:06, Dmitry wrote:
> On Wed, 6 Oct 2010 12:22:24 +0200, Jan Kara <jack@suse.cz> wrote:
> > On Tue 05-10-10 22:20:20, Dmitry Monakhov wrote:
> > > Currently quota lists are global which is very bad for scalability.
> > > * inuse_lists -> sb->s_dquot->dq_inuse_list
> > > * free_lists -> sb->s_dquot->dq_free_lists
> > > * Add per sb lock for quota's lists protection
> > >
> > > Do not remove dq_lists_lock is used now only for protecting quota_hash
> > Hmm, wouldn't be it easier to just convert lists to per-sb ones in this
> > patch, convert hash in the next patch, and convert the spinlock in the last
> > one. You wouldn't have to have both spinlocks in parallel... But that's
> > just a nit ;).
> I've hoped that this would be easier to read and review.
> IMHO This that approach one may understand my ideas just by looking
> to diff.
Well, code-wise it would be easier to review if the patches were split as
I suggested and the intentions could be explained in the changelog...
> But if you want i can destroy and reassemble locking logic in
> one patch :)
You don't have to redo the patch. In this case, it's simple enough to
verify with your split as well. My comment was mostly meant as a suggestion
for future...
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 05/11] quota: make per-sb hash array
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (3 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 04/11] quota: make dquot lists per-sb Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 10:38 ` Jan Kara
2010-10-05 18:20 ` [PATCH 06/11] quota: remove global dq_list_lock Dmitry Monakhov
` (6 subsequent siblings)
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Currently quota_hash[] is global, which is bad for scalability.
Also it is the last user of the global dq_list_lock.
It is reasonable to introduce a dedicated hash for each super_block
which uses quota.
The per-sb hash will be allocated only when necessary (on first quota_on()),
and is protected by the per-sb dq_list_lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 111 ++++++++++++++++++++++++++++++++----------------
include/linux/quota.h | 10 ++++-
2 files changed, 83 insertions(+), 38 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f2092d1..e93b323 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -220,7 +220,7 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
* The quota code uses three lists for dquot management: the inuse_list,
- * free_dquots, and dquot_hash[] array. A single dquot structure may be
+ * free_dquots, and dq_hash[] array. A single dquot structure may be
* on all three lists, depending on its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
@@ -233,13 +233,10 @@ static void put_quota_format(struct quota_format_type *fmt)
* dquot is invalidated it's completely released from memory.
*
* Dquots with a specific identity (device, type and id) are placed on
- * one of the dquot_hash[] hash chains. The provides an efficient search
+ * one of the dq_hash[] hash chains. The provides an efficient search
* mechanism to locate a specific dquot.
*/
-static unsigned int dq_hash_bits, dq_hash_mask;
-static struct hlist_head *dquot_hash;
-
struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
@@ -251,8 +248,9 @@ hashfn(const struct super_block *sb, unsigned int id, int type)
{
unsigned long tmp;
- tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
- return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask;
+ tmp = id * (MAXQUOTAS - type);
+ return (tmp + (tmp >> sb->s_dquot.dq_hash.bits)) &
+ sb->s_dquot.dq_hash.mask;
}
/*
@@ -261,7 +259,8 @@ hashfn(const struct super_block *sb, unsigned int id, int type)
static inline void insert_dquot_hash(struct dquot *dquot)
{
struct hlist_head *head;
- head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
+ head = dq_opt(dquot)->dq_hash.head +
+ hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
hlist_add_head(&dquot->dq_hash, head);
}
@@ -270,13 +269,17 @@ static inline void remove_dquot_hash(struct dquot *dquot)
hlist_del_init(&dquot->dq_hash);
}
-static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
+static struct dquot *find_dquot(struct super_block *sb,
unsigned int id, int type)
{
struct hlist_node *node;
struct dquot *dquot;
+ unsigned int hashent = hashfn(sb, id, type);
+
+ if (!sb_dqopt(sb)->dq_hash.head)
+ return NULL;
- hlist_for_each (node, dquot_hash+hashent) {
+ hlist_for_each(node, sb_dqopt(sb)->dq_hash.head + hashent) {
dquot = hlist_entry(node, struct dquot, dq_hash);
if (dquot->dq_sb == sb && dquot->dq_id == id &&
dquot->dq_type == type)
@@ -846,7 +849,6 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
*/
struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
- unsigned int hashent = hashfn(sb, id, type);
struct dquot *dquot = NULL, *empty = NULL;
struct quota_info *dqopt = sb_dqopt(sb);
@@ -864,7 +866,7 @@ we_slept:
}
spin_unlock(&sb_dqopt(sb)->dq_state_lock);
- dquot = find_dquot(hashent, sb, id, type);
+ dquot = find_dquot(sb, id, type);
if (!dquot) {
if (!empty) {
spin_unlock(&dqopt->dq_list_lock);
@@ -1925,6 +1927,47 @@ int dquot_file_open(struct inode *inode, struct file *file)
}
EXPORT_SYMBOL(dquot_file_open);
+int dquot_hash_alloc(struct super_block *sb, int order)
+{
+ unsigned long nr_hash, i;
+ struct hlist_head *hash_array;
+ struct dquot_hash *dq_hash = &sb_dqopt(sb)->dq_hash;
+
+ hash_array = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order);
+ if (!hash_array)
+ return -ENOMEM;
+
+ /* Find power-of-two hlist_heads which can fit into allocation */
+ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
+ dq_hash->bits = 0;
+ do {
+ dq_hash->bits++;
+ } while (nr_hash >> dq_hash->bits);
+ dq_hash->bits--;
+
+ nr_hash = 1UL << dq_hash->bits;
+ dq_hash->mask = nr_hash - 1;
+ for (i = 0; i < nr_hash; i++)
+ INIT_HLIST_HEAD(hash_array + i);
+ dq_hash->order = order;
+ dq_hash->head = hash_array;
+ return 0;
+}
+
+void dquot_hash_destroy(struct super_block *sb)
+{
+
+ struct dquot_hash * dq_hash = &sb_dqopt(sb)->dq_hash;
+ unsigned long i, nr_hash = 1UL << dq_hash->bits;
+ unsigned long addr = (unsigned long )dq_hash->head;
+
+ for (i = 0; i < nr_hash; i++)
+ BUG_ON(!hlist_empty(dq_hash->head + i));
+
+ dq_hash->head = NULL;
+ free_pages(addr, dq_hash->order);
+}
+
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
@@ -2006,6 +2049,9 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
dqopt->info[cnt].dqi_bgrace = 0;
dqopt->ops[cnt] = NULL;
}
+ if (!sb_any_quota_loaded(sb))
+ /* All quotas was unloaded, hash no longer needed */
+ dquot_hash_destroy(sb);
mutex_unlock(&dqopt->dqonoff_mutex);
/* Skip syncing and setting flags if quota files are hidden */
@@ -2081,7 +2127,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
struct quota_format_type *fmt = find_quota_format(format_id);
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
- int error;
+ int error, hash_alloc;
int oldflags = -1;
if (!fmt)
@@ -2115,6 +2161,16 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
invalidate_bdev(sb->s_bdev);
}
mutex_lock(&dqopt->dqonoff_mutex);
+
+ hash_alloc = !sb_any_quota_loaded(sb);
+ if (hash_alloc) {
+ error = dquot_hash_alloc(sb, 0);
+ if (error) {
+ hash_alloc = 0;
+ goto out_lock;
+ }
+ }
+
if (sb_has_quota_loaded(sb, type)) {
error = -EBUSY;
goto out_lock;
@@ -2168,6 +2224,8 @@ out_file_init:
dqopt->files[type] = NULL;
iput(inode);
out_lock:
+ if (hash_alloc)
+ dquot_hash_destroy(sb);
if (oldflags != -1) {
mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
/* Set the flags back (in the case of accidental quotaon()
@@ -2224,8 +2282,10 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
struct path *path)
{
int error = security_quota_on(path->dentry);
+
if (error)
return error;
+
/* Quota file not on the same filesystem? */
if (path->mnt->mnt_sb != sb)
error = -EXDEV;
@@ -2643,8 +2703,7 @@ static ctl_table sys_table[] = {
static int __init dquot_init(void)
{
int i, ret;
- unsigned long nr_hash, order;
-
+
printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
register_sysctl_table(sys_table);
@@ -2655,33 +2714,11 @@ static int __init dquot_init(void)
SLAB_MEM_SPREAD|SLAB_PANIC),
NULL);
- order = 0;
- dquot_hash = (struct hlist_head *)__get_free_pages(GFP_ATOMIC, order);
- if (!dquot_hash)
- panic("Cannot create dquot hash table");
-
for (i = 0; i < _DQST_DQSTAT_LAST; i++) {
ret = percpu_counter_init(&dqstats.counter[i], 0);
if (ret)
panic("Cannot create dquot stat counters");
}
-
- /* Find power-of-two hlist_heads which can fit into allocation */
- nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
- dq_hash_bits = 0;
- do {
- dq_hash_bits++;
- } while (nr_hash >> dq_hash_bits);
- dq_hash_bits--;
-
- nr_hash = 1UL << dq_hash_bits;
- dq_hash_mask = nr_hash - 1;
- for (i = 0; i < nr_hash; i++)
- INIT_HLIST_HEAD(dquot_hash + i);
-
- printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n",
- nr_hash, order, (PAGE_SIZE << order));
-
register_shrinker(&dqcache_shrinker);
return 0;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 134c18d..eb083fc 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -393,15 +393,23 @@ static inline void quota_send_warning(short type, unsigned int id, dev_t dev,
}
#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */
+struct dquot_hash {
+ struct hlist_head *head;
+ unsigned int order;
+ unsigned int bits;
+ unsigned int mask;
+};
+
struct quota_info {
unsigned int flags; /* Flags for diskquotas on this device */
struct mutex dqio_mutex; /* lock device while I/O in progress */
struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
spinlock_t dq_state_lock; /* serialize quota state changes*/
- spinlock_t dq_list_lock; /* protect lists */
+ spinlock_t dq_list_lock; /* protect lists and hash */
struct list_head dq_inuse_list; /* list of inused dquotas */
struct list_head dq_free_list; /* list of free dquotas */
+ struct dquot_hash dq_hash; /* dquot lookup hash */
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 05/11] quota: make per-sb hash array
2010-10-05 18:20 ` [PATCH 05/11] quota: make per-sb hash array Dmitry Monakhov
@ 2010-10-06 10:38 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 10:38 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:21, Dmitry Monakhov wrote:
> @@ -1925,6 +1927,47 @@ int dquot_file_open(struct inode *inode, struct file *file)
> }
> EXPORT_SYMBOL(dquot_file_open);
>
> +int dquot_hash_alloc(struct super_block *sb, int order)
> +{
> + unsigned long nr_hash, i;
> + struct hlist_head *hash_array;
> + struct dquot_hash *dq_hash = &sb_dqopt(sb)->dq_hash;
> +
> + hash_array = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order);
> + if (!hash_array)
> + return -ENOMEM;
> +
> + /* Find power-of-two hlist_heads which can fit into allocation */
> + nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
> + dq_hash->bits = 0;
> + do {
> + dq_hash->bits++;
> + } while (nr_hash >> dq_hash->bits);
> + dq_hash->bits--;
We have ilog2() function for this these days...
> +void dquot_hash_destroy(struct super_block *sb)
> +{
> +
> + struct dquot_hash * dq_hash = &sb_dqopt(sb)->dq_hash;
> + unsigned long i, nr_hash = 1UL << dq_hash->bits;
> + unsigned long addr = (unsigned long )dq_hash->head;
^ superfluous space
> +
> + for (i = 0; i < nr_hash; i++)
> + BUG_ON(!hlist_empty(dq_hash->head + i));
Just WARN_ON + return would be better. It's probably better to leak a
page of kernel memory than crash the whole kernel...
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 06/11] quota: remove global dq_list_lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (4 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 05/11] quota: make per-sb hash array Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 07/11] quota: rename dq_lock Dmitry Monakhov
` (5 subsequent siblings)
11 siblings, 0 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
dq_list_lock is no longer responsible for any synchronization,
and we may remove it now.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 37 ++-----------------------------------
1 files changed, 2 insertions(+), 35 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e93b323..7401ce8 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -90,8 +90,8 @@
* about latest values take it as well.
*
* The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
- * dq_list_lock > sb->s_dquot->dq_state_lock
- * dq_list_lock > sb->s_dquot->dq_list_lock
+ * dq_list_lock > dq_state_lock
+ * dq_list_lock > dq_list_lock
*
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
@@ -127,7 +127,6 @@
* i_mutex on quota files is special (it's below dqio_mutex)
*/
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_fmt_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
@@ -346,7 +345,6 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
if (test_bit(DQ_MOD_B, &dquot->dq_flags))
return 1;
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
list_add(&dquot->dq_dirty,
@@ -354,7 +352,6 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
ret = 0;
}
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
return ret;
}
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
@@ -446,15 +443,12 @@ int dquot_commit(struct dquot *dquot)
struct quota_info *dqopt = dq_opt(dquot);
mutex_lock(&dqopt->dqio_mutex);
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
if (!clear_dquot_dirty(dquot)) {
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
goto out_sem;
}
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
/* Inactive dquot can be only if there was error during read/init
* => we have better not writing it */
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
@@ -526,7 +520,6 @@ static void invalidate_dquots(struct super_block *sb, int type)
struct quota_info *dqopt = sb_dqopt(sb);
restart:
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
list_for_each_entry_safe(dquot, tmp, &dqopt->dq_inuse_list, dq_inuse) {
if (dquot->dq_sb != sb)
@@ -541,7 +534,6 @@ restart:
prepare_to_wait(&dquot->dq_wait_unused, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
/* Once dqput() wakes us up, we know it's time to free
* the dquot.
* IMPORTANT: we rely on the fact that there is always
@@ -568,7 +560,6 @@ restart:
do_destroy_dquot(dquot);
}
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
}
/* Call callback for every active dquot on given filesystem */
@@ -581,7 +572,6 @@ int dquot_scan_active(struct super_block *sb,
int ret = 0;
mutex_lock(&dqopt->dqonoff_mutex);
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
list_for_each_entry(dquot, &dqopt->dq_inuse_list, dq_inuse) {
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
@@ -591,20 +581,17 @@ int dquot_scan_active(struct super_block *sb,
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
ret = fn(dquot, priv);
if (ret < 0)
goto out;
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
}
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
out:
dqput(old_dquot);
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -625,7 +612,6 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
dirty = &dqopt->info[cnt].dqi_dirty_list;
while (!list_empty(dirty)) {
@@ -641,16 +627,13 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
* use count */
atomic_inc(&dquot->dq_count);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
sb->dq_op->write_dquot(dquot);
dqput(dquot);
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
}
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -723,9 +706,7 @@ static void prune_dqcache(int count)
static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
- spin_lock(&dq_list_lock);
prune_dqcache(nr);
- spin_unlock(&dq_list_lock);
}
return ((unsigned)
percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
@@ -758,7 +739,6 @@ void dqput(struct dquot *dquot)
dqopt = dq_opt(dquot);
dqstats_inc(DQST_DROPS);
we_slept:
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
/* We have more than one user... nothing to do */
@@ -768,13 +748,11 @@ we_slept:
atomic_read(&dquot->dq_count) == 1)
wake_up(&dquot->dq_wait_unused);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
return;
}
/* Need to release dquot? */
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
ret = dquot->dq_sb->dq_op->write_dquot(dquot);
if (ret < 0) {
@@ -785,11 +763,9 @@ we_slept:
* We clear dirty bit anyway, so that we avoid
* infinite loop here
*/
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
clear_dquot_dirty(dquot);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
}
goto we_slept;
}
@@ -797,7 +773,6 @@ we_slept:
clear_dquot_dirty(dquot);
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
dquot->dq_sb->dq_op->release_dquot(dquot);
goto we_slept;
}
@@ -808,7 +783,6 @@ we_slept:
#endif
put_dquot_last(dquot);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
}
EXPORT_SYMBOL(dqput);
@@ -855,13 +829,11 @@ struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
if (!sb_has_quota_active(sb, type))
return NULL;
we_slept:
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
spin_lock(&dqopt->dq_state_lock);
if (!sb_has_quota_active(sb, type)) {
spin_unlock(&dqopt->dq_state_lock);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
goto out;
}
spin_unlock(&sb_dqopt(sb)->dq_state_lock);
@@ -870,7 +842,6 @@ we_slept:
if (!dquot) {
if (!empty) {
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
empty = get_empty_dquot(sb, type);
if (!empty)
schedule(); /* Try to wait for a moment... */
@@ -884,14 +855,12 @@ we_slept:
/* hash it first so it can be found */
insert_dquot_hash(dquot);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
dqstats_inc(DQST_LOOKUPS);
} else {
if (!atomic_read(&dquot->dq_count))
remove_free_dquot(dquot);
atomic_inc(&dquot->dq_count);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
dqstats_inc(DQST_CACHE_HITS);
dqstats_inc(DQST_LOOKUPS);
}
@@ -1007,13 +976,11 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
"dq_count %d to dispose list",
atomic_read(&dquot->dq_count));
#endif
- spin_lock(&dq_list_lock);
spin_lock(&dqopt->dq_list_lock);
/* As dquot must have currently users it can't be on
* the free list... */
list_add(&dquot->dq_free, tofree_head);
spin_unlock(&dqopt->dq_list_lock);
- spin_unlock(&dq_list_lock);
return 1;
}
else
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* [PATCH 07/11] quota: rename dq_lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (5 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 06/11] quota: remove global dq_list_lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 08/11] quota: make per-sb dq_data_lock Dmitry Monakhov
` (4 subsequent siblings)
11 siblings, 0 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Give the dquot mutex a more appropriate name.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/ocfs2/quota_global.c | 14 +++++++-------
fs/quota/dquot.c | 26 +++++++++++++-------------
fs/quota/quota_tree.c | 2 +-
include/linux/quota.h | 2 +-
4 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2c88a87..79c3960 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -32,7 +32,7 @@
* Locking of quotas with OCFS2 is rather complex. Here are rules that
* should be obeyed by all the functions:
* - any write of quota structure (either to local or global file) is protected
- * by dqio_mutex or dquot->dq_lock.
+ * by dqio_mutex or dquot->dq_mutex.
* - any modification of global quota file holds inode cluster lock, i_mutex,
* and ip_alloc_sem of the global quota file (achieved by
* ocfs2_lock_global_qf). It also has to hold qinfo_lock.
@@ -47,13 +47,13 @@
* write to gf
* -> write to lf
* Acquire dquot for the first time:
- * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf
+ * dq_mutex -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf
* -> alloc space for gf
* -> start_trans -> qinfo_lock -> write to gf
* -> ip_alloc_sem of lf -> alloc space for lf
* -> write to lf
* Release last reference to dquot:
- * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf
+ * dq_mutex -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf
* -> write to lf
* Note that all the above operations also hold the inode cluster lock of lf.
* Recovery:
@@ -690,7 +690,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
- mutex_lock(&dquot->dq_lock);
+ mutex_lock(&dquot->dq_mutex);
/* Check whether we are not racing with some other dqget() */
if (atomic_read(&dquot->dq_count) > 1)
goto out;
@@ -723,7 +723,7 @@ out_trans:
out_ilock:
ocfs2_unlock_global_qf(oinfo, 1);
out:
- mutex_unlock(&dquot->dq_lock);
+ mutex_unlock(&dquot->dq_mutex);
mlog_exit(status);
return status;
}
@@ -746,7 +746,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
handle_t *handle;
mlog_entry("id=%u, type=%d", dquot->dq_id, type);
- mutex_lock(&dquot->dq_lock);
+ mutex_lock(&dquot->dq_mutex);
/*
* We need an exclusive lock, because we're going to update use count
* and instantiate possibly new dquot structure
@@ -810,7 +810,7 @@ out_dq:
goto out;
set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out:
- mutex_unlock(&dquot->dq_lock);
+ mutex_unlock(&dquot->dq_mutex);
mlog_exit(status);
return status;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 7401ce8..d591fb9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -106,17 +106,17 @@
* sure they cannot race with quotaon which first sets S_NOQUOTA flag and
* then drops all pointers to dquots from an inode.
*
- * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
- * from inodes (dquot_alloc_space() and such don't check the dq_lock).
+ * Each dquot has its dq_mutex mutex. Locked dquots might not be referenced
+ * from inodes (dquot_alloc_space() and such don't check the dq_mutex).
* Currently dquot is locked only when it is being read to memory (or space for
* it is being allocated) on the first dqget() and when it is being released on
* the last dqput(). The allocation and release oparations are serialized by
- * the dq_lock and by checking the use count in dquot_release(). Write
- * operations on dquots don't hold dq_lock as they copy data under dq_data_lock
+ * the dq_mutex and by checking the use count in dquot_release(). Write
+ * operations on dquots don't hold dq_mutex as they copy data under dq_data_lock
* spinlock to internal buffers before writing.
*
* Lock ordering (including related VFS locks) is the following:
- * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock >
+ * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_mutex >
* dqio_mutex
* The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
* dqptr_sem. But filesystem has to count with the fact that functions such as
@@ -321,8 +321,8 @@ static inline void remove_inuse(struct dquot *dquot)
static void wait_on_dquot(struct dquot *dquot)
{
- mutex_lock(&dquot->dq_lock);
- mutex_unlock(&dquot->dq_lock);
+ mutex_lock(&dquot->dq_mutex);
+ mutex_unlock(&dquot->dq_mutex);
}
static inline int dquot_dirty(struct dquot *dquot)
@@ -404,7 +404,7 @@ int dquot_acquire(struct dquot *dquot)
int ret = 0, ret2 = 0;
struct quota_info *dqopt = dq_opt(dquot);
- mutex_lock(&dquot->dq_lock);
+ mutex_lock(&dquot->dq_mutex);
mutex_lock(&dqopt->dqio_mutex);
if (!test_bit(DQ_READ_B, &dquot->dq_flags))
ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot);
@@ -429,7 +429,7 @@ int dquot_acquire(struct dquot *dquot)
set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_iolock:
mutex_unlock(&dqopt->dqio_mutex);
- mutex_unlock(&dquot->dq_lock);
+ mutex_unlock(&dquot->dq_mutex);
return ret;
}
EXPORT_SYMBOL(dquot_acquire);
@@ -474,7 +474,7 @@ int dquot_release(struct dquot *dquot)
int ret = 0, ret2 = 0;
struct quota_info *dqopt = dq_opt(dquot);
- mutex_lock(&dquot->dq_lock);
+ mutex_lock(&dquot->dq_mutex);
/* Check whether we are not racing with some other dqget() */
if (atomic_read(&dquot->dq_count) > 1)
goto out_dqlock;
@@ -492,7 +492,7 @@ int dquot_release(struct dquot *dquot)
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
mutex_unlock(&dqopt->dqio_mutex);
out_dqlock:
- mutex_unlock(&dquot->dq_lock);
+ mutex_unlock(&dquot->dq_mutex);
return ret;
}
EXPORT_SYMBOL(dquot_release);
@@ -800,7 +800,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
if(!dquot)
return NULL;
- mutex_init(&dquot->dq_lock);
+ mutex_init(&dquot->dq_mutex);
INIT_LIST_HEAD(&dquot->dq_free);
INIT_LIST_HEAD(&dquot->dq_inuse);
INIT_HLIST_NODE(&dquot->dq_hash);
@@ -864,7 +864,7 @@ we_slept:
dqstats_inc(DQST_CACHE_HITS);
dqstats_inc(DQST_LOOKUPS);
}
- /* Wait for dq_lock - after this we know that either dquot_release() is
+ /* Wait for dq_mutex - after this we know that either dquot_release() is
* already finished or it will be canceled due to dq_count > 1 test */
wait_on_dquot(dquot);
/* Read the dquot / allocate space in quota file */
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 3eb95ec..b568718 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -647,7 +647,7 @@ out:
EXPORT_SYMBOL(qtree_read_dquot);
/* Check whether dquot should not be deleted. We know we are
- * the only one operating on dquot (thanks to dq_lock) */
+ * the only one operating on dquot (thanks to dq_mutex) */
int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) &&
diff --git a/include/linux/quota.h b/include/linux/quota.h
index eb083fc..d257131 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -287,7 +287,7 @@ struct dquot {
struct list_head dq_inuse; /* List of all quotas */
struct list_head dq_free; /* Free list element */
struct list_head dq_dirty; /* List of dirty dquots */
- struct mutex dq_lock; /* dquot IO lock */
+ struct mutex dq_mutex; /* dquot IO mutex */
atomic_t dq_count; /* Use count */
wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */
struct super_block *dq_sb; /* superblock this applies to */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* [PATCH 08/11] quota: make per-sb dq_data_lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (6 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 07/11] quota: rename dq_lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 11:01 ` Jan Kara
2010-10-05 18:20 ` [PATCH 09/11] quota: protect dquot mem info with objects's lock Dmitry Monakhov
` (3 subsequent siblings)
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
Currently dq_data_lock is global, which is bad for scalability.
In fact, different super_blocks have no shared quota data.
So we may simply convert global lock to per-sb lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/ocfs2/quota_global.c | 24 ++++++++++++------------
fs/ocfs2/quota_local.c | 13 +++++++------
fs/quota/dquot.c | 46 ++++++++++++++++++++++------------------------
fs/quota/quota_tree.c | 8 ++++----
fs/quota/quota_v2.c | 4 ++--
fs/super.c | 1 +
include/linux/quota.h | 3 +--
7 files changed, 49 insertions(+), 50 deletions(-)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 79c3960..6c4bc77 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -300,12 +300,12 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
if (status < 0)
return status;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
if (!oinfo->dqi_gqi_count++)
oinfo->dqi_gqi_bh = bh;
else
WARN_ON(bh != oinfo->dqi_gqi_bh);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
if (ex) {
mutex_lock(&oinfo->dqi_gqinode->i_mutex);
down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -325,10 +325,10 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
}
ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
brelse(oinfo->dqi_gqi_bh);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
if (!--oinfo->dqi_gqi_count)
oinfo->dqi_gqi_bh = NULL;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
}
/* Read information header from global quota file */
@@ -421,11 +421,11 @@ static int __ocfs2_global_write_info(struct super_block *sb, int type)
struct ocfs2_global_disk_dqinfo dinfo;
ssize_t size;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
info->dqi_flags &= ~DQF_INFO_DIRTY;
dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
@@ -502,7 +502,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
/* Update space and inode usage. Get also other information from
* global quota file so that we don't overwrite any changes there.
* We are */
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
spacechange = dquot->dq_dqb.dqb_curspace -
OCFS2_DQUOT(dquot)->dq_origspace;
inodechange = dquot->dq_dqb.dqb_curinodes -
@@ -556,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
err = ocfs2_qinfo_lock(info, freeing);
if (err < 0) {
mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
@@ -657,9 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
mlog_errno(status);
goto out;
}
- mutex_lock(&dq_opt(dqopt)->dqio_mutex);
+ mutex_lock(&dq_opt(dquot)->dqio_mutex);
status = ocfs2_local_write_dquot(dquot);
- mutex_unlock(&dq_opt(dqopt)->dqio_mutex);
+ mutex_unlock(&dq_opt(dquot)->dqio_mutex);
ocfs2_commit_trans(osb, handle);
out:
mlog_exit(status);
@@ -835,10 +835,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* In case user set some limits, sync dquot immediately to global
* quota file so that information propagates quicker */
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
if (dquot->dq_flags & mask)
sync = 1;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
/* This is a slight hack but we can't afford getting global quota
* lock if we already have a transaction started. */
if (!sync || journal_current_handle()) {
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index dc78764..c078799 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -288,14 +288,15 @@ static void olq_update_info(struct buffer_head *bh, void *private)
struct mem_dqinfo *info = private;
struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct super_block *sb = oinfo->dqi_gqinode->i_sb;
ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
OCFS2_LOCAL_INFO_OFF);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
}
static int ocfs2_add_recovery_chunk(struct super_block *sb,
@@ -523,7 +524,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
goto out_drop_lock;
}
mutex_lock(&sb_dqopt(sb)->dqio_mutex);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
/* Add usage from quota entry into quota changes
* of our node. Auxiliary variables are important
* due to signedness */
@@ -531,7 +532,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
inodechange = le64_to_cpu(dqblk->dqb_inodemod);
dquot->dq_dqb.dqb_curspace += spacechange;
dquot->dq_dqb.dqb_curinodes += inodechange;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
/* We want to drop reference held by the crashed
* node. Since we have our own reference we know
* global structure actually won't be freed. */
@@ -876,12 +877,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
+ ol_dqblk_block_offset(sb, od->dq_local_off));
dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
od->dq_origspace);
dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
od->dq_originodes);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
(long long)le64_to_cpu(dqblk->dqb_inodemod));
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d591fb9..47bc291 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -128,8 +128,6 @@
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_fmt_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
-EXPORT_SYMBOL(dq_data_lock);
void __quota_error(struct super_block *sb, const char *func,
const char *fmt, ...)
@@ -1563,14 +1561,14 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!inode->i_dquot[cnt])
continue;
ret = check_bdq(inode->i_dquot[cnt], number, !warn,
warntype+cnt);
if (ret && !nofail) {
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
goto out_flush_warn;
}
}
@@ -1583,7 +1581,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
dquot_incr_space(inode->i_dquot[cnt], number);
}
inode_incr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_flush_warn;
@@ -1611,7 +1609,7 @@ int dquot_alloc_inode(const struct inode *inode)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!inode->i_dquot[cnt])
continue;
@@ -1627,7 +1625,7 @@ int dquot_alloc_inode(const struct inode *inode)
}
warn_put_all:
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (ret == 0)
mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
@@ -1649,7 +1647,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt])
@@ -1658,7 +1656,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
}
/* Update inode bytes */
inode_claim_rsv_space(inode, number);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
@@ -1682,7 +1680,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!inode->i_dquot[cnt])
continue;
@@ -1693,7 +1691,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
dquot_decr_space(inode->i_dquot[cnt], number);
}
inode_decr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_unlock;
@@ -1718,14 +1716,14 @@ void dquot_free_inode(const struct inode *inode)
return;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!inode->i_dquot[cnt])
continue;
warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
dquot_decr_inodes(inode->i_dquot[cnt], 1);
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1763,7 +1761,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
cur_space = inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
@@ -1808,7 +1806,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
inode->i_dquot[cnt] = transfer_to[cnt];
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
mark_all_dquot_dirty(transfer_from);
@@ -1822,7 +1820,7 @@ warn:
flush_warnings(transfer_from, warntype_from_space);
return ret;
over_quota:
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
goto warn;
}
@@ -2364,7 +2362,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
FS_USER_QUOTA : FS_GROUP_QUOTA;
di->d_id = dquot->dq_id;
- spin_lock(&dq_data_lock);
+ spin_lock(&dq_opt(dquot)->dq_data_lock);
di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
di->d_ino_hardlimit = dm->dqb_ihardlimit;
@@ -2373,7 +2371,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
di->d_icount = dm->dqb_curinodes;
di->d_btimer = dm->dqb_btime;
di->d_itimer = dm->dqb_itime;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dq_opt(dquot)->dq_data_lock);
}
int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
@@ -2416,7 +2414,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
(di->d_ino_hardlimit > dqi->dqi_maxilimit)))
return -ERANGE;
- spin_lock(&dq_data_lock);
+ spin_lock(&dq_opt(dquot)->dq_data_lock);
if (di->d_fieldmask & FS_DQ_BCOUNT) {
dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
check_blim = 1;
@@ -2482,7 +2480,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
clear_bit(DQ_FAKE_B, &dquot->dq_flags);
else
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dq_opt(dquot)->dq_data_lock);
mark_dquot_dirty(dquot);
return 0;
@@ -2517,12 +2515,12 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
return -ESRCH;
}
mi = sb_dqopt(sb)->info + type;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
ii->dqi_bgrace = mi->dqi_bgrace;
ii->dqi_igrace = mi->dqi_igrace;
ii->dqi_flags = mi->dqi_flags & DQF_MASK;
ii->dqi_valid = IIF_ALL;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
}
@@ -2540,7 +2538,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
goto out;
}
mi = sb_dqopt(sb)->info + type;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
if (ii->dqi_valid & IIF_BGRACE)
mi->dqi_bgrace = ii->dqi_bgrace;
if (ii->dqi_valid & IIF_IGRACE)
@@ -2548,7 +2546,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
if (ii->dqi_valid & IIF_FLAGS)
mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) |
(ii->dqi_flags & DQF_MASK);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
mark_info_dirty(sb, type);
/* Force write to disk */
sb->dq_op->write_info(sb, type);
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index b568718..8b04f24 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -375,9 +375,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
return ret;
}
}
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
dquot->dq_off);
if (ret != info->dqi_entry_size) {
@@ -631,14 +631,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
kfree(ddquot);
goto out;
}
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
kfree(ddquot);
out:
dqstats_inc(DQST_READS);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 65444d2..3e5a69a 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -153,12 +153,12 @@ static int v2_write_file_info(struct super_block *sb, int type)
struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
ssize_t size;
- spin_lock(&dq_data_lock);
+ spin_lock(&sb_dqopt(sb)->dq_data_lock);
info->dqi_flags &= ~DQF_INFO_DIRTY;
dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&sb_dqopt(sb)->dq_data_lock);
dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
diff --git a/fs/super.c b/fs/super.c
index 852866b..7ce432d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -108,6 +108,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
spin_lock_init(&s->s_dquot.dq_state_lock);
init_rwsem(&s->s_dquot.dqptr_sem);
spin_lock_init(&s->s_dquot.dq_list_lock);
+ spin_lock_init(&s->s_dquot.dq_data_lock);
INIT_LIST_HEAD(&s->s_dquot.dq_inuse_list);
INIT_LIST_HEAD(&s->s_dquot.dq_free_list);
init_waitqueue_head(&s->s_wait_unfrozen);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index d257131..9e7a102 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -186,8 +186,6 @@ enum {
typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
typedef long long qsize_t; /* Type in which we store sizes */
-extern spinlock_t dq_data_lock;
-
/* Maximal numbers of writes for quota operation (insert/delete/update)
* (over VFS all formats) */
#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC)
@@ -406,6 +404,7 @@ struct quota_info {
struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */
struct rw_semaphore dqptr_sem; /* serialize ops using quota_info struct, pointers from inode to dquots */
spinlock_t dq_state_lock; /* serialize quota state changes*/
+ spinlock_t dq_data_lock; /* protect in memory data */
spinlock_t dq_list_lock; /* protect lists and hash */
struct list_head dq_inuse_list; /* list of inused dquotas */
struct list_head dq_free_list; /* list of free dquotas */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 08/11] quota: make per-sb dq_data_lock
2010-10-05 18:20 ` [PATCH 08/11] quota: make per-sb dq_data_lock Dmitry Monakhov
@ 2010-10-06 11:01 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 11:01 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:24, Dmitry Monakhov wrote:
> Currently dq_data_lock is global, which is bad for scalability.
> In fact different super_blocks has no shared quota's data.
> So we may simply convert global lock to per-sb lock.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
...
> @@ -657,9 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
> mlog_errno(status);
> goto out;
> }
> - mutex_lock(&dq_opt(dqopt)->dqio_mutex);
> + mutex_lock(&dq_opt(dquot)->dqio_mutex);
> status = ocfs2_local_write_dquot(dquot);
> - mutex_unlock(&dq_opt(dqopt)->dqio_mutex);
> + mutex_unlock(&dq_opt(dquot)->dqio_mutex);
> ocfs2_commit_trans(osb, handle);
I guess this hunk belongs to the first patch.
> diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
> index dc78764..c078799 100644
> --- a/fs/ocfs2/quota_local.c
> +++ b/fs/ocfs2/quota_local.c
> @@ -288,14 +288,15 @@ static void olq_update_info(struct buffer_head *bh, void *private)
> struct mem_dqinfo *info = private;
> struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
> struct ocfs2_local_disk_dqinfo *ldinfo;
> + struct super_block *sb = oinfo->dqi_gqinode->i_sb;
You can directly store struct quota_info *, here.
>
> ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
> OCFS2_LOCAL_INFO_OFF);
> - spin_lock(&dq_data_lock);
> + spin_lock(&sb_dqopt(sb)->dq_data_lock);
> ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
> ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
> ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&sb_dqopt(sb)->dq_data_lock);
> }
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (7 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 08/11] quota: make per-sb dq_data_lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 12:37 ` Jan Kara
2010-10-06 13:30 ` Jan Kara
2010-10-05 18:20 ` [PATCH 10/11] quota: drop dq_data_lock where possible Dmitry Monakhov
` (2 subsequent siblings)
11 siblings, 2 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
currently ->dq_data_lock is responsible for protecting three things
1) dquot->dq_dqb info consistency
2) synchronization between ->dq_dqb with ->i_bytes
3) Protects mem_dqinfo (per-sb data),
3b) and consistency between mem_dqinfo and dq_dqb for the following data.
dqi_bgrace <=> dqb_btime
dqi_igrace <=> dqb_itime
In fact, (1) and (2) are conceptually different from (3).
By introducing a per-dquot data lock, we can later split (1) and (2) from (3).
This patch simply introduce new lock, without changing ->dq_data_lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/ocfs2/quota_global.c | 4 ++
fs/ocfs2/quota_local.c | 4 ++
fs/quota/dquot.c | 130 ++++++++++++++++++++++++++++++++++-------------
fs/quota/quota_tree.c | 4 ++
include/linux/quota.h | 1 +
5 files changed, 107 insertions(+), 36 deletions(-)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6c4bc77..1c8db96 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -503,6 +503,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
* global quota file so that we don't overwrite any changes there.
* We are */
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
spacechange = dquot->dq_dqb.dqb_curspace -
OCFS2_DQUOT(dquot)->dq_origspace;
inodechange = dquot->dq_dqb.dqb_curinodes -
@@ -556,6 +557,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
err = ocfs2_qinfo_lock(info, freeing);
if (err < 0) {
@@ -836,8 +838,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* In case user set some limits, sync dquot immediately to global
* quota file so that information propagates quicker */
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
if (dquot->dq_flags & mask)
sync = 1;
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
/* This is a slight hack but we can't afford getting global quota
* lock if we already have a transaction started. */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index c078799..3d99ec5 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -525,6 +525,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
}
mutex_lock(&sb_dqopt(sb)->dqio_mutex);
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
/* Add usage from quota entry into quota changes
* of our node. Auxiliary variables are important
* due to signedness */
@@ -532,6 +533,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
inodechange = le64_to_cpu(dqblk->dqb_inodemod);
dquot->dq_dqb.dqb_curspace += spacechange;
dquot->dq_dqb.dqb_curinodes += inodechange;
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
/* We want to drop reference held by the crashed
* node. Since we have our own reference we know
@@ -878,10 +880,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&od->dq_dquot.dq_lock);
dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
od->dq_origspace);
dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
od->dq_originodes);
+ spin_unlock(&od->dq_dquot.dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 47bc291..2c87709 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,16 +82,18 @@
/*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
- * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
+ * dq_data_lock protects mem_dqinfo structures and mem_dqinfo with
+ * dq_dqb consystency.
+ * dq_lock protects dquot->dq_dqb and also guards consistency of
+ * dquot->dq_dqb with inode->i_blocks, i_bytes.
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
* in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
* modifications of quota state (on quotaon and quotaoff) and readers who care
* about latest values take it as well.
*
- * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
+ * The spinlock ordering is hence:
+ * dq_data_lock > dq_lock > dq_list_lock > i_lock,
* dq_list_lock > dq_state_lock
- * dq_list_lock > dq_list_lock
*
* Note that some things (eg. sb pointer, type, id) doesn't change during
* the life of the dquot structure and so needn't to be protected by a lock
@@ -378,6 +380,37 @@ static inline void dqput_all(struct dquot **dquot)
dqput(dquot[cnt]);
}
+static inline void inode_dquot_lock(const struct inode *inode,
+ struct dquot **dquot)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ dquot[cnt] = inode->i_dquot[cnt];
+ if (dquot[cnt])
+ spin_lock(&dquot[cnt]->dq_lock);
+ }
+}
+
+static inline void dquot_lock_all(struct dquot **dquot)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (dquot[cnt])
+ spin_lock(&dquot[cnt]->dq_lock);
+
+}
+
+static inline void dquot_unlock_all(struct dquot **dquot)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (dquot[cnt])
+ spin_unlock(&dquot[cnt]->dq_lock);
+}
+
/* This function needs dq_list_lock */
static inline int clear_dquot_dirty(struct dquot *dquot)
{
@@ -804,6 +837,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
INIT_HLIST_NODE(&dquot->dq_hash);
INIT_LIST_HEAD(&dquot->dq_dirty);
init_waitqueue_head(&dquot->dq_wait_unused);
+ spin_lock_init(&dquot->dq_lock);
dquot->dq_sb = sb;
dquot->dq_type = type;
atomic_set(&dquot->dq_count, 1);
@@ -1220,7 +1254,7 @@ static int ignore_hardlimit(struct dquot *dquot)
!(info->dqi_flags & V1_DQF_RSQUASH));
}
-/* needs dq_data_lock */
+/* needs dq_data_lock, ->dq_lock */
static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
{
qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
@@ -1257,7 +1291,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
return 0;
}
-/* needs dq_data_lock */
+/* needs dq_data_lock, ->dq_lock */
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
qsize_t tspace;
@@ -1547,6 +1581,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
int warn = flags & DQUOT_SPACE_WARN;
int reserve = flags & DQUOT_SPACE_RESERVE;
int nofail = flags & DQUOT_SPACE_NOFAIL;
+ struct dquot *dquot[MAXQUOTAS];
/*
* First test before acquiring mutex - solves deadlocks when we
@@ -1562,32 +1597,34 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
warntype[cnt] = QUOTA_NL_NOWARN;
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- ret = check_bdq(inode->i_dquot[cnt], number, !warn,
- warntype+cnt);
+ ret = check_bdq(dquot[cnt], number, !warn, warntype+cnt);
if (ret && !nofail) {
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
goto out_flush_warn;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
if (reserve)
- dquot_resv_space(inode->i_dquot[cnt], number);
+ dquot_resv_space(dquot[cnt], number);
else
- dquot_incr_space(inode->i_dquot[cnt], number);
+ dquot_incr_space(dquot[cnt], number);
}
inode_incr_space(inode, number, reserve);
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_flush_warn;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
out_flush_warn:
- flush_warnings(inode->i_dquot, warntype);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
@@ -1601,6 +1638,7 @@ int dquot_alloc_inode(const struct inode *inode)
{
int cnt, ret = 0;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1610,25 +1648,27 @@ int dquot_alloc_inode(const struct inode *inode)
warntype[cnt] = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+ ret = check_idq(dquot[cnt], 1, warntype + cnt);
if (ret)
goto warn_put_all;
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- dquot_incr_inodes(inode->i_dquot[cnt], 1);
+ dquot_incr_inodes(dquot[cnt], 1);
}
warn_put_all:
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (ret == 0)
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquot);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
}
@@ -1640,6 +1680,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
int cnt;
+ struct dquot *dquot[MAXQUOTAS];
if (!dquot_active(inode)) {
inode_claim_rsv_space(inode, number);
@@ -1648,16 +1689,17 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, dquot);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_claim_reserved_space(inode->i_dquot[cnt],
- number);
+ if (dquot[cnt])
+ dquot_claim_reserved_space(dquot[cnt], number);
}
/* Update inode bytes */
inode_claim_rsv_space(inode, number);
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
@@ -1671,6 +1713,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
unsigned int cnt;
char warntype[MAXQUOTAS];
int reserve = flags & DQUOT_SPACE_RESERVE;
+ struct dquot *dquot[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1681,23 +1724,25 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+ warntype[cnt] = info_bdq_free(dquot[cnt], number);
if (reserve)
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ dquot_free_reserved_space(dquot[cnt], number);
else
- dquot_decr_space(inode->i_dquot[cnt], number);
+ dquot_decr_space(dquot[cnt], number);
}
inode_decr_space(inode, number, reserve);
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_unlock;
- mark_all_dquot_dirty(inode->i_dquot);
+ mark_all_dquot_dirty(dquot);
out_unlock:
- flush_warnings(inode->i_dquot, warntype);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
}
EXPORT_SYMBOL(__dquot_free_space);
@@ -1709,6 +1754,7 @@ void dquot_free_inode(const struct inode *inode)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
+ struct dquot *dquot[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1717,15 +1763,17 @@ void dquot_free_inode(const struct inode *inode)
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!inode->i_dquot[cnt])
+ if (!dquot[cnt])
continue;
- warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
- dquot_decr_inodes(inode->i_dquot[cnt], 1);
+ warntype[cnt] = info_idq_free(dquot[cnt], 1);
+ dquot_decr_inodes(dquot[cnt], 1);
}
+ dquot_unlock_all(dquot);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
- mark_all_dquot_dirty(inode->i_dquot);
- flush_warnings(inode->i_dquot, warntype);
+ mark_all_dquot_dirty(dquot);
+ flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
}
EXPORT_SYMBOL(dquot_free_inode);
@@ -1762,14 +1810,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
return 0;
}
spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
+ inode_dquot_lock(inode, transfer_from);
cur_space = inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
+ dquot_lock_all(transfer_to);
/* Build the transfer_from list and check the limits */
+
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!transfer_to[cnt])
continue;
- transfer_from[cnt] = inode->i_dquot[cnt];
ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
if (ret)
goto over_quota;
@@ -1806,6 +1856,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
inode->i_dquot[cnt] = transfer_to[cnt];
}
+ dquot_unlock_all(transfer_to);
+ dquot_unlock_all(transfer_from);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1820,6 +1872,8 @@ warn:
flush_warnings(transfer_from, warntype_from_space);
return ret;
over_quota:
+ dquot_unlock_all(transfer_to);
+ dquot_unlock_all(transfer_from);
spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
goto warn;
@@ -2363,6 +2417,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
di->d_id = dquot->dq_id;
spin_lock(&dq_opt(dquot)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
di->d_ino_hardlimit = dm->dqb_ihardlimit;
@@ -2371,6 +2426,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
di->d_icount = dm->dqb_curinodes;
di->d_btimer = dm->dqb_btime;
di->d_itimer = dm->dqb_itime;
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&dq_opt(dquot)->dq_data_lock);
}
@@ -2415,6 +2471,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
return -ERANGE;
spin_lock(&dq_opt(dquot)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
if (di->d_fieldmask & FS_DQ_BCOUNT) {
dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
check_blim = 1;
@@ -2480,6 +2537,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
clear_bit(DQ_FAKE_B, &dquot->dq_flags);
else
set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&dq_opt(dquot)->dq_data_lock);
mark_dquot_dirty(dquot);
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 8b04f24..1643c30 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -376,7 +376,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
}
}
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
dquot->dq_off);
@@ -632,12 +634,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
goto out;
}
spin_lock(&sb_dqopt(sb)->dq_data_lock);
+ spin_lock(&dquot->dq_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ spin_unlock(&dquot->dq_lock);
spin_unlock(&sb_dqopt(sb)->dq_data_lock);
kfree(ddquot);
out:
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9e7a102..197660f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -294,6 +294,7 @@ struct dquot {
unsigned long dq_flags; /* See DQ_* */
short dq_type; /* Type of quota */
struct mem_dqblk dq_dqb; /* Diskquota usage */
+ spinlock_t dq_lock; /* protect in mem_dqblk */
};
/* Operations which must be implemented by each quota format */
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-05 18:20 ` [PATCH 09/11] quota: protect dquot mem info with objects's lock Dmitry Monakhov
@ 2010-10-06 12:37 ` Jan Kara
2010-10-06 13:17 ` Dmitry
2010-10-06 14:19 ` Dmitry
2010-10-06 13:30 ` Jan Kara
1 sibling, 2 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 12:37 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:25, Dmitry Monakhov wrote:
> currently ->dq_data_lock is responsible for protecting three things
>
> 1) dquot->dq_dqb info consistency
> 2) synchronization between ->dq_dqb with ->i_bytes
> 3) Protects mem_dqinfo (per-sb data),
> 3b) and consystency between mem_dqinfo and dq_dqb for following data.
^^ consistency
> dqi_bgrace <=> dqb_btime
> dqi_igrace <=> dqb_itime
>
> In fact (1) and (2) is conceptually different from (3)
> By introducing per-dquot data lock we later can split (1)(2) from (3)
> This patch simply introduce new lock, without changing ->dq_data_lock.
^^^^^^^^ introduces a
I'd be interested in how much this split brings. The hold time of
dq_data_lock isn't so big and I wonder if the higher scalability won't
be mitigated by cache ping-pongs of structures themselves...
> @@ -378,6 +380,37 @@ static inline void dqput_all(struct dquot **dquot)
> dqput(dquot[cnt]);
> }
>
> +static inline void inode_dquot_lock(const struct inode *inode,
> + struct dquot **dquot)
> +{
> + unsigned int cnt;
> +
> + for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> + dquot[cnt] = inode->i_dquot[cnt];
> + if (dquot[cnt])
> + spin_lock(&dquot[cnt]->dq_lock);
> + }
> +}
> +
> +static inline void dquot_lock_all(struct dquot **dquot)
> +{
> + unsigned int cnt;
> +
> + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> + if (dquot[cnt])
> + spin_lock(&dquot[cnt]->dq_lock);
> +
> +}
> +
> +static inline void dquot_unlock_all(struct dquot **dquot)
> +{
> + unsigned int cnt;
> +
> + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> + if (dquot[cnt])
> + spin_unlock(&dquot[cnt]->dq_lock);
> +}
Please, just have two locking functions like:
lock_inode_dquots(inode->i_dquot), unlock_inode_dquots(inode->i_dquot).
Also please avoid this copying of pointers to dquot structures. It just
makes things harder to read and since dquot structures are reference
counted, it's also not obviously correct strictly speaking (although fine
in the end in your case since you hold dqptr_sem). If you wish to save some
typing, just keep inode->i_dquot in a local variable.
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-06 12:37 ` Jan Kara
@ 2010-10-06 13:17 ` Dmitry
2010-10-06 13:41 ` Jan Kara
2010-10-06 14:19 ` Dmitry
1 sibling, 1 reply; 35+ messages in thread
From: Dmitry @ 2010-10-06 13:17 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Wed, 6 Oct 2010 14:37:27 +0200, Jan Kara <jack@suse.cz> wrote:
> On Tue 05-10-10 22:20:25, Dmitry Monakhov wrote:
> > currently ->dq_data_lock is responsible for protecting three things
> >
> > 1) dquot->dq_dqb info consistency
> > 2) synchronization between ->dq_dqb with ->i_bytes
> > 3) Protects mem_dqinfo (per-sb data),
> > 3b) and consystency between mem_dqinfo and dq_dqb for following data.
> ^^ consistency
>
> > dqi_bgrace <=> dqb_btime
> > dqi_igrace <=> dqb_itime
> >
> > In fact (1) and (2) is conceptually different from (3)
> > By introducing per-dquot data lock we later can split (1)(2) from (3)
> > This patch simply introduce new lock, without changing ->dq_data_lock.
> ^^^^^^^^ introduces a
>
> I'd be interested in how much this split brings. The hold time of
> dq_data_lock isn't so big and I wonder if the higher scalability won't
> be mitigated by cache ping-pongs of structures themselves...
>
> > @@ -378,6 +380,37 @@ static inline void dqput_all(struct dquot **dquot)
> > dqput(dquot[cnt]);
> > }
> >
> > +static inline void inode_dquot_lock(const struct inode *inode,
> > + struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> > + dquot[cnt] = inode->i_dquot[cnt];
> > + if (dquot[cnt])
> > + spin_lock(&dquot[cnt]->dq_lock);
> > + }
> > +}
> > +
> > +static inline void dquot_lock_all(struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> > + if (dquot[cnt])
> > + spin_lock(&dquot[cnt]->dq_lock);
> > +
> > +}
> > +
> > +static inline void dquot_unlock_all(struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> > + if (dquot[cnt])
> > + spin_unlock(&dquot[cnt]->dq_lock);
> > +}
> Please, just have two locking functions like:
> lock_inode_dquots(inode->i_dquot), unlock_inode_dquots(inode->i_dquot).
> Also please avoid this copying of pointers to dquot structures. It just
> makes things harder to read and since dquot structures are reference
> counted, it's also not obviously correct strictly speaking (although fine
> in the end in your case since you hold dqptr_sem). If you wish to save some
> typing, just keep inode->i_dquot in a local variable.
I've copied i_dquot to a local variable on purpose.
Later I have plans to get rid of dqptr_sem by rearranging the code as follows.
But maybe I've introduced the copy logic too soon.
Variant (1)
1) Protect i_dquot dereference/modification with i_lock
So the charge/free functions will look as follows
do_alloc_space (struct inode* inode, qsize_t num)
{
spin_lock(&inode->i_lock);
if (!check_bdq(inode->i_dquot, num))
return EQDUOT;
inode_add_bytes_unlocked(inode, num);
dquot_add_bytes(inode->i_dquot, num);
/* We can sleep while we call quota dirty, to protect quota from
* being deleted we have to get ref for the quota.
*/
atomic_inc(dquot->dq_count);
dquot = inode->i_dquot;
spin_unlock(&inode->i_lock);
/* i_lock was dropped, do not touch i_dquot any more */
mark_dquot_dirty(dquot);
dqput(dquot);
}
This is better than before, but we still have massive atomic operations.
So we can try the second variant.
Variant (2)
do_alloc_space(struct inode* inode, qsize_t num)
{
idx = srcu_read_lock(&dqptr)
spin_lock(&inode->i_lock);
if (!check_bdq(inode->i_dquot, num))
return EQDUOT;
inode_add_bytes_unlocked(inode, num);
dquot_add_bytes(inode->i_dquot, num);
/* We can sleep while we call quota dirty,
* But this is ok since quota deletion is protected by SRCU
* The only thing we have do is to save old i_dquot array
*/
dquot = inode->i_dquot;
spin_unlock(&inode->i_lock);
/* i_lock was dropped, do not touch i_dquot any more */
mark_dquot_dirty(dquot);
srcu_read_unlock(&dqptr, idx);
}
/* The destroy method may look like this; it will be called from
background work.
*/
do_destroy_quotas_loop() {
/* Wait for all quotas in free list */
synchronize_srcu(dqptr&)
/*
Ok now we can safely destroy all quotas in the list,
Some of them may be dirty, so we have to write it first.
*/
list_for_each_entry(dquot, &free_list, dq_free)
dquot_release(dquot);
}
>
> Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-06 13:17 ` Dmitry
@ 2010-10-06 13:41 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 13:41 UTC (permalink / raw)
To: Dmitry; +Cc: Jan Kara, linux-fsdevel, hch, Dmitry Monakhov
On Wed 06-10-10 17:17:03, Dmitry wrote:
> Later i've plans to get rid of dqptr_sem by rearranging code like follows.
> But may be i've introduced copy logic too soon.
>
> Variant (1)
> 1) Protect i_dquot dereference/modification with i_lock
> So charge/free function will looks like follows
> do_alloc_space (struct inode* inode, qsize_t num)
> {
> spin_lock(&inode->i_lock);
> if (!check_bdq(inode->i_dquot, num))
> return EQDUOT;
> inode_add_bytes_unlocked(inode, num);
> dquot_add_bytes(inode->i_dquot, num);
> /* We can sleep while we call quota dirty, to protect quota from
> * being deleted we have to get ref for the quota.
> */
> atomic_inc(dquot->dq_count);
> dquot = inode->i_dquot;
> spin_unlock(&inode->i_lock);
> /* i_lock was dropped, do not touch i_dquot any more */
> mark_dquot_dirty(dquot);
> dqput(dquot);
> }
> This is beter than before, but we still have massive anomic operations
> So we can try second variant.
>
> Variant (2)
> do_alloc_space(struct inode* inode, qsize_t num)
> {
> idx = srcu_read_lock(&dqptr)
> spin_lock(&inode->i_lock);
> if (!check_bdq(inode->i_dquot, num))
> return EQDUOT;
> inode_add_bytes_unlocked(inode, num);
> dquot_add_bytes(inode->i_dquot, num);
> /* We can sleep while we call quota dirty,
> * But this is ok since quota deletion is protected by SRCU
> * The only thing we have do is to save old i_dquot array
> */
> dquot = inode->i_dquot;
> spin_unlock(&inode->i_lock);
> /* i_lock was dropped, do not touch i_dquot any more */
> mark_dquot_dirty(dquot);
> srcu_read_unlock(&dqptr, idx);
> }
> /* Destroy method may looks like whis, it will be called from
> background work.
> */
> do_destroy_quotas_loop() {
> /* Wait for all quotas in free list */
> synchronize_srcu(dqptr&)
> /*
> Ok now we can safely destroy all quotas in the list,
> Some of them may be dirty, so we have to write it first.
> */
> list_for_each_entry(dquot, &free_list, dq_free)
> dquot_release(dquot);
>
> }
I see. Protecting dquots with RCU seems like a reasonable thing to do
and then I agree you'd need to copy the dquot pointers. But for now please
refrain from doing so. Thanks.
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-06 12:37 ` Jan Kara
2010-10-06 13:17 ` Dmitry
@ 2010-10-06 14:19 ` Dmitry
1 sibling, 0 replies; 35+ messages in thread
From: Dmitry @ 2010-10-06 14:19 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Wed, 6 Oct 2010 14:37:27 +0200, Jan Kara <jack@suse.cz> wrote:
> On Tue 05-10-10 22:20:25, Dmitry Monakhov wrote:
> > currently ->dq_data_lock is responsible for protecting three things
> >
> > 1) dquot->dq_dqb info consistency
> > 2) synchronization between ->dq_dqb with ->i_bytes
> > 3) Protects mem_dqinfo (per-sb data),
> > 3b) and consystency between mem_dqinfo and dq_dqb for following data.
> ^^ consistency
>
> > dqi_bgrace <=> dqb_btime
> > dqi_igrace <=> dqb_itime
> >
> > In fact (1) and (2) is conceptually different from (3)
> > By introducing per-dquot data lock we later can split (1)(2) from (3)
> > This patch simply introduce new lock, without changing ->dq_data_lock.
> ^^^^^^^^ introduces a
>
> I'd be interested in how much this split brings. The hold time of
> dq_data_lock isn't so big and I wonder if the higher scalability won't
> be mitigated by cache ping-pongs of structures themselves...
Currently it is unlikely that we would observe any difference at all
because we end up blocking on dq_mutex, dqio_mutex and i_mutex :(
I plan to optimize the io mutexes in a third series of scalability
patches. But nonetheless, I'll try to measure ext2 over tmpfs.
>
> > @@ -378,6 +380,37 @@ static inline void dqput_all(struct dquot **dquot)
> > dqput(dquot[cnt]);
> > }
> >
> > +static inline void inode_dquot_lock(const struct inode *inode,
> > + struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> > + dquot[cnt] = inode->i_dquot[cnt];
> > + if (dquot[cnt])
> > + spin_lock(&dquot[cnt]->dq_lock);
> > + }
> > +}
> > +
> > +static inline void dquot_lock_all(struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> > + if (dquot[cnt])
> > + spin_lock(&dquot[cnt]->dq_lock);
> > +
> > +}
> > +
> > +static inline void dquot_unlock_all(struct dquot **dquot)
> > +{
> > + unsigned int cnt;
> > +
> > + for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> > + if (dquot[cnt])
> > + spin_unlock(&dquot[cnt]->dq_lock);
> > +}
> Please, just have two locking functions like:
> lock_inode_dquots(inode->i_dquot), unlock_inode_dquots(inode->i_dquot).
> Also please avoid this copying of pointers to dquot structures. It just
> makes things harder to read and since dquot structures are reference
> counted, it's also not obviously correct strictly speaking (although fine
> in the end in your case since you hold dqptr_sem). If you wish to save some
> typing, just keep inode->i_dquot in a local variable.
>
> Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-05 18:20 ` [PATCH 09/11] quota: protect dquot mem info with objects's lock Dmitry Monakhov
2010-10-06 12:37 ` Jan Kara
@ 2010-10-06 13:30 ` Jan Kara
2010-10-06 13:41 ` Dmitry
1 sibling, 1 reply; 35+ messages in thread
From: Jan Kara @ 2010-10-06 13:30 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:25, Dmitry Monakhov wrote:
> @@ -1762,14 +1810,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> return 0;
> }
> spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> + inode_dquot_lock(inode, transfer_from);
> cur_space = inode_get_bytes(inode);
> rsv_space = inode_get_rsv_space(inode);
> space = cur_space + rsv_space;
> + dquot_lock_all(transfer_to);
One more problem I've spotted - here you cannot lock new and old dquot
structures in an arbitrary order as that would lead to deadlocks when two
dquot_transfers run in parallel. So you have to establish some ordering and
follow that while locking...
Honza
> /* Build the transfer_from list and check the limits */
> +
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!transfer_to[cnt])
> continue;
> - transfer_from[cnt] = inode->i_dquot[cnt];
> ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
> if (ret)
> goto over_quota;
> @@ -1806,6 +1856,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
>
> inode->i_dquot[cnt] = transfer_to[cnt];
> }
> + dquot_unlock_all(transfer_to);
> + dquot_unlock_all(transfer_from);
> spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
>
> @@ -1820,6 +1872,8 @@ warn:
> flush_warnings(transfer_from, warntype_from_space);
> return ret;
> over_quota:
> + dquot_unlock_all(transfer_to);
> + dquot_unlock_all(transfer_from);
> spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
> goto warn;
> @@ -2363,6 +2417,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> di->d_id = dquot->dq_id;
>
> spin_lock(&dq_opt(dquot)->dq_data_lock);
> + spin_lock(&dquot->dq_lock);
> di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
> di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
> di->d_ino_hardlimit = dm->dqb_ihardlimit;
> @@ -2371,6 +2426,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> di->d_icount = dm->dqb_curinodes;
> di->d_btimer = dm->dqb_btime;
> di->d_itimer = dm->dqb_itime;
> + spin_unlock(&dquot->dq_lock);
> spin_unlock(&dq_opt(dquot)->dq_data_lock);
> }
>
> @@ -2415,6 +2471,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> return -ERANGE;
>
> spin_lock(&dq_opt(dquot)->dq_data_lock);
> + spin_lock(&dquot->dq_lock);
> if (di->d_fieldmask & FS_DQ_BCOUNT) {
> dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
> check_blim = 1;
> @@ -2480,6 +2537,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> clear_bit(DQ_FAKE_B, &dquot->dq_flags);
> else
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> + spin_unlock(&dquot->dq_lock);
> spin_unlock(&dq_opt(dquot)->dq_data_lock);
> mark_dquot_dirty(dquot);
>
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index 8b04f24..1643c30 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -376,7 +376,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> }
> }
> spin_lock(&sb_dqopt(sb)->dq_data_lock);
> + spin_lock(&dquot->dq_lock);
> info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
> + spin_unlock(&dquot->dq_lock);
> spin_unlock(&sb_dqopt(sb)->dq_data_lock);
> ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
> dquot->dq_off);
> @@ -632,12 +634,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> goto out;
> }
> spin_lock(&sb_dqopt(sb)->dq_data_lock);
> + spin_lock(&dquot->dq_lock);
> info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
> if (!dquot->dq_dqb.dqb_bhardlimit &&
> !dquot->dq_dqb.dqb_bsoftlimit &&
> !dquot->dq_dqb.dqb_ihardlimit &&
> !dquot->dq_dqb.dqb_isoftlimit)
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> + spin_unlock(&dquot->dq_lock);
> spin_unlock(&sb_dqopt(sb)->dq_data_lock);
> kfree(ddquot);
> out:
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index 9e7a102..197660f 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -294,6 +294,7 @@ struct dquot {
> unsigned long dq_flags; /* See DQ_* */
> short dq_type; /* Type of quota */
> struct mem_dqblk dq_dqb; /* Diskquota usage */
> + spinlock_t dq_lock; /* protect in mem_dqblk */
> };
>
> /* Operations which must be implemented by each quota format */
> --
> 1.6.6.1
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 09/11] quota: protect dquot mem info with objects's lock
2010-10-06 13:30 ` Jan Kara
@ 2010-10-06 13:41 ` Dmitry
0 siblings, 0 replies; 35+ messages in thread
From: Dmitry @ 2010-10-06 13:41 UTC (permalink / raw)
To: Jan Kara; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Wed, 6 Oct 2010 15:30:18 +0200, Jan Kara <jack@suse.cz> wrote:
> On Tue 05-10-10 22:20:25, Dmitry Monakhov wrote:
> > @@ -1762,14 +1810,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> > return 0;
> > }
> > spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> > + inode_dquot_lock(inode, transfer_from);
> > cur_space = inode_get_bytes(inode);
> > rsv_space = inode_get_rsv_space(inode);
> > space = cur_space + rsv_space;
> > + dquot_lock_all(transfer_to);
> One more problem I've spotted - here you cannot lock new and old dquot
> structures in an arbitrary order as that would lead to deadlocks when two
> dquot_transfers run in parallel. So you have to establish some ordering and
> follow that while locking...
Absolutely, WOW, I've even written a reminder for that somewhere,
and as usual simply forgot about it :(
Minor issue: I've also forgotten to place different quota types into different
lockdep classes, which results in a complaint from lockdep. Will fix that too.
>
> Honza
>
> > /* Build the transfer_from list and check the limits */
> > +
> > for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> > if (!transfer_to[cnt])
> > continue;
> > - transfer_from[cnt] = inode->i_dquot[cnt];
> > ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
> > if (ret)
> > goto over_quota;
> > @@ -1806,6 +1856,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> >
> > inode->i_dquot[cnt] = transfer_to[cnt];
> > }
> > + dquot_unlock_all(transfer_to);
> > + dquot_unlock_all(transfer_from);
> > spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> > up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
> >
> > @@ -1820,6 +1872,8 @@ warn:
> > flush_warnings(transfer_from, warntype_from_space);
> > return ret;
> > over_quota:
> > + dquot_unlock_all(transfer_to);
> > + dquot_unlock_all(transfer_from);
> > spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> > up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
> > goto warn;
> > @@ -2363,6 +2417,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> > di->d_id = dquot->dq_id;
> >
> > spin_lock(&dq_opt(dquot)->dq_data_lock);
> > + spin_lock(&dquot->dq_lock);
> > di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
> > di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
> > di->d_ino_hardlimit = dm->dqb_ihardlimit;
> > @@ -2371,6 +2426,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> > di->d_icount = dm->dqb_curinodes;
> > di->d_btimer = dm->dqb_btime;
> > di->d_itimer = dm->dqb_itime;
> > + spin_unlock(&dquot->dq_lock);
> > spin_unlock(&dq_opt(dquot)->dq_data_lock);
> > }
> >
> > @@ -2415,6 +2471,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> > return -ERANGE;
> >
> > spin_lock(&dq_opt(dquot)->dq_data_lock);
> > + spin_lock(&dquot->dq_lock);
> > if (di->d_fieldmask & FS_DQ_BCOUNT) {
> > dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
> > check_blim = 1;
> > @@ -2480,6 +2537,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> > clear_bit(DQ_FAKE_B, &dquot->dq_flags);
> > else
> > set_bit(DQ_FAKE_B, &dquot->dq_flags);
> > + spin_unlock(&dquot->dq_lock);
> > spin_unlock(&dq_opt(dquot)->dq_data_lock);
> > mark_dquot_dirty(dquot);
> >
> > diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> > index 8b04f24..1643c30 100644
> > --- a/fs/quota/quota_tree.c
> > +++ b/fs/quota/quota_tree.c
> > @@ -376,7 +376,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> > }
> > }
> > spin_lock(&sb_dqopt(sb)->dq_data_lock);
> > + spin_lock(&dquot->dq_lock);
> > info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
> > + spin_unlock(&dquot->dq_lock);
> > spin_unlock(&sb_dqopt(sb)->dq_data_lock);
> > ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
> > dquot->dq_off);
> > @@ -632,12 +634,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> > goto out;
> > }
> > spin_lock(&sb_dqopt(sb)->dq_data_lock);
> > + spin_lock(&dquot->dq_lock);
> > info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
> > if (!dquot->dq_dqb.dqb_bhardlimit &&
> > !dquot->dq_dqb.dqb_bsoftlimit &&
> > !dquot->dq_dqb.dqb_ihardlimit &&
> > !dquot->dq_dqb.dqb_isoftlimit)
> > set_bit(DQ_FAKE_B, &dquot->dq_flags);
> > + spin_unlock(&dquot->dq_lock);
> > spin_unlock(&sb_dqopt(sb)->dq_data_lock);
> > kfree(ddquot);
> > out:
> > diff --git a/include/linux/quota.h b/include/linux/quota.h
> > index 9e7a102..197660f 100644
> > --- a/include/linux/quota.h
> > +++ b/include/linux/quota.h
> > @@ -294,6 +294,7 @@ struct dquot {
> > unsigned long dq_flags; /* See DQ_* */
> > short dq_type; /* Type of quota */
> > struct mem_dqblk dq_dqb; /* Diskquota usage */
> > + spinlock_t dq_lock; /* protect in mem_dqblk */
> > };
> >
> > /* Operations which must be implemented by each quota format */
> > --
> > 1.6.6.1
> >
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 10/11] quota: drop dq_data_lock where possible
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (8 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 09/11] quota: protect dquot mem info with objects's lock Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-05 18:20 ` [PATCH 11/11] quota: relax dq_data_lock dq_lock locking consistency Dmitry Monakhov
2010-10-06 7:08 ` [PATCH 0/11] RFC quota scalability V1 Dmitry
11 siblings, 0 replies; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
FIXME: I've skipped the ocfs2 code, because I don't understand it.
Let Jan do it someday.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 2 --
fs/quota/quota_tree.c | 4 ----
2 files changed, 0 insertions(+), 6 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 2c87709..fc3b63a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2416,7 +2416,6 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
FS_USER_QUOTA : FS_GROUP_QUOTA;
di->d_id = dquot->dq_id;
- spin_lock(&dq_opt(dquot)->dq_data_lock);
spin_lock(&dquot->dq_lock);
di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
@@ -2427,7 +2426,6 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
di->d_btimer = dm->dqb_btime;
di->d_itimer = dm->dqb_itime;
spin_unlock(&dquot->dq_lock);
- spin_unlock(&dq_opt(dquot)->dq_data_lock);
}
int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 1643c30..49e3419 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -375,11 +375,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
return ret;
}
}
- spin_lock(&sb_dqopt(sb)->dq_data_lock);
spin_lock(&dquot->dq_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
spin_unlock(&dquot->dq_lock);
- spin_unlock(&sb_dqopt(sb)->dq_data_lock);
ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
dquot->dq_off);
if (ret != info->dqi_entry_size) {
@@ -633,7 +631,6 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
kfree(ddquot);
goto out;
}
- spin_lock(&sb_dqopt(sb)->dq_data_lock);
spin_lock(&dquot->dq_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
@@ -642,7 +639,6 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
spin_unlock(&dquot->dq_lock);
- spin_unlock(&sb_dqopt(sb)->dq_data_lock);
kfree(ddquot);
out:
dqstats_inc(DQST_READS);
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* [PATCH 11/11] quota: relax dq_data_lock dq_lock locking consistency
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (9 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 10/11] quota: drop dq_data_lock where possible Dmitry Monakhov
@ 2010-10-05 18:20 ` Dmitry Monakhov
2010-10-06 11:56 ` Jan Kara
2010-10-06 7:08 ` [PATCH 0/11] RFC quota scalability V1 Dmitry
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry Monakhov @ 2010-10-05 18:20 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
In fact, the consistency between mem_info and dq_dqb is weak because we
just copy data from dqi_{bi}grace to dqb_{bi}time.
So we protect dqb_{bi}time from races with the quota_ctl call.
Nothing bad actually happens if we relax this consistency requirement.
Since dqi_{bi}grace is an int, it is possible to read it atomically without a lock.
Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
---
fs/quota/dquot.c | 28 ++++++++--------------------
1 files changed, 8 insertions(+), 20 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fc3b63a..0c9cf67 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1254,7 +1254,7 @@ static int ignore_hardlimit(struct dquot *dquot)
!(info->dqi_flags & V1_DQF_RSQUASH));
}
-/* needs dq_data_lock, ->dq_lock */
+/* needs ->dq_lock */
static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
{
qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
@@ -1284,6 +1284,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
newinodes > dquot->dq_dqb.dqb_isoftlimit &&
dquot->dq_dqb.dqb_itime == 0) {
*warntype = QUOTA_NL_ISOFTWARN;
+ /* It is ok to read dqi_bgrace without lock here */
dquot->dq_dqb.dqb_itime = get_seconds() +
dq_opt(dquot)->info[dquot->dq_type].dqi_igrace;
}
@@ -1291,7 +1292,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
return 0;
}
-/* needs dq_data_lock, ->dq_lock */
+/* needs ->dq_lock */
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
qsize_t tspace;
@@ -1328,6 +1329,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
*warntype = QUOTA_NL_BSOFTWARN;
+ /* It is ok to read dqi_bgrace without lock here */
dquot->dq_dqb.dqb_btime = get_seconds() +
sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace;
}
@@ -1596,7 +1598,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquot[cnt])
@@ -1604,7 +1605,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
ret = check_bdq(dquot[cnt], number, !warn, warntype+cnt);
if (ret && !nofail) {
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
goto out_flush_warn;
}
}
@@ -1618,7 +1618,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
}
inode_incr_space(inode, number, reserve);
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_flush_warn;
@@ -1647,7 +1646,6 @@ int dquot_alloc_inode(const struct inode *inode)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquot[cnt])
@@ -1665,7 +1663,6 @@ int dquot_alloc_inode(const struct inode *inode)
warn_put_all:
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (ret == 0)
mark_all_dquot_dirty(dquot);
flush_warnings(dquot, warntype);
@@ -1688,7 +1685,6 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, dquot);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1698,7 +1694,6 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
/* Update inode bytes */
inode_claim_rsv_space(inode, number);
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
mark_all_dquot_dirty(dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
@@ -1723,7 +1718,6 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquot[cnt])
@@ -1736,7 +1730,6 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
}
inode_decr_space(inode, number, reserve);
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
if (reserve)
goto out_unlock;
@@ -1762,7 +1755,6 @@ void dquot_free_inode(const struct inode *inode)
return;
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, dquot);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquot[cnt])
@@ -1771,7 +1763,6 @@ void dquot_free_inode(const struct inode *inode)
dquot_decr_inodes(dquot[cnt], 1);
}
dquot_unlock_all(dquot);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
mark_all_dquot_dirty(dquot);
flush_warnings(dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1809,7 +1800,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
- spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
inode_dquot_lock(inode, transfer_from);
cur_space = inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode);
@@ -1858,7 +1848,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
}
dquot_unlock_all(transfer_to);
dquot_unlock_all(transfer_from);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
mark_all_dquot_dirty(transfer_from);
@@ -1874,7 +1863,6 @@ warn:
over_quota:
dquot_unlock_all(transfer_to);
dquot_unlock_all(transfer_from);
- spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
goto warn;
}
@@ -2468,7 +2456,6 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
(di->d_ino_hardlimit > dqi->dqi_maxilimit)))
return -ERANGE;
- spin_lock(&dq_opt(dquot)->dq_data_lock);
spin_lock(&dquot->dq_lock);
if (di->d_fieldmask & FS_DQ_BCOUNT) {
dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
@@ -2518,7 +2505,8 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
dm->dqb_btime = 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
} else if (!(di->d_fieldmask & FS_DQ_BTIMER))
- /* Set grace only if user hasn't provided his own... */
+ /* Set grace only if user hasn't provided his own...
+ Read dqi_bgrace without lock */
dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
}
if (check_ilim) {
@@ -2527,7 +2515,8 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
dm->dqb_itime = 0;
clear_bit(DQ_INODES_B, &dquot->dq_flags);
} else if (!(di->d_fieldmask & FS_DQ_ITIMER))
- /* Set grace only if user hasn't provided his own... */
+ /* Set grace only if user hasn't provided his own...
+ Read dqi_bgrace without lock */
dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
}
if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit ||
@@ -2536,7 +2525,6 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
else
set_bit(DQ_FAKE_B, &dquot->dq_flags);
spin_unlock(&dquot->dq_lock);
- spin_unlock(&dq_opt(dquot)->dq_data_lock);
mark_dquot_dirty(dquot);
return 0;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 11/11] quota: relax dq_data_lock dq_lock locking consistency
2010-10-05 18:20 ` [PATCH 11/11] quota: relax dq_data_lock dq_lock locking consistency Dmitry Monakhov
@ 2010-10-06 11:56 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 11:56 UTC (permalink / raw)
To: Dmitry Monakhov; +Cc: linux-fsdevel, jack, hch, Dmitry Monakhov
On Tue 05-10-10 22:20:27, Dmitry Monakhov wrote:
> In fact consistency between mem_info and dq_dqb is weak because we
> just copy data from dqi_{bi}grace to dqb_{bi}time.
> So we protect dqb_{bi}time from races with quota_ctl call.
> Nothing actually happens if we relax this consistency requirement.
> Since dqi_{bi}grace is int it is possible read it atomically without lock.
OK, but comments like "It is ok to read dqi_bgrace without lock here"
do not explain why it is ok. Instead of commenting at each place where
we read those values, I'd add a more detailed comment before the declaration of
mem_dqinfo stating that dqi_bgrace and dqi_igrace can be read without the lock and
that it's safe because the reads are atomic.
Honza
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
> ---
> fs/quota/dquot.c | 28 ++++++++--------------------
> 1 files changed, 8 insertions(+), 20 deletions(-)
>
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index fc3b63a..0c9cf67 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -1254,7 +1254,7 @@ static int ignore_hardlimit(struct dquot *dquot)
> !(info->dqi_flags & V1_DQF_RSQUASH));
> }
>
> -/* needs dq_data_lock, ->dq_lock */
> +/* needs ->dq_lock */
> static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
> {
> qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
> @@ -1284,6 +1284,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
> newinodes > dquot->dq_dqb.dqb_isoftlimit &&
> dquot->dq_dqb.dqb_itime == 0) {
> *warntype = QUOTA_NL_ISOFTWARN;
> + /* It is ok to read dqi_bgrace without lock here */
> dquot->dq_dqb.dqb_itime = get_seconds() +
> dq_opt(dquot)->info[dquot->dq_type].dqi_igrace;
> }
> @@ -1291,7 +1292,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
> return 0;
> }
>
> -/* needs dq_data_lock, ->dq_lock */
> +/* needs ->dq_lock */
> static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
> {
> qsize_t tspace;
> @@ -1328,6 +1329,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
> dquot->dq_dqb.dqb_btime == 0) {
> if (!prealloc) {
> *warntype = QUOTA_NL_BSOFTWARN;
> + /* It is ok to read dqi_bgrace without lock here */
> dquot->dq_dqb.dqb_btime = get_seconds() +
> sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace;
> }
> @@ -1596,7 +1598,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> warntype[cnt] = QUOTA_NL_NOWARN;
>
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, dquot);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!dquot[cnt])
> @@ -1604,7 +1605,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
> ret = check_bdq(dquot[cnt], number, !warn, warntype+cnt);
> if (ret && !nofail) {
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> goto out_flush_warn;
> }
> }
> @@ -1618,7 +1618,6 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
> }
> inode_incr_space(inode, number, reserve);
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
>
> if (reserve)
> goto out_flush_warn;
> @@ -1647,7 +1646,6 @@ int dquot_alloc_inode(const struct inode *inode)
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> warntype[cnt] = QUOTA_NL_NOWARN;
> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, dquot);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!dquot[cnt])
> @@ -1665,7 +1663,6 @@ int dquot_alloc_inode(const struct inode *inode)
>
> warn_put_all:
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> if (ret == 0)
> mark_all_dquot_dirty(dquot);
> flush_warnings(dquot, warntype);
> @@ -1688,7 +1685,6 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
> }
>
> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, dquot);
> /* Claim reserved quotas to allocated quotas */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> @@ -1698,7 +1694,6 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
> /* Update inode bytes */
> inode_claim_rsv_space(inode, number);
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> mark_all_dquot_dirty(dquot);
> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> return 0;
> @@ -1723,7 +1718,6 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
> }
>
> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, dquot);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!dquot[cnt])
> @@ -1736,7 +1730,6 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
> }
> inode_decr_space(inode, number, reserve);
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
>
> if (reserve)
> goto out_unlock;
> @@ -1762,7 +1755,6 @@ void dquot_free_inode(const struct inode *inode)
> return;
>
> down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, dquot);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!dquot[cnt])
> @@ -1771,7 +1763,6 @@ void dquot_free_inode(const struct inode *inode)
> dquot_decr_inodes(dquot[cnt], 1);
> }
> dquot_unlock_all(dquot);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> mark_all_dquot_dirty(dquot);
> flush_warnings(dquot, warntype);
> up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
> @@ -1809,7 +1800,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
> return 0;
> }
> - spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> inode_dquot_lock(inode, transfer_from);
> cur_space = inode_get_bytes(inode);
> rsv_space = inode_get_rsv_space(inode);
> @@ -1858,7 +1848,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> }
> dquot_unlock_all(transfer_to);
> dquot_unlock_all(transfer_from);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
>
> mark_all_dquot_dirty(transfer_from);
> @@ -1874,7 +1863,6 @@ warn:
> over_quota:
> dquot_unlock_all(transfer_to);
> dquot_unlock_all(transfer_from);
> - spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock);
> up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
> goto warn;
> }
> @@ -2468,7 +2456,6 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
> return -ERANGE;
>
> - spin_lock(&dq_opt(dquot)->dq_data_lock);
> spin_lock(&dquot->dq_lock);
> if (di->d_fieldmask & FS_DQ_BCOUNT) {
> dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
> @@ -2518,7 +2505,8 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> dm->dqb_btime = 0;
> clear_bit(DQ_BLKS_B, &dquot->dq_flags);
> } else if (!(di->d_fieldmask & FS_DQ_BTIMER))
> - /* Set grace only if user hasn't provided his own... */
> + /* Set grace only if user hasn't provided his own...
> + Read dqi_bgrace without lock */
> dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
> }
> if (check_ilim) {
> @@ -2527,7 +2515,8 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> dm->dqb_itime = 0;
> clear_bit(DQ_INODES_B, &dquot->dq_flags);
> } else if (!(di->d_fieldmask & FS_DQ_ITIMER))
> - /* Set grace only if user hasn't provided his own... */
> + /* Set grace only if user hasn't provided his own...
> + Read dqi_bgrace without lock */
> dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
> }
> if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit ||
> @@ -2536,7 +2525,6 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> else
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> spin_unlock(&dquot->dq_lock);
> - spin_unlock(&dq_opt(dquot)->dq_data_lock);
> mark_dquot_dirty(dquot);
>
> return 0;
> --
> 1.6.6.1
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 0/11] RFC quota scalability V1
2010-10-05 18:20 (unknown), Dmitry Monakhov
` (10 preceding siblings ...)
2010-10-05 18:20 ` [PATCH 11/11] quota: relax dq_data_lock dq_lock locking consistency Dmitry Monakhov
@ 2010-10-06 7:08 ` Dmitry
2010-10-06 9:44 ` Jan Kara
11 siblings, 1 reply; 35+ messages in thread
From: Dmitry @ 2010-10-06 7:08 UTC (permalink / raw)
To: linux-fsdevel; +Cc: jack, hch, Dmitry Monakhov
On Tue, 5 Oct 2010 22:20:16 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
Sorry, it seems like my script screwed up the subject header.
> This patch set is my first attempt to make quota code scalable.
> Main goal of this patch-set is to split global locking to per-sb basis.
> Please review it and provide your opinion.
>
> Future plans:
> * More scalability for single super_block
> ** Remove dqptr_sem
> ** Redesing dquot ref accounting interface.
> ** Make fast path for charge function lockless
>
> quota: add wrapper function
> quota: Convert dq_state_lock to per-sb dq_state_lock
> quota: add quota format lock
> quota: make dquot lists per-sb
> quota: make per-sb hash array
> quota: remove global dq_list_lock
> quota: rename dq_lock
> quota: make per-sb dq_data_lock
> quota: protect dquot mem info with objects's lock
> quota: drop dq_data_lock where possible
> quota: relax dq_data_lock dq_lock locking consistency
>
> fs/ext3/super.c | 2
> fs/ext4/super.c | 2
> fs/ocfs2/quota_global.c | 42 ++--
> fs/ocfs2/quota_local.c | 17 +
> fs/quota/dquot.c | 473 +++++++++++++++++++++++++++--------------------
> fs/quota/quota_tree.c | 12 -
> fs/quota/quota_v1.c | 8
> fs/quota/quota_v2.c | 4
> fs/super.c | 5
> include/linux/quota.h | 20 +
> include/linux/quotaops.h | 4
> 11 files changed, 355 insertions(+), 234 deletions(-)
> Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 0/11] RFC quota scalability V1
2010-10-06 7:08 ` [PATCH 0/11] RFC quota scalability V1 Dmitry
@ 2010-10-06 9:44 ` Jan Kara
2010-10-06 10:15 ` Dmitry
2010-10-10 3:50 ` Brad Boyer
0 siblings, 2 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 9:44 UTC (permalink / raw)
To: Dmitry; +Cc: linux-fsdevel, jack, hch
On Wed 06-10-10 11:08:00, Dmitry wrote:
> On Tue, 5 Oct 2010 22:20:16 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
> Sorry, seems like my script screw-up subject header.
> > This patch set is my first attempt to make quota code scalable.
> > Main goal of this patch-set is to split global locking to per-sb basis.
> > Please review it and provide your opinion.
> >
> > Future plans:
> > * More scalability for single super_block
> > ** Remove dqptr_sem
> > ** Redesing dquot ref accounting interface.
> > ** Make fast path for charge function lockless
> >
> > quota: add wrapper function
> > quota: Convert dq_state_lock to per-sb dq_state_lock
> > quota: add quota format lock
> > quota: make dquot lists per-sb
> > quota: make per-sb hash array
> > quota: remove global dq_list_lock
> > quota: rename dq_lock
> > quota: make per-sb dq_data_lock
> > quota: protect dquot mem info with objects's lock
> > quota: drop dq_data_lock where possible
> > quota: relax dq_data_lock dq_lock locking consistency
So one generic question: I see you split global locks into per-sb ones.
Does it bring substantial advantage? Because from my experience common
systems have just one or maybe two filesystems which are heavily used...
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 0/11] RFC quota scalability V1
2010-10-06 9:44 ` Jan Kara
@ 2010-10-06 10:15 ` Dmitry
2010-10-06 10:47 ` Jan Kara
2010-10-10 3:50 ` Brad Boyer
1 sibling, 1 reply; 35+ messages in thread
From: Dmitry @ 2010-10-06 10:15 UTC (permalink / raw)
To: Jan Kara, Dmitry; +Cc: linux-fsdevel, jack, hch
On Wed, 6 Oct 2010 11:44:31 +0200, Jan Kara <jack@suse.cz> wrote:
> On Wed 06-10-10 11:08:00, Dmitry wrote:
> > On Tue, 5 Oct 2010 22:20:16 +0400, Dmitry Monakhov <dmonakhov@openvz.org> wrote:
> > Sorry, seems like my script screw-up subject header.
> > > This patch set is my first attempt to make quota code scalable.
> > > Main goal of this patch-set is to split global locking to per-sb basis.
> > > Please review it and provide your opinion.
> > >
> > > Future plans:
> > > * More scalability for single super_block
> > > ** Remove dqptr_sem
> > > ** Redesing dquot ref accounting interface.
> > > ** Make fast path for charge function lockless
> > >
BTW, I've forgotten to mention that the patch-set is against the 2.6.36-rc5
linux-fs-2.6.git for_testing branch.
> > > quota: add wrapper function
> > > quota: Convert dq_state_lock to per-sb dq_state_lock
> > > quota: add quota format lock
> > > quota: make dquot lists per-sb
> > > quota: make per-sb hash array
> > > quota: remove global dq_list_lock
> > > quota: rename dq_lock
> > > quota: make per-sb dq_data_lock
> > > quota: protect dquot mem info with objects's lock
> > > quota: drop dq_data_lock where possible
> > > quota: relax dq_data_lock dq_lock locking consistency
> So one generic question: I see you split global locks into per-sb ones.
> Does it bring substantial advantage? Because from my experience common
> systems have just one or maybe two filesystems which are heavily used...
It was so in the past, but nowadays hardware is too powerful for a single
system. Today is the age of virtual environments (containers, cgroups,
etc.). Usually each VE has its own filesystem, and people do care about
density and scalability. So we need scalable quota for each VE.
In fact, this patch-set was inspired by Nick's attempt to remove the
global inode_lock.
I've skipped serious performance testing for this version; I just
want to hear opinions about the direction I'm moving in.
I can post some numbers with the next patch-set version.
>
> Honza
> --
> Jan Kara <jack@suse.cz>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 0/11] RFC quota scalability V1
2010-10-06 10:15 ` Dmitry
@ 2010-10-06 10:47 ` Jan Kara
0 siblings, 0 replies; 35+ messages in thread
From: Jan Kara @ 2010-10-06 10:47 UTC (permalink / raw)
To: Dmitry; +Cc: Jan Kara, Dmitry, linux-fsdevel, hch
On Wed 06-10-10 14:15:38, Dmitry wrote:
> > > > quota: add wrapper function
> > > > quota: Convert dq_state_lock to per-sb dq_state_lock
> > > > quota: add quota format lock
> > > > quota: make dquot lists per-sb
> > > > quota: make per-sb hash array
> > > > quota: remove global dq_list_lock
> > > > quota: rename dq_lock
> > > > quota: make per-sb dq_data_lock
> > > > quota: protect dquot mem info with objects's lock
> > > > quota: drop dq_data_lock where possible
> > > > quota: relax dq_data_lock dq_lock locking consistency
> > So one generic question: I see you split global locks into per-sb ones.
> > Does it bring substantial advantage? Because from my experience common
> > systems have just one or maybe two filesystems which are heavily used...
> It was so in the past, but currently hardware is too powerful for single
> system. Today is the age of virtual environments (containers, cgroups,
> etc). Usually each VE has it own filesystem. And people do care about
OK, in the virtualization environments I've seen, they usually used a
single filesystem as a backing store for fs images. I imagine that if you
use containers, you may have incentive to have a separate fs for each
container although I thought that people used bind mounts and such stuff
in these cases - I've never seen a real system using containers ;). Anyway
if you know about systems that would take advantage of your split then I'm
OK with that. The split does not harm the single fs case and is simple
enough...
Honza
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 0/11] RFC quota scalability V1
2010-10-06 9:44 ` Jan Kara
2010-10-06 10:15 ` Dmitry
@ 2010-10-10 3:50 ` Brad Boyer
1 sibling, 0 replies; 35+ messages in thread
From: Brad Boyer @ 2010-10-10 3:50 UTC (permalink / raw)
To: Jan Kara; +Cc: Dmitry, linux-fsdevel, hch
On Wed, Oct 06, 2010 at 11:44:31AM +0200, Jan Kara wrote:
> So one generic question: I see you split global locks into per-sb ones.
> Does it bring substantial advantage? Because from my experience common
> systems have just one or maybe two filesystems which are heavily used...
I don't know if they had quotas enabled, and I can't comment on how common
it is, but I've seen several places with hundreds of filesystems mounted
at once. It seemed kind of odd to me as well, but it does happen in some places.
Brad Boyer
flar@allandria.com
^ permalink raw reply [flat|nested] 35+ messages in thread