From: Jan Kara <jack@suse.cz>
To: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: linux-fsdevel@vger.kernel.org, Jan Kara <jack@suse.cz>
Subject: Re: [PATCH] quota: Make quota stat accounting lockless.
Date: Mon, 26 Apr 2010 17:16:23 +0200 [thread overview]
Message-ID: <20100426151623.GD3673@quack.suse.cz> (raw)
In-Reply-To: <87ljceka4i.fsf@openvz.org>
On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
>
> I've written this patch long time ago, it is pretty simple, and
> allow more non obvious locking optimization to be done later.
The patch looks OK to me. Will merge it.
Honza
> From 234b13091fcb16966c2abcd936c8a0ed99822952 Mon Sep 17 00:00:00 2001
> From: Dmitry Monakhov <dmonakhov@openvz.org>
> Date: Fri, 23 Apr 2010 09:06:41 +0400
> Subject: [PATCH] quota: Make quota stat accounting lockless.
>
> Quota stats it mostly writable data structure. Let's alloc percpu
> bucket for each value.
>
> NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
> and may return inconsistent value. But this is ok since absolute
> accuracy is not required.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> ---
> fs/quota/dquot.c | 122 +++++++++++++++++++++++++++++++++++--------------
> fs/quota/quota_tree.c | 4 +-
> fs/quota/quota_v1.c | 4 +-
> include/linux/quota.h | 26 ++++++----
> 4 files changed, 106 insertions(+), 50 deletions(-)
>
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index 05c590e..aa78e26 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -82,7 +82,7 @@
>
> /*
> * There are three quota SMP locks. dq_list_lock protects all lists with quotas
> - * and quota formats, dqstats structure containing statistics about the lists
> + * and quota formats.
> * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
> * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
> * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
> @@ -132,6 +132,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
> __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
> EXPORT_SYMBOL(dq_data_lock);
>
> +#ifdef CONFIG_SMP
> +struct dqstats *dqstats_pcpu;
> +#endif
> +struct dqstats dqstats;
> +
> static char *quotatypes[] = INITQFNAMES;
> static struct quota_format_type *quota_formats; /* List of registered formats */
> static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
> @@ -273,7 +278,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
> static inline void put_dquot_last(struct dquot *dquot)
> {
> list_add_tail(&dquot->dq_free, &free_dquots);
> - dqstats.free_dquots++;
> + dqstats_inc(DQST_FREE_DQUOTS);
> }
>
> static inline void remove_free_dquot(struct dquot *dquot)
> @@ -281,7 +286,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
> if (list_empty(&dquot->dq_free))
> return;
> list_del_init(&dquot->dq_free);
> - dqstats.free_dquots--;
> + dqstats_dec(DQST_FREE_DQUOTS);
> }
>
> static inline void put_inuse(struct dquot *dquot)
> @@ -289,12 +294,12 @@ static inline void put_inuse(struct dquot *dquot)
> /* We add to the back of inuse list so we don't have to restart
> * when traversing this list and we block */
> list_add_tail(&dquot->dq_inuse, &inuse_list);
> - dqstats.allocated_dquots++;
> + dqstats_inc(DQST_ALLOC_DQUOTS);
> }
>
> static inline void remove_inuse(struct dquot *dquot)
> {
> - dqstats.allocated_dquots--;
> + dqstats_dec(DQST_ALLOC_DQUOTS);
> list_del(&dquot->dq_inuse);
> }
> /*
> @@ -559,8 +564,8 @@ int dquot_scan_active(struct super_block *sb,
> continue;
> /* Now we have active dquot so we can just increase use count */
> atomic_inc(&dquot->dq_count);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> dqput(old_dquot);
> old_dquot = dquot;
> ret = fn(dquot, priv);
> @@ -605,8 +610,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
> * holding reference so we can safely just increase
> * use count */
> atomic_inc(&dquot->dq_count);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> sb->dq_op->write_dquot(dquot);
> dqput(dquot);
> spin_lock(&dq_list_lock);
> @@ -618,9 +623,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
> if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
> && info_dirty(&dqopt->info[cnt]))
> sb->dq_op->write_info(sb, cnt);
> - spin_lock(&dq_list_lock);
> - dqstats.syncs++;
> - spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_SYNCS);
> mutex_unlock(&dqopt->dqonoff_mutex);
>
> if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
> @@ -684,7 +687,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
> prune_dqcache(nr);
> spin_unlock(&dq_list_lock);
> }
> - return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
> + return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
> }
>
> static struct shrinker dqcache_shrinker = {
> @@ -712,10 +715,7 @@ void dqput(struct dquot *dquot)
> BUG();
> }
> #endif
> -
> - spin_lock(&dq_list_lock);
> - dqstats.drops++;
> - spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_DROPS);
> we_slept:
> spin_lock(&dq_list_lock);
> if (atomic_read(&dquot->dq_count) > 1) {
> @@ -832,15 +832,16 @@ we_slept:
> put_inuse(dquot);
> /* hash it first so it can be found */
> insert_dquot_hash(dquot);
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_LOOKUPS);
> +
> } else {
> if (!atomic_read(&dquot->dq_count))
> remove_free_dquot(dquot);
> atomic_inc(&dquot->dq_count);
> - dqstats.cache_hits++;
> - dqstats.lookups++;
> spin_unlock(&dq_list_lock);
> + dqstats_inc(DQST_CACHE_HITS);
> + dqstats_inc(DQST_LOOKUPS);
> }
> /* Wait for dq_lock - after this we know that either dquot_release() is
> * already finished or it will be canceled due to dq_count > 1 test */
> @@ -2474,68 +2475,112 @@ const struct quotactl_ops vfs_quotactl_ops = {
> .set_dqblk = vfs_set_dqblk
> };
>
> +void dqstats_inc(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
> +#else
> + dqstats[type]++;
> +#endif
> +}
> +
> +void dqstats_dec(unsigned int type)
> +{
> +#ifdef CONFIG_SMP
> + per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
> +#else
> + dqstats[type]--;
> +#endif
> +}
> +
> +int dqstats_read(unsigned int type)
> +{
> + int count = 0;
> + int cpu;
> +#ifdef CONFIG_SMP
> + for_each_possible_cpu(cpu)
> + count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
> + /* Statistics reading is racy, but absolute accuracy isn't required */
> + if (count < 0)
> + count = 0;
> +#else
> + count = dqstats[type];
> +#endif
> + return count;
> +}
> +
> +static int do_proc_dqstats(struct ctl_table *table, int write,
> + void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +#ifdef CONFIG_SMP
> + /* Update global table */
> + unsigned int type = (int *)table->data - dqstats.stat;
> + dqstats.stat[type] = dqstats_read(type);
> +#endif
> + return proc_dointvec(table, write, buffer, lenp, ppos);
> +}
> +
> static ctl_table fs_dqstats_table[] = {
> {
> .procname = "lookups",
> - .data = &dqstats.lookups,
> + .data = &dqstats.stat[DQST_LOOKUPS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "drops",
> - .data = &dqstats.drops,
> + .data = &dqstats.stat[DQST_DROPS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "reads",
> - .data = &dqstats.reads,
> + .data = &dqstats.stat[DQST_READS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "writes",
> - .data = &dqstats.writes,
> + .data = &dqstats.stat[DQST_WRITES],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "cache_hits",
> - .data = &dqstats.cache_hits,
> + .data = &dqstats.stat[DQST_CACHE_HITS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "allocated_dquots",
> - .data = &dqstats.allocated_dquots,
> + .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "free_dquots",
> - .data = &dqstats.free_dquots,
> + .data = &dqstats.stat[DQST_FREE_DQUOTS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> {
> .procname = "syncs",
> - .data = &dqstats.syncs,
> + .data = &dqstats.stat[DQST_SYNCS],
> .maxlen = sizeof(int),
> .mode = 0444,
> - .proc_handler = proc_dointvec,
> + .proc_handler = do_proc_dqstats,
> },
> #ifdef CONFIG_PRINT_QUOTA_WARNING
> {
> .procname = "warnings",
> .data = &flag_print_warnings,
> - .maxlen = sizeof(int),
> .mode = 0644,
> .proc_handler = proc_dointvec,
> },
> @@ -2581,6 +2626,13 @@ static int __init dquot_init(void)
> if (!dquot_hash)
> panic("Cannot create dquot hash table");
>
> +#ifdef CONFIG_SMP
> + dqstats_pcpu = alloc_percpu(struct dqstats);
> + if (!dqstats_pcpu)
> + panic("Cannot create dquot stats table");
> +#endif
> + memset(&dqstats, 0, sizeof(struct dqstats));
> +
> /* Find power-of-two hlist_heads which can fit into allocation */
> nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
> dq_hash_bits = 0;
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index f81f4bc..5b7f741 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> } else {
> ret = 0;
> }
> - dqstats.writes++;
> + dqstats_inc(DQST_WRITES);
> kfree(ddquot);
>
> return ret;
> @@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> spin_unlock(&dq_data_lock);
> kfree(ddquot);
> out:
> - dqstats.reads++;
> + dqstats_inc(DQST_READS);
> return ret;
> }
> EXPORT_SYMBOL(qtree_read_dquot);
> diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
> index 2ae757e..4af344c 100644
> --- a/fs/quota/quota_v1.c
> +++ b/fs/quota/quota_v1.c
> @@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
> dquot->dq_dqb.dqb_ihardlimit == 0 &&
> dquot->dq_dqb.dqb_isoftlimit == 0)
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> - dqstats.reads++;
> + dqstats_inc(DQST_READS);
>
> return 0;
> }
> @@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
> ret = 0;
>
> out:
> - dqstats.writes++;
> + dqstats_inc(DQST_WRITES);
>
> return ret;
> }
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index b462916..40e0157 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -237,19 +237,23 @@ static inline int info_dirty(struct mem_dqinfo *info)
> {
> return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
> }
> -
> +enum {
> + DQST_LOOKUPS,
> + DQST_DROPS,
> + DQST_READS,
> + DQST_WRITES,
> + DQST_CACHE_HITS,
> + DQST_ALLOC_DQUOTS,
> + DQST_FREE_DQUOTS,
> + DQST_SYNCS,
> + _DQST_DQSTAT_LAST
> +};
> struct dqstats {
> - int lookups;
> - int drops;
> - int reads;
> - int writes;
> - int cache_hits;
> - int allocated_dquots;
> - int free_dquots;
> - int syncs;
> + int stat[_DQST_DQSTAT_LAST];
> };
> -
> -extern struct dqstats dqstats;
> +extern void dqstats_inc(unsigned int type);
> +extern void dqstats_dec(unsigned int type);
> +extern int dqstats_read(unsigned int type);
>
> #define DQ_MOD_B 0 /* dquot modified since read */
> #define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */
> --
> 1.6.6.1
>
--
Jan Kara <jack@suse.cz>
SUSE Labs, CR
next prev parent reply other threads:[~2010-04-26 15:16 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-23 5:31 [PATCH] quota: Make quota stat accounting lockless Dmitry Monakhov
2010-04-26 15:16 ` Jan Kara [this message]
2010-04-26 15:24 ` Jan Kara
2010-04-26 16:14 ` Dmitry Monakhov
2010-04-26 17:37 ` Jan Kara
2010-04-26 20:14 ` Jan Kara
2010-04-27 3:54 ` Dmitry Monakhov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100426151623.GD3673@quack.suse.cz \
--to=jack@suse.cz \
--cc=dmonakhov@openvz.org \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).