All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Monakhov <dmonakhov@openvz.org>
To: Jan Kara <jack@suse.cz>
Cc: linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH] quota: Make quota stat accounting lockless.
Date: Mon, 26 Apr 2010 20:14:24 +0400	[thread overview]
Message-ID: <87r5m2chsv.fsf@openvz.org> (raw)
In-Reply-To: <20100426152407.GE3673@quack.suse.cz> (Jan Kara's message of "Mon, 26 Apr 2010 17:24:08 +0200")

[-- Attachment #1: Type: text/plain, Size: 1100 bytes --]

Jan Kara <jack@suse.cz> writes:

> On Mon 26-04-10 17:16:23, Jan Kara wrote:
>> On Fri 23-04-10 09:31:41, Dmitry Monakhov wrote:
>> > 
>> > I've written this patch long time ago, it is pretty simple, and
>> > allow more non obvious locking optimization to be done later.
>>   The patch looks OK to me. Will merge it.
>   Hmm, maybe I spoke to soon. One question:
>
>> > +void dqstats_inc(unsigned int type)
>> > +{
>> > +#ifdef CONFIG_SMP
>> > +	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
>> > +#else
>> > +	dqstats[type]++;
>> > +#endif
>> > +}
>> > +
>> > +void dqstats_dec(unsigned int type)
>> > +{
>> > +#ifdef CONFIG_SMP
>> > +	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
>> > +#else
>> > +	dqstats[type]--;
>> > +#endif
>> > +}
>   Why not make these two functions inline? Since they are used
> also from quota_tree.c and quota_v1.c, we'd need to move them to a
> quota.h header in that case.
My initial goal was to keep inc/dec/read helpers together.
We still need to export dqstats_pcpu. But if you prefer this style
i'm ok with it. New version attached.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-quota-Make-quota-stat-accounting-lockless.patch --]
[-- Type: text/x-diff, Size: 11243 bytes --]

>From 6d3b422f18882896df1a862d081209c1938bc7a4 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 26 Apr 2010 20:03:33 +0400
Subject: [PATCH] quota: Make quota stat accounting lockless.

Quota stats it mostly writable data structure. Let's alloc percpu
bucket for each value.

NOTE: dqstats_read() function is racy against dqstats_{inc,dec}
and may return inconsistent value. But this is ok since absolute
accuracy is not required.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/quota/dquot.c      |  106 ++++++++++++++++++++++++++++++++-----------------
 fs/quota/quota_tree.c |    4 +-
 fs/quota/quota_v1.c   |    4 +-
 include/linux/quota.h |   41 ++++++++++++++-----
 4 files changed, 105 insertions(+), 50 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 05c590e..244e198 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
 
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats, dqstats structure containing statistics about the lists
+ * and quota formats.
  * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
  * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
  * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -131,7 +131,11 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
 EXPORT_SYMBOL(dq_data_lock);
-
+#ifdef CONFIG_SMP
+struct dqstats *dqstats_pcpu;
+EXPORT_SYMBOL(dqstats_pcpu);
+#endif
+struct dqstats  dqstats;
 static char *quotatypes[] = INITQFNAMES;
 static struct quota_format_type *quota_formats;	/* List of registered formats */
 static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
@@ -273,7 +277,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
 static inline void put_dquot_last(struct dquot *dquot)
 {
 	list_add_tail(&dquot->dq_free, &free_dquots);
-	dqstats.free_dquots++;
+	dqstats_inc(DQST_FREE_DQUOTS);
 }
 
 static inline void remove_free_dquot(struct dquot *dquot)
@@ -281,7 +285,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
 	if (list_empty(&dquot->dq_free))
 		return;
 	list_del_init(&dquot->dq_free);
-	dqstats.free_dquots--;
+	dqstats_dec(DQST_FREE_DQUOTS);
 }
 
 static inline void put_inuse(struct dquot *dquot)
@@ -289,12 +293,12 @@ static inline void put_inuse(struct dquot *dquot)
 	/* We add to the back of inuse list so we don't have to restart
 	 * when traversing this list and we block */
 	list_add_tail(&dquot->dq_inuse, &inuse_list);
-	dqstats.allocated_dquots++;
+	dqstats_inc(DQST_ALLOC_DQUOTS);
 }
 
 static inline void remove_inuse(struct dquot *dquot)
 {
-	dqstats.allocated_dquots--;
+	dqstats_dec(DQST_ALLOC_DQUOTS);
 	list_del(&dquot->dq_inuse);
 }
 /*
@@ -559,8 +563,8 @@ int dquot_scan_active(struct super_block *sb,
 			continue;
 		/* Now we have active dquot so we can just increase use count */
 		atomic_inc(&dquot->dq_count);
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_LOOKUPS);
 		dqput(old_dquot);
 		old_dquot = dquot;
 		ret = fn(dquot, priv);
@@ -605,8 +609,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
  			 * holding reference so we can safely just increase
 			 * use count */
 			atomic_inc(&dquot->dq_count);
-			dqstats.lookups++;
 			spin_unlock(&dq_list_lock);
+			dqstats_inc(DQST_LOOKUPS);
 			sb->dq_op->write_dquot(dquot);
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
@@ -618,9 +622,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
 		if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
 		    && info_dirty(&dqopt->info[cnt]))
 			sb->dq_op->write_info(sb, cnt);
-	spin_lock(&dq_list_lock);
-	dqstats.syncs++;
-	spin_unlock(&dq_list_lock);
+	dqstats_inc(DQST_SYNCS);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -672,6 +674,22 @@ static void prune_dqcache(int count)
 	}
 }
 
+static int dqstats_read(unsigned int type)
+{
+	int count = 0;
+	int cpu;
+#ifdef CONFIG_SMP
+	for_each_possible_cpu(cpu)
+		count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
+	/* Statistics reading is racy, but absolute accuracy isn't required */
+	if (count < 0)
+		count = 0;
+#else
+	count = dqstats[type];
+#endif
+	return count;
+}
+
 /*
  * This is called from kswapd when we think we need some
  * more memory
@@ -684,7 +702,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 		prune_dqcache(nr);
 		spin_unlock(&dq_list_lock);
 	}
-	return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+	return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker dqcache_shrinker = {
@@ -712,10 +730,7 @@ void dqput(struct dquot *dquot)
 		BUG();
 	}
 #endif
-	
-	spin_lock(&dq_list_lock);
-	dqstats.drops++;
-	spin_unlock(&dq_list_lock);
+	dqstats_inc(DQST_DROPS);
 we_slept:
 	spin_lock(&dq_list_lock);
 	if (atomic_read(&dquot->dq_count) > 1) {
@@ -832,15 +847,16 @@ we_slept:
 		put_inuse(dquot);
 		/* hash it first so it can be found */
 		insert_dquot_hash(dquot);
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_LOOKUPS);
+
 	} else {
 		if (!atomic_read(&dquot->dq_count))
 			remove_free_dquot(dquot);
 		atomic_inc(&dquot->dq_count);
-		dqstats.cache_hits++;
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_CACHE_HITS);
+		dqstats_inc(DQST_LOOKUPS);
 	}
 	/* Wait for dq_lock - after this we know that either dquot_release() is
 	 * already finished or it will be canceled due to dq_count > 1 test */
@@ -2474,68 +2490,79 @@ const struct quotactl_ops vfs_quotactl_ops = {
 	.set_dqblk	= vfs_set_dqblk
 };
 
+
+static int do_proc_dqstats(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+	/* Update global table */
+	unsigned int type = (int *)table->data - dqstats.stat;
+	dqstats.stat[type] = dqstats_read(type);
+#endif
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
 static ctl_table fs_dqstats_table[] = {
 	{
 		.procname	= "lookups",
-		.data		= &dqstats.lookups,
+		.data		= &dqstats.stat[DQST_LOOKUPS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "drops",
-		.data		= &dqstats.drops,
+		.data		= &dqstats.stat[DQST_DROPS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "reads",
-		.data		= &dqstats.reads,
+		.data		= &dqstats.stat[DQST_READS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "writes",
-		.data		= &dqstats.writes,
+		.data		= &dqstats.stat[DQST_WRITES],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "cache_hits",
-		.data		= &dqstats.cache_hits,
+		.data		= &dqstats.stat[DQST_CACHE_HITS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "allocated_dquots",
-		.data		= &dqstats.allocated_dquots,
+		.data		= &dqstats.stat[DQST_ALLOC_DQUOTS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "free_dquots",
-		.data		= &dqstats.free_dquots,
+		.data		= &dqstats.stat[DQST_FREE_DQUOTS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "syncs",
-		.data		= &dqstats.syncs,
+		.data		= &dqstats.stat[DQST_SYNCS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 	{
 		.procname	= "warnings",
 		.data		= &flag_print_warnings,
-		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
@@ -2581,6 +2608,13 @@ static int __init dquot_init(void)
 	if (!dquot_hash)
 		panic("Cannot create dquot hash table");
 
+#ifdef CONFIG_SMP
+	dqstats_pcpu = alloc_percpu(struct dqstats);
+	if (!dqstats_pcpu)
+		panic("Cannot create dquot stats table");
+#endif
+	memset(&dqstats, 0, sizeof(struct dqstats));
+
 	/* Find power-of-two hlist_heads which can fit into allocation */
 	nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
 	dq_hash_bits = 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bc..5b7f741 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	} else {
 		ret = 0;
 	}
-	dqstats.writes++;
+	dqstats_inc(DQST_WRITES);
 	kfree(ddquot);
 
 	return ret;
@@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	spin_unlock(&dq_data_lock);
 	kfree(ddquot);
 out:
-	dqstats.reads++;
+	dqstats_inc(DQST_READS);
 	return ret;
 }
 EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e..4af344c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
 	    dquot->dq_dqb.dqb_ihardlimit == 0 &&
 	    dquot->dq_dqb.dqb_isoftlimit == 0)
 		set_bit(DQ_FAKE_B, &dquot->dq_flags);
-	dqstats.reads++;
+	dqstats_inc(DQST_READS);
 
 	return 0;
 }
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
 	ret = 0;
 
 out:
-	dqstats.writes++;
+	dqstats_inc(DQST_WRITES);
 
 	return ret;
 }
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b462916..0f5b4a7 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -174,6 +174,8 @@ enum {
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
@@ -237,19 +239,38 @@ static inline int info_dirty(struct mem_dqinfo *info)
 {
 	return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
 }
-
+enum {
+	DQST_LOOKUPS,
+	DQST_DROPS,
+	DQST_READS,
+	DQST_WRITES,
+	DQST_CACHE_HITS,
+	DQST_ALLOC_DQUOTS,
+	DQST_FREE_DQUOTS,
+	DQST_SYNCS,
+	_DQST_DQSTAT_LAST
+};
 struct dqstats {
-	int lookups;
-	int drops;
-	int reads;
-	int writes;
-	int cache_hits;
-	int allocated_dquots;
-	int free_dquots;
-	int syncs;
+	int stat[_DQST_DQSTAT_LAST];
 };
 
-extern struct dqstats dqstats;
+extern struct dqstats *dqstats_pcpu;
+static inline void dqstats_inc(unsigned int type)
+{
+#ifdef CONFIG_SMP
+	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
+#else
+	dqstats[type]++;
+#endif
+}
+static inline void dqstats_dec(unsigned int type)
+{
+#ifdef CONFIG_SMP
+	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
+#else
+	dqstats[type]--;
+#endif
+}
 
 #define DQ_MOD_B	0	/* dquot modified since read */
 #define DQ_BLKS_B	1	/* uid/gid has been warned about blk limit */
-- 
1.6.6.1


  reply	other threads:[~2010-04-26 16:14 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-23  5:31 [PATCH] quota: Make quota stat accounting lockless Dmitry Monakhov
2010-04-26 15:16 ` Jan Kara
2010-04-26 15:24   ` Jan Kara
2010-04-26 16:14     ` Dmitry Monakhov [this message]
2010-04-26 17:37       ` Jan Kara
2010-04-26 20:14         ` Jan Kara
2010-04-27  3:54           ` Dmitry Monakhov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r5m2chsv.fsf@openvz.org \
    --to=dmonakhov@openvz.org \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.