public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>,
	linux kernel <linux-kernel@vger.kernel.org>,
	Mike Travis <travis@sgi.com>
Subject: [PATCH] modules: Use a better scheme for refcounting
Date: Thu, 15 May 2008 22:40:37 +0200	[thread overview]
Message-ID: <482C9FC5.2070508@cosmosbay.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1003 bytes --]

Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y)
is using a lot of memory.

Each 'struct module' contains an [NR_CPUS] array of full cache lines.

This patch uses existing infrastructure (percpu_modalloc() & 
percpu_modfree())
to allocate percpu space for the refcount storage.

Instead of wasting NR_CPUS*128 bytes (on i386), we now use
num_possible_cpus*sizeof(local_t) bytes.

On a typical distro, where NR_CPUS=8, shiping 2000 modules, we reduce
size of module files by about 2 Mbytes. (1Kb per module)

Instead of having all refcounters in the same memory node - with TLB misses
because of vmalloc() - this new implementation permits to have better
NUMA properties, since each  CPU will use storage on its preferred node,
thanks to percpu storage.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
 include/linux/module.h |   25 ++++++++++++++++---------
 kernel/module.c        |   37 +++++++++++++++++++++++++++----------
 2 files changed, 43 insertions(+), 19 deletions(-)


[-- Attachment #2: module.patch --]
[-- Type: text/plain, Size: 4856 bytes --]

diff --git a/include/linux/module.h b/include/linux/module.h
index 3e03b1a..72db614 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -217,11 +217,6 @@ void *__symbol_get_gpl(const char *symbol);
 
 #endif
 
-struct module_ref
-{
-	local_t count;
-} ____cacheline_aligned;
-
 enum module_state
 {
 	MODULE_STATE_LIVE,
@@ -307,8 +302,11 @@ struct module
 
 #ifdef CONFIG_MODULE_UNLOAD
 	/* Reference counts */
-	struct module_ref ref[NR_CPUS];
-
+#ifdef CONFIG_SMP
+	char *refptr;
+#else
+	local_t ref;
+#endif
 	/* What modules depend on me? */
 	struct list_head modules_which_use_me;
 
@@ -378,13 +376,22 @@ void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
 
+static inline local_t *__module_ref_addr(struct module *mod, int cpu)
+{
+#ifdef CONFIG_SMP
+	return (local_t *) (mod->refptr + per_cpu_offset(cpu));
+#else
+	return &mod->ref;
+#endif
+}
+
 /* Sometimes we know we already have a refcount, and it's easier not
    to handle the error case (which only happens with rmmod --wait). */
 static inline void __module_get(struct module *module)
 {
 	if (module) {
 		BUG_ON(module_refcount(module) == 0);
-		local_inc(&module->ref[get_cpu()].count);
+		local_inc(__module_ref_addr(module, get_cpu()));
 		put_cpu();
 	}
 }
@@ -396,7 +403,7 @@ static inline int try_module_get(struct module *module)
 	if (module) {
 		unsigned int cpu = get_cpu();
 		if (likely(module_is_live(module)))
-			local_inc(&module->ref[cpu].count);
+			local_inc(__module_ref_addr(module, cpu));
 		else
 			ret = 0;
 		put_cpu();
diff --git a/kernel/module.c b/kernel/module.c
index f5e9491..e0e8712 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -522,13 +522,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 /* Init the unload section of the module. */
 static void module_unload_init(struct module *mod)
 {
-	unsigned int i;
+	int cpu;
 
 	INIT_LIST_HEAD(&mod->modules_which_use_me);
-	for (i = 0; i < NR_CPUS; i++)
-		local_set(&mod->ref[i].count, 0);
+	for_each_possible_cpu(cpu)
+		local_set(__module_ref_addr(mod, cpu), 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[raw_smp_processor_id()].count, 1);
+	local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -659,10 +659,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 
 unsigned int module_refcount(struct module *mod)
 {
-	unsigned int i, total = 0;
+	unsigned int total = 0;
+	int cpu;
 
-	for (i = 0; i < NR_CPUS; i++)
-		total += local_read(&mod->ref[i].count);
+	for_each_possible_cpu(cpu)
+		total += local_read(__module_ref_addr(mod, cpu));
 	return total;
 }
 EXPORT_SYMBOL(module_refcount);
@@ -824,7 +825,7 @@ void module_put(struct module *module)
 {
 	if (module) {
 		unsigned int cpu = get_cpu();
-		local_dec(&module->ref[cpu].count);
+		local_dec(__module_ref_addr(module, cpu));
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
@@ -1392,7 +1393,10 @@ static void free_module(struct module *mod)
 	kfree(mod->args);
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
-
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	if (mod->refptr)
+		percpu_modfree(mod->refptr);
+#endif
 	/* Free lock-classes: */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
 
@@ -1760,6 +1764,9 @@ static struct module *load_module(void __user *umod,
 	unsigned int markersstringsindex;
 	struct module *mod;
 	long err = 0;
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	void *refptr = NULL;
+#endif
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
 	struct exception_table_entry *extable;
 	mm_segment_t old_fs;
@@ -1904,6 +1911,12 @@ static struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto free_mod;
 
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	refptr = percpu_modalloc(sizeof(local_t), sizeof(local_t), mod->name);
+	if (!refptr)
+		goto free_mod;
+	mod->refptr = refptr;
+#endif
 	if (pcpuindex) {
 		/* We have a special allocation for this section. */
 		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -1911,7 +1924,7 @@ static struct module *load_module(void __user *umod,
 					 mod->name);
 		if (!percpu) {
 			err = -ENOMEM;
-			goto free_mod;
+			goto free_percpu;
 		}
 		sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
 		mod->percpu = percpu;
@@ -2164,6 +2177,10 @@ static struct module *load_module(void __user *umod,
  free_percpu:
 	if (percpu)
 		percpu_modfree(percpu);
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+	if (refptr)
+		percpu_modfree(refptr);
+#endif
  free_mod:
 	kfree(args);
  free_hdr:

             reply	other threads:[~2008-05-15 20:40 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-15 20:40 Eric Dumazet [this message]
2008-05-15 21:57 ` [PATCH] modules: Use a better scheme for refcounting Andi Kleen
2008-05-16  4:44   ` Eric Dumazet
2008-05-16  0:09 ` Rusty Russell
2008-05-16  5:29   ` Eric Dumazet
2008-05-16 13:41     ` Mike Travis
2008-05-17  5:33       ` Rusty Russell
2008-05-17  7:36         ` Eric Dumazet
2008-05-18 14:31           ` Rusty Russell
2008-05-19 16:42             ` Christoph Lameter
2008-05-19 16:41           ` Christoph Lameter
2008-05-19 18:04             ` Mike Travis
2008-05-19 16:39         ` Christoph Lameter
  -- strict thread matches above, loose matches on Subject: below --
2009-02-03  3:01 Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=482C9FC5.2070508@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox