From: Eric Dumazet <dada1@cosmosbay.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>,
linux kernel <linux-kernel@vger.kernel.org>,
Mike Travis <travis@sgi.com>
Subject: [PATCH] modules: Use a better scheme for refcounting
Date: Thu, 15 May 2008 22:40:37 +0200 [thread overview]
Message-ID: <482C9FC5.2070508@cosmosbay.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1003 bytes --]
Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y)
is using a lot of memory.
Each 'struct module' contains an [NR_CPUS] array of full cache lines.
This patch uses existing infrastructure (percpu_modalloc() &
percpu_modfree())
to allocate percpu space for the refcount storage.
Instead of wasting NR_CPUS*128 bytes (on i386), we now use
num_possible_cpus*sizeof(local_t) bytes.
On a typical distro, where NR_CPUS=8, shiping 2000 modules, we reduce
size of module files by about 2 Mbytes. (1Kb per module)
Instead of having all refcounters in the same memory node - with TLB misses
because of vmalloc() - this new implementation permits to have better
NUMA properties, since each CPU will use storage on its preferred node,
thanks to percpu storage.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
include/linux/module.h | 25 ++++++++++++++++---------
kernel/module.c | 37 +++++++++++++++++++++++++++----------
2 files changed, 43 insertions(+), 19 deletions(-)
[-- Attachment #2: module.patch --]
[-- Type: text/plain, Size: 4856 bytes --]
diff --git a/include/linux/module.h b/include/linux/module.h
index 3e03b1a..72db614 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -217,11 +217,6 @@ void *__symbol_get_gpl(const char *symbol);
#endif
-struct module_ref
-{
- local_t count;
-} ____cacheline_aligned;
-
enum module_state
{
MODULE_STATE_LIVE,
@@ -307,8 +302,11 @@ struct module
#ifdef CONFIG_MODULE_UNLOAD
/* Reference counts */
- struct module_ref ref[NR_CPUS];
-
+#ifdef CONFIG_SMP
+ char *refptr;
+#else
+ local_t ref;
+#endif
/* What modules depend on me? */
struct list_head modules_which_use_me;
@@ -378,13 +376,22 @@ void __symbol_put(const char *symbol);
#define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
void symbol_put_addr(void *addr);
+static inline local_t *__module_ref_addr(struct module *mod, int cpu)
+{
+#ifdef CONFIG_SMP
+ return (local_t *) (mod->refptr + per_cpu_offset(cpu));
+#else
+ return &mod->ref;
+#endif
+}
+
/* Sometimes we know we already have a refcount, and it's easier not
to handle the error case (which only happens with rmmod --wait). */
static inline void __module_get(struct module *module)
{
if (module) {
BUG_ON(module_refcount(module) == 0);
- local_inc(&module->ref[get_cpu()].count);
+ local_inc(__module_ref_addr(module, get_cpu()));
put_cpu();
}
}
@@ -396,7 +403,7 @@ static inline int try_module_get(struct module *module)
if (module) {
unsigned int cpu = get_cpu();
if (likely(module_is_live(module)))
- local_inc(&module->ref[cpu].count);
+ local_inc(__module_ref_addr(module, cpu));
else
ret = 0;
put_cpu();
diff --git a/kernel/module.c b/kernel/module.c
index f5e9491..e0e8712 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -522,13 +522,13 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
- unsigned int i;
+ int cpu;
INIT_LIST_HEAD(&mod->modules_which_use_me);
- for (i = 0; i < NR_CPUS; i++)
- local_set(&mod->ref[i].count, 0);
+ for_each_possible_cpu(cpu)
+ local_set(__module_ref_addr(mod, cpu), 0);
/* Hold reference count during initialization. */
- local_set(&mod->ref[raw_smp_processor_id()].count, 1);
+ local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -659,10 +659,11 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
unsigned int module_refcount(struct module *mod)
{
- unsigned int i, total = 0;
+ unsigned int total = 0;
+ int cpu;
- for (i = 0; i < NR_CPUS; i++)
- total += local_read(&mod->ref[i].count);
+ for_each_possible_cpu(cpu)
+ total += local_read(__module_ref_addr(mod, cpu));
return total;
}
EXPORT_SYMBOL(module_refcount);
@@ -824,7 +825,7 @@ void module_put(struct module *module)
{
if (module) {
unsigned int cpu = get_cpu();
- local_dec(&module->ref[cpu].count);
+ local_dec(__module_ref_addr(module, cpu));
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
@@ -1392,7 +1393,10 @@ static void free_module(struct module *mod)
kfree(mod->args);
if (mod->percpu)
percpu_modfree(mod->percpu);
-
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ if (mod->refptr)
+ percpu_modfree(mod->refptr);
+#endif
/* Free lock-classes: */
lockdep_free_key_range(mod->module_core, mod->core_size);
@@ -1760,6 +1764,9 @@ static struct module *load_module(void __user *umod,
unsigned int markersstringsindex;
struct module *mod;
long err = 0;
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ void *refptr = NULL;
+#endif
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
struct exception_table_entry *extable;
mm_segment_t old_fs;
@@ -1904,6 +1911,12 @@ static struct module *load_module(void __user *umod,
if (err < 0)
goto free_mod;
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ refptr = percpu_modalloc(sizeof(local_t), sizeof(local_t), mod->name);
+ if (!refptr)
+ goto free_mod;
+ mod->refptr = refptr;
+#endif
if (pcpuindex) {
/* We have a special allocation for this section. */
percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
@@ -1911,7 +1924,7 @@ static struct module *load_module(void __user *umod,
mod->name);
if (!percpu) {
err = -ENOMEM;
- goto free_mod;
+ goto free_percpu;
}
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
mod->percpu = percpu;
@@ -2164,6 +2177,10 @@ static struct module *load_module(void __user *umod,
free_percpu:
if (percpu)
percpu_modfree(percpu);
+#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+ if (refptr)
+ percpu_modfree(refptr);
+#endif
free_mod:
kfree(args);
free_hdr:
next reply other threads:[~2008-05-15 20:40 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-05-15 20:40 Eric Dumazet [this message]
2008-05-15 21:57 ` [PATCH] modules: Use a better scheme for refcounting Andi Kleen
2008-05-16 4:44 ` Eric Dumazet
2008-05-16 0:09 ` Rusty Russell
2008-05-16 5:29 ` Eric Dumazet
2008-05-16 13:41 ` Mike Travis
2008-05-17 5:33 ` Rusty Russell
2008-05-17 7:36 ` Eric Dumazet
2008-05-18 14:31 ` Rusty Russell
2008-05-19 16:42 ` Christoph Lameter
2008-05-19 16:41 ` Christoph Lameter
2008-05-19 18:04 ` Mike Travis
2008-05-19 16:39 ` Christoph Lameter
-- strict thread matches above, loose matches on Subject: below --
2009-02-03 3:01 Rusty Russell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=482C9FC5.2070508@cosmosbay.com \
--to=dada1@cosmosbay.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rusty@rustcorp.com.au \
--cc=travis@sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.