* [PATCH] module: kzalloc mod->ref
@ 2009-01-14 3:35 Kyle McMartin
2009-01-14 5:54 ` [PATCHv2] " Kyle McMartin
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: Kyle McMartin @ 2009-01-14 3:35 UTC (permalink / raw)
To: rusty; +Cc: linux-kernel
From: Kyle McMartin <kyle@redhat.com>
Dynamically allocate mod->ref instead of fixing it in the struct module.
Reduces on disk space wasted in the module .ko, and kills a loop
initializing the local_t it contains since we can just kzalloc it.
This matters when we're talking about large NR_CPUS.
Signed-off-by: Kyle McMartin <kyle@redhat.com>
---
The patch removing cacheline_aligned from struct module_ref should be
applied as well to cut down on the amount of memory we allocate. This
patch makes a nice stopgap until we have per_cpu module references.
cheers, Kyle
diff --git a/include/linux/module.h b/include/linux/module.h
index 4f7ea12..5863998 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -345,7 +345,7 @@ struct module
void (*exit)(void);
/* Reference counts */
- struct module_ref ref[NR_CPUS];
+ struct module_ref *ref;
#endif
};
#ifndef MODULE_ARCH_INIT
diff --git a/kernel/module.c b/kernel/module.c
index c9332c9..bc0a9b0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -571,17 +571,19 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
#ifdef CONFIG_MODULE_UNLOAD
/* Init the unload section of the module. */
-static void module_unload_init(struct module *mod)
+static int module_unload_init(struct module *mod)
{
- unsigned int i;
+ mod->ref = kzalloc(NR_CPUS * sizeof(*mod->ref), GFP_KERNEL);
+ if (!mod->ref)
+ return -ENOMEM;
INIT_LIST_HEAD(&mod->modules_which_use_me);
- for (i = 0; i < NR_CPUS; i++)
- local_set(&mod->ref[i].count, 0);
/* Hold reference count during initialization. */
local_set(&mod->ref[raw_smp_processor_id()].count, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
+
+ return 0;
}
/* modules using other modules */
@@ -919,8 +921,9 @@ static inline int use_module(struct module *a, struct module *b)
return strong_try_module_get(b) == 0;
}
-static inline void module_unload_init(struct module *mod)
+static inline int module_unload_init(struct module *mod)
{
+ return 0;
}
#endif /* CONFIG_MODULE_UNLOAD */
@@ -2071,7 +2074,9 @@ static noinline struct module *load_module(void __user *umod,
mod = (void *)sechdrs[modindex].sh_addr;
/* Now we've moved module, initialize linked lists, etc. */
- module_unload_init(mod);
+ err = module_unload_init(mod);
+ if (err < 0)
+ goto free_unload;
/* add kobject, so we can reference it. */
err = mod_sysfs_init(mod);
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCHv2] module: kzalloc mod->ref 2009-01-14 3:35 [PATCH] module: kzalloc mod->ref Kyle McMartin @ 2009-01-14 5:54 ` Kyle McMartin 2009-01-14 9:09 ` [PATCH] " Takashi Iwai 2009-01-14 10:15 ` richard kennedy 2 siblings, 0 replies; 17+ messages in thread From: Kyle McMartin @ 2009-01-14 5:54 UTC (permalink / raw) To: Kyle McMartin; +Cc: rusty, linux-kernel From: Kyle McMartin <kyle@redhat.com> Dynamically allocate mod->ref instead of fixing it in the struct module. Reduces on disk space wasted in the module .ko, and kills a loop initializing the local_t it contains since we can just kzalloc it. This matters when we're talking about large NR_CPUS. Signed-off-by: Kyle McMartin <kyle@redhat.com> --- The patch removing cacheline_aligned from struct module_ref should be applied as well to cut down on the amount of memory we allocate. This patch makes a nice stopgap until we have per_cpu module references. I'm an idiot and forgot to kfree mod->ref last go 'round. refcount doesn't look to be touched after module_unload_free, and since the module unload code stop_machine_run with refcount at 0, it looks to be as safe a place as any to nuke the array. cheers, Kyle diff --git a/include/linux/module.h b/include/linux/module.h index 4f7ea12..5863998 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -345,7 +345,7 @@ struct module void (*exit)(void); /* Reference counts */ - struct module_ref ref[NR_CPUS]; + struct module_ref *ref; #endif }; #ifndef MODULE_ARCH_INIT diff --git a/kernel/module.c b/kernel/module.c index c9332c9..8c8fe13 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -571,17 +571,19 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD /* Init the unload section of the module. */ -static void module_unload_init(struct module *mod) +static int module_unload_init(struct module *mod) { - unsigned int i; + mod->ref = kzalloc(NR_CPUS * sizeof(*mod->ref), GFP_KERNEL); + if (!mod->ref) + return -ENOMEM; INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); /* Hold reference count during initialization. */ local_set(&mod->ref[raw_smp_processor_id()].count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; + + return 0; } /* modules using other modules */ @@ -661,6 +663,8 @@ static void module_unload_free(struct module *mod) } } } + + kfree(mod->ref); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -919,8 +923,9 @@ static inline int use_module(struct module *a, struct module *b) return strong_try_module_get(b) == 0; } -static inline void module_unload_init(struct module *mod) +static inline int module_unload_init(struct module *mod) { + return 0; } #endif /* CONFIG_MODULE_UNLOAD */ @@ -2071,7 +2076,9 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; /* Now we've moved module, initialize linked lists, etc. */ - module_unload_init(mod); + err = module_unload_init(mod); + if (err < 0) + goto free_unload; /* add kobject, so we can reference it. */ err = mod_sysfs_init(mod); ^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 3:35 [PATCH] module: kzalloc mod->ref Kyle McMartin 2009-01-14 5:54 ` [PATCHv2] " Kyle McMartin @ 2009-01-14 9:09 ` Takashi Iwai 2009-01-14 15:58 ` Kyle McMartin [not found] ` <200901151521.47326.rusty@rustcorp.com.au> 2009-01-14 10:15 ` richard kennedy 2 siblings, 2 replies; 17+ messages in thread From: Takashi Iwai @ 2009-01-14 9:09 UTC (permalink / raw) To: Kyle McMartin; +Cc: rusty, linux-kernel At Tue, 13 Jan 2009 22:35:33 -0500, Kyle McMartin wrote: > > From: Kyle McMartin <kyle@redhat.com> > > Dynamically allocate mod->ref instead of fixing it in the struct module. > Reduces on disk space wasted in the module .ko, and kills a loop > initializing the local_t it contains since we can just kzalloc it. > > This matters when we're talking about large NR_CPUS. > > Signed-off-by: Kyle McMartin <kyle@redhat.com> > --- > The patch removing cacheline_aligned from struct module_ref should be > applied as well to cut down on the amount of memory we allocate. This > patch makes a nice stopgap until we have per_cpu module references. > > cheers, Kyle Similar patches (including mine) have been already posted, but no proceed until now... http://lkml.org/lkml/2008/11/11/315 thanks, Takashi ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 9:09 ` [PATCH] " Takashi Iwai @ 2009-01-14 15:58 ` Kyle McMartin 2009-01-14 16:25 ` Takashi Iwai 2009-01-14 17:51 ` richard kennedy [not found] ` <200901151521.47326.rusty@rustcorp.com.au> 1 sibling, 2 replies; 17+ messages in thread From: Kyle McMartin @ 2009-01-14 15:58 UTC (permalink / raw) To: Takashi Iwai; +Cc: Kyle McMartin, rusty, linux-kernel On Wed, Jan 14, 2009 at 10:09:59AM +0100, Takashi Iwai wrote: > At Tue, 13 Jan 2009 22:35:33 -0500, > Kyle McMartin wrote: > > > > From: Kyle McMartin <kyle@redhat.com> > > > > Dynamically allocate mod->ref instead of fixing it in the struct module. > > Reduces on disk space wasted in the module .ko, and kills a loop > > initializing the local_t it contains since we can just kzalloc it. > > > > This matters when we're talking about large NR_CPUS. > > > > Signed-off-by: Kyle McMartin <kyle@redhat.com> > > --- > > The patch removing cacheline_aligned from struct module_ref should be > > applied as well to cut down on the amount of memory we allocate. This > > patch makes a nice stopgap until we have per_cpu module references. > > > > cheers, Kyle > > Similar patches (including mine) have been already posted, but no > proceed until now... > http://lkml.org/lkml/2008/11/11/315 Ah, sigh. It would be nice to get this sorted out, since we're serious wasting disk space for no good reason... Although as Richard points out, dropping the cacheline_aligned might drop networking performance (which, sigh, is also stupid) but allocating 128b * NR_CPUS is just a ridiculous amount of memory to waste for a reference count... ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 15:58 ` Kyle McMartin @ 2009-01-14 16:25 ` Takashi Iwai 2009-01-14 19:29 ` Valdis.Kletnieks 2009-01-14 17:51 ` richard kennedy 1 sibling, 1 reply; 17+ messages in thread From: Takashi Iwai @ 2009-01-14 16:25 UTC (permalink / raw) To: Kyle McMartin; +Cc: rusty, linux-kernel At Wed, 14 Jan 2009 10:58:59 -0500, Kyle McMartin wrote: > > On Wed, Jan 14, 2009 at 10:09:59AM +0100, Takashi Iwai wrote: > > At Tue, 13 Jan 2009 22:35:33 -0500, > > Kyle McMartin wrote: > > > > > > From: Kyle McMartin <kyle@redhat.com> > > > > > > Dynamically allocate mod->ref instead of fixing it in the struct module. > > > Reduces on disk space wasted in the module .ko, and kills a loop > > > initializing the local_t it contains since we can just kzalloc it. > > > > > > This matters when we're talking about large NR_CPUS. > > > > > > Signed-off-by: Kyle McMartin <kyle@redhat.com> > > > --- > > > The patch removing cacheline_aligned from struct module_ref should be > > > applied as well to cut down on the amount of memory we allocate. This > > > patch makes a nice stopgap until we have per_cpu module references. > > > > > > cheers, Kyle > > > > Similar patches (including mine) have been already posted, but no > > proceed until now... > > http://lkml.org/lkml/2008/11/11/315 > > Ah, sigh. > > It would be nice to get this sorted out, since we're serious wasting > disk space for no good reason... > > Although as Richard points out, dropping the cacheline_aligned might > drop networking performance (which, sigh, is also stupid) but allocating > 128b * NR_CPUS is just a ridiculous amount of memory to waste for a > reference count... We can use nr_cpu_ids instead of NR_CPUS. For a machine like 4096 CPUs, such an amount of memory is like a peanut :) Takashi ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 16:25 ` Takashi Iwai @ 2009-01-14 19:29 ` Valdis.Kletnieks 2009-01-14 20:37 ` Takashi Iwai 0 siblings, 1 reply; 17+ messages in thread From: Valdis.Kletnieks @ 2009-01-14 19:29 UTC (permalink / raw) To: Takashi Iwai; +Cc: Kyle McMartin, rusty, linux-kernel [-- Attachment #1: Type: text/plain, Size: 389 bytes --] On Wed, 14 Jan 2009 17:25:31 +0100, Takashi Iwai said: > We can use nr_cpu_ids instead of NR_CPUS. > For a machine like 4096 CPUs, such an amount of memory is like a > peanut :) It's not peanuts if you're dealing with a distro kernel that's essentially forced to compile with NR_CPUS=<some moby integer>, but booting on a single or dual core system with possibly not even 1G of memory. [-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --] ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 19:29 ` Valdis.Kletnieks @ 2009-01-14 20:37 ` Takashi Iwai 2009-01-14 20:53 ` Kyle McMartin 0 siblings, 1 reply; 17+ messages in thread From: Takashi Iwai @ 2009-01-14 20:37 UTC (permalink / raw) To: Valdis.Kletnieks; +Cc: Kyle McMartin, rusty, linux-kernel At Wed, 14 Jan 2009 14:29:08 -0500, Valdis.Kletnieks@vt.edu wrote: > > On Wed, 14 Jan 2009 17:25:31 +0100, Takashi Iwai said: > > > We can use nr_cpu_ids instead of NR_CPUS. > > For a machine like 4096 CPUs, such an amount of memory is like a > > peanut :) > > It's not peanuts if you're dealing with a distro kernel that's essentially > forced to compile with NR_CPUS=<some moby integer>, but booting on a single or > dual core system with possibly not even 1G of memory. My suggestion above is to use nr_cpu_ids, which is the actual number of CPUs, not a constant like NR_CPUS. Takashi ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 20:37 ` Takashi Iwai @ 2009-01-14 20:53 ` Kyle McMartin 0 siblings, 0 replies; 17+ messages in thread From: Kyle McMartin @ 2009-01-14 20:53 UTC (permalink / raw) To: Takashi Iwai; +Cc: Valdis.Kletnieks, Kyle McMartin, rusty, linux-kernel On Wed, Jan 14, 2009 at 09:37:18PM +0100, Takashi Iwai wrote: > At Wed, 14 Jan 2009 14:29:08 -0500, > Valdis.Kletnieks@vt.edu wrote: > > > > On Wed, 14 Jan 2009 17:25:31 +0100, Takashi Iwai said: > > > > > We can use nr_cpu_ids instead of NR_CPUS. > > > For a machine like 4096 CPUs, such an amount of memory is like a > > > peanut :) > > > > It's not peanuts if you're dealing with a distro kernel that's essentially > > forced to compile with NR_CPUS=<some moby integer>, but booting on a single or > > dual core system with possibly not even 1G of memory. > > My suggestion above is to use nr_cpu_ids, which is the actual number > of CPUs, not a constant like NR_CPUS. > Yes, I agree that it's a better solution, since I suppose it's based on cpu_possible_mask as opposed to, well, the maximum possible. I'll fire off an updated patch. Let's see what rusty thinks. cheers, Kyle ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 15:58 ` Kyle McMartin 2009-01-14 16:25 ` Takashi Iwai @ 2009-01-14 17:51 ` richard kennedy 2009-01-14 18:17 ` Kyle McMartin 1 sibling, 1 reply; 17+ messages in thread From: richard kennedy @ 2009-01-14 17:51 UTC (permalink / raw) To: Kyle McMartin; +Cc: Takashi Iwai, rusty, linux-kernel Kyle McMartin wrote: > On Wed, Jan 14, 2009 at 10:09:59AM +0100, Takashi Iwai wrote: >> At Tue, 13 Jan 2009 22:35:33 -0500, >> Kyle McMartin wrote: >>> From: Kyle McMartin <kyle@redhat.com> >>> >>> Dynamically allocate mod->ref instead of fixing it in the struct module. >>> Reduces on disk space wasted in the module .ko, and kills a loop >>> initializing the local_t it contains since we can just kzalloc it. >>> >>> This matters when we're talking about large NR_CPUS. >>> >>> Signed-off-by: Kyle McMartin <kyle@redhat.com> >>> --- >>> The patch removing cacheline_aligned from struct module_ref should be >>> applied as well to cut down on the amount of memory we allocate. This >>> patch makes a nice stopgap until we have per_cpu module references. >>> >>> cheers, Kyle >> Similar patches (including mine) have been already posted, but no >> proceed until now... >> http://lkml.org/lkml/2008/11/11/315 > > Ah, sigh. > > It would be nice to get this sorted out, since we're serious wasting > disk space for no good reason... > > Although as Richard points out, dropping the cacheline_aligned might > drop networking performance (which, sigh, is also stupid) but allocating > 128b * NR_CPUS is just a ridiculous amount of memory to waste for a > reference count... > Aside from the code in socket does this reference count really get used that often? Atomic_t gets used for ref counts is lots of other places in the kernel, so why not turn module_ref into an atomic counter & drop the array entirely saving all of the memory & disk space? I do wonder if socket could manage its module lifetimes in some other way, then we really could just use an atomic module ref without too much impact. I didn't get very far in trying to work out what exactly was going on in socket.c but maybe it's worth another look. I don't have any network test harness so it's difficult to tell what impact any code change is going to have. Do you have any suggestions for a good test of this? regards Richard ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 17:51 ` richard kennedy @ 2009-01-14 18:17 ` Kyle McMartin [not found] ` <200901151303.20006.rusty@rustcorp.com.au> 0 siblings, 1 reply; 17+ messages in thread From: Kyle McMartin @ 2009-01-14 18:17 UTC (permalink / raw) To: richard kennedy; +Cc: Kyle McMartin, Takashi Iwai, rusty, linux-kernel On Wed, Jan 14, 2009 at 05:51:27PM +0000, richard kennedy wrote: > Aside from the code in socket does this reference count really get used > that often? Atomic_t gets used for ref counts is lots of other places in > the kernel, so why not turn module_ref into an atomic counter & drop the > array entirely saving all of the memory & disk space? > > I do wonder if socket could manage its module lifetimes in some other > way, then we really could just use an atomic module ref without too much > impact. I didn't get very far in trying to work out what exactly was > going on in socket.c but maybe it's worth another look. > I suppose it depends on how much contention there will end up being on the atomic_t, given that right now, the module_ref structure favours writers (since readers need to loop.) I'd like to hear what rusty has to say, but I'm working on a patch to do module_ref with RCU (and to allocate it in modpost with per_cpu variables, but that needs more testing.) I'll check what kind of difference those end up making. > I don't have any network test harness so it's difficult to tell what > impact any code change is going to have. Do you have any suggestions for > a good test of this? > No, but I imagine Rick Jones' netperf would show the effect, if any. cheers, Kyle ^ permalink raw reply [flat|nested] 17+ messages in thread
[parent not found: <200901151303.20006.rusty@rustcorp.com.au>]
* Re: [PATCH] module: kzalloc mod->ref [not found] ` <200901151303.20006.rusty@rustcorp.com.au> @ 2009-01-25 15:52 ` Richard Kennedy 0 siblings, 0 replies; 17+ messages in thread From: Richard Kennedy @ 2009-01-25 15:52 UTC (permalink / raw) To: Rusty Russell; +Cc: Kyle McMartin, Takashi Iwai, lkml On Thu, 2009-01-15 at 13:03 +1030, Rusty Russell wrote: > > > On Thursday 15 January 2009 04:47:48 Kyle McMartin wrote: > > > On Wed, Jan 14, 2009 at 05:51:27PM +0000, richard kennedy wrote: > > > > Aside from the code in socket does this reference count really get > used > > > > that often? Atomic_t gets used for ref counts is lots of other > places in > > > > the kernel, so why not turn module_ref into an atomic counter & > drop the > > > > array entirely saving all of the memory & disk space? > > The code was originally written with the intent that networking would > inc and dec a module count on every *packet*. It doesn't, however > (though it would be interesting to instrument who does manip it, and > how often). > > I suspect that we could get away with an atomic_t and noone would > notice, but I'd like to see evidence one way or the other. > > Cheers, > > Rusty. Hi Rusty, I've been running module reference counts as atomic_t on my desktop (x86_64 AMD X2) all week and haven't been able to measure any significant performance differences. I guess more cpus & server workloads could show up something, but I don't have access to that sort of hardware. The one issue I am seeing is with my simple test harness, which is just a very basic socket client/server. With the existing ref counting code, when running several copies of the client I sometimes get EADDRNOTAVAIL (-99) errors on socket connect(). But I don't see any errors with the atomic_t ref counting. I'm still trying to debug this, but I haven't yet found where in the network stack these errors are coming from. regards Richard ^ permalink raw reply [flat|nested] 17+ messages in thread
[parent not found: <200901151521.47326.rusty@rustcorp.com.au>]
* Re: [PATCH] module: kzalloc mod->ref [not found] ` <200901151521.47326.rusty@rustcorp.com.au> @ 2009-01-15 6:03 ` Kyle McMartin [not found] ` <200901261751.21817.rusty@rustcorp.com.au> 1 sibling, 0 replies; 17+ messages in thread From: Kyle McMartin @ 2009-01-15 6:03 UTC (permalink / raw) To: Rusty Russell Cc: Takashi Iwai, Kyle McMartin, linux-kernel, Eric Dumazet, Linus Torvalds On Thu, Jan 15, 2009 at 03:21:46PM +1030, Rusty Russell wrote: > In fact, I've decided to go back and add Eric's patch. Since alloc_percpu > isn't going to be fixed this merge window, we'll do this now, then simply > use alloc_percpu once that is merged. Er, what do you mean by "fixed"? ;-) Perhaps I'm just being thick, but this seems to be working for me. At least, the refcnts seem to be the same as the version without and unloading is fine... Possibly I'm just being dense though. :) regards, Kyle diff --git a/include/linux/module.h b/include/linux/module.h index 4f7ea12..61f97a2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -219,11 +219,6 @@ void *__symbol_get_gpl(const char *symbol); #endif -struct module_ref -{ - local_t count; -} ____cacheline_aligned; - enum module_state { MODULE_STATE_LIVE, @@ -345,7 +340,7 @@ struct module void (*exit)(void); /* Reference counts */ - struct module_ref ref[NR_CPUS]; + local_t *ref; /* percpu */ #endif }; #ifndef MODULE_ARCH_INIT @@ -400,8 +395,9 @@ void symbol_put_addr(void *addr); static inline void __module_get(struct module *module) { if (module) { + int cpu = get_cpu(); BUG_ON(module_refcount(module) == 0); - local_inc(&module->ref[get_cpu()].count); + local_inc(per_cpu_ptr(module->ref, cpu)); put_cpu(); } } @@ -412,9 +408,9 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) - local_inc(&module->ref[cpu].count); - else + if (likely(module_is_live(module))) { + local_inc(per_cpu_ptr(module->ref, cpu)); + } else ret = 0; put_cpu(); } diff --git a/kernel/module.c b/kernel/module.c index c9332c9..e504f49 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -571,17 +571,19 @@ static char last_unloaded_module[MODULE_NAME_LEN+1]; #ifdef CONFIG_MODULE_UNLOAD /* Init the unload section of the module. */ -static void module_unload_init(struct module *mod) +static int module_unload_init(struct module *mod) { - unsigned int i; + mod->ref = percpu_alloc(sizeof(*mod->ref), GFP_KERNEL); + if (!mod->ref) + return -ENOMEM; INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); /* Hold reference count during initialization. */ - local_set(&mod->ref[raw_smp_processor_id()].count, 1); + local_set(per_cpu_ptr(mod->ref, raw_smp_processor_id()), 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; + + return 0; } /* modules using other modules */ @@ -661,6 +663,8 @@ static void module_unload_free(struct module *mod) } } } + + percpu_free(mod->ref); } #ifdef CONFIG_MODULE_FORCE_UNLOAD @@ -717,10 +721,12 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int i, total = 0; + unsigned int cpu, total = 0; + + for_each_possible_cpu(cpu) { + total += local_read(per_cpu_ptr(mod->ref, cpu)); + } - for (i = 0; i < NR_CPUS; i++) - total += local_read(&mod->ref[i].count); return total; } EXPORT_SYMBOL(module_refcount); @@ -894,7 +900,8 @@ void module_put(struct module *module) { if (module) { unsigned int cpu = get_cpu(); - local_dec(&module->ref[cpu].count); + + local_dec(per_cpu_ptr(module->ref, cpu)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -919,8 +926,9 @@ static inline int use_module(struct module *a, struct module *b) return strong_try_module_get(b) == 0; } -static inline void module_unload_init(struct module *mod) +static inline int module_unload_init(struct module *mod) { + return 0; } #endif /* CONFIG_MODULE_UNLOAD */ @@ -2071,7 +2079,9 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; /* Now we've moved module, initialize linked lists, etc. */ - module_unload_init(mod); + err = module_unload_init(mod); + if (err < 0) + goto free_unload; /* add kobject, so we can reference it. */ err = mod_sysfs_init(mod); ^ permalink raw reply related [flat|nested] 17+ messages in thread
[parent not found: <200901261751.21817.rusty@rustcorp.com.au>]
* Re: [PATCH] module: kzalloc mod->ref [not found] ` <200901261751.21817.rusty@rustcorp.com.au> @ 2009-01-27 1:36 ` Rusty Russell 2009-01-27 1:58 ` Tejun Heo 2009-01-27 13:15 ` Ingo Molnar 0 siblings, 2 replies; 17+ messages in thread From: Rusty Russell @ 2009-01-27 1:36 UTC (permalink / raw) To: Takashi Iwai, Ingo Molnar, Tejun Heo Cc: Kyle McMartin, linux-kernel, Eric Dumazet On Monday 26 January 2009 17:51:21 Rusty Russell wrote: > On Thursday 15 January 2009 15:21:46 Rusty Russell wrote: > > In fact, I've decided to go back and add Eric's patch. Since > > alloc_percpu isn't going to be fixed this merge window, we'll do this > > now, then simply use alloc_percpu once that is merged. > > And with the following fixes, it's in linux-next for tomorrow. Ingo, can I push this via your tree? I don't expect any problems, but I hate breaking modules and it'd be nice to have indep testing and get this in soon; linux-next is off this week. (The percpu alloc patchset I promised Tejun also depends trivially on this so getting it upstream will forward that handoff). Thanks, Rusty. Subject: modules: Use a better scheme for refcounting Date: Thu, 15 May 2008 22:40:37 +0200 From: Eric Dumazet <dada1@cosmosbay.com> (This has become critical with the mainline inclusion of NR_CPUS 4096) Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y) is using a lot of memory. Each 'struct module' contains an [NR_CPUS] array of full cache lines. This patch uses existing infrastructure (percpu_modalloc() & percpu_modfree()) to allocate percpu space for the refcount storage. Instead of wasting NR_CPUS*128 bytes (on i386), we now use nr_cpu_ids*sizeof(local_t) bytes. On a typical distro, where NR_CPUS=8, shiping 2000 modules, we reduce size of module files by about 2 Mbytes. (1Kb per module) Instead of having all refcounters in the same memory node - with TLB misses because of vmalloc() - this new implementation permits to have better NUMA properties, since each CPU will use storage on its preferred node, thanks to percpu storage. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- include/linux/module.h | 25 ++++++++++++++++--------- kernel/module.c | 35 +++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h --- a/include/linux/module.h +++ b/include/linux/module.h @@ -219,11 +219,6 @@ void *__symbol_get_gpl(const char *symbo #endif -struct module_ref -{ - local_t count; -} ____cacheline_aligned; - enum module_state { MODULE_STATE_LIVE, @@ -344,8 +339,11 @@ struct module /* Destruction function. */ void (*exit)(void); - /* Reference counts */ - struct module_ref ref[NR_CPUS]; +#ifdef CONFIG_SMP + char *refptr; +#else + local_t ref; +#endif #endif }; #ifndef MODULE_ARCH_INIT @@ -395,13 +393,22 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); +static inline local_t *__module_ref_addr(struct module *mod, int cpu) +{ +#ifdef CONFIG_SMP + return (local_t *) (mod->refptr + per_cpu_offset(cpu)); +#else + return &mod->ref; +#endif +} + /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { BUG_ON(module_refcount(module) == 0); - local_inc(&module->ref[get_cpu()].count); + local_inc(__module_ref_addr(module, get_cpu())); put_cpu(); } } @@ -413,7 +420,7 @@ static inline int try_module_get(struct if (module) { unsigned int cpu = get_cpu(); if (likely(module_is_live(module))) - local_inc(&module->ref[cpu].count); + local_inc(__module_ref_addr(module, cpu)); else ret = 0; put_cpu(); diff --git a/kernel/module.c b/kernel/module.c --- a/kernel/module.c +++ b/kernel/module.c @@ -573,13 +573,13 @@ static char last_unloaded_module[MODULE_ /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { - unsigned int i; + int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for (i = 0; i < NR_CPUS; i++) - local_set(&mod->ref[i].count, 0); + for_each_possible_cpu(cpu) + local_set(__module_ref_addr(mod, cpu), 0); /* Hold reference count during initialization. */ - local_set(&mod->ref[raw_smp_processor_id()].count, 1); + local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -717,10 +717,11 @@ static int try_stop_module(struct module unsigned int module_refcount(struct module *mod) { - unsigned int i, total = 0; + unsigned int total = 0; + int cpu; - for (i = 0; i < NR_CPUS; i++) - total += local_read(&mod->ref[i].count); + for_each_possible_cpu(cpu) + total += local_read(__module_ref_addr(mod, cpu)); return total; } EXPORT_SYMBOL(module_refcount); @@ -894,7 +895,7 @@ void module_put(struct module *module) { if (module) { unsigned int cpu = get_cpu(); - local_dec(&module->ref[cpu].count); + local_dec(__module_ref_addr(module, cpu)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1464,7 +1465,10 @@ static void free_module(struct module *m kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); - +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + if (mod->refptr) + percpu_modfree(mod->refptr); +#endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2011,6 +2015,14 @@ static noinline struct module *load_modu if (err < 0) goto free_mod; +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), + mod->name); + if (!mod->refptr) { + err = -ENOMEM; + goto free_mod; + } +#endif if (pcpuindex) { /* We have a special allocation for this section. */ percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, @@ -2018,7 +2030,7 @@ static noinline struct module *load_modu mod->name); if (!percpu) { err = -ENOMEM; - goto free_mod; + goto free_percpu; } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; mod->percpu = percpu; @@ -2282,6 +2294,9 @@ static noinline struct module *load_modu free_percpu: if (percpu) percpu_modfree(percpu); +#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) + percpu_modfree(mod->refptr); +#endif free_mod: kfree(args); free_hdr: ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-27 1:36 ` Rusty Russell @ 2009-01-27 1:58 ` Tejun Heo 2009-01-27 13:15 ` Ingo Molnar 1 sibling, 0 replies; 17+ messages in thread From: Tejun Heo @ 2009-01-27 1:58 UTC (permalink / raw) To: Rusty Russell Cc: Takashi Iwai, Ingo Molnar, Kyle McMartin, linux-kernel, Eric Dumazet Rusty Russell wrote: > On Monday 26 January 2009 17:51:21 Rusty Russell wrote: >> On Thursday 15 January 2009 15:21:46 Rusty Russell wrote: >>> In fact, I've decided to go back and add Eric's patch. Since >>> alloc_percpu isn't going to be fixed this merge window, we'll do this >>> now, then simply use alloc_percpu once that is merged. >> And with the following fixes, it's in linux-next for tomorrow. > > Ingo, can I push this via your tree? I don't expect any problems, but I > hate breaking modules and it'd be nice to have indep testing and get this in > soon; linux-next is off this week. > > (The percpu alloc patchset I promised Tejun also depends trivially on this > so getting it upstream will forward that handoff). FWIW, the patch looks good to me and I'll be happy to forward it through core/percpu. Thanks. -- tejun ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-27 1:36 ` Rusty Russell 2009-01-27 1:58 ` Tejun Heo @ 2009-01-27 13:15 ` Ingo Molnar 2009-01-28 12:24 ` Rusty Russell 1 sibling, 1 reply; 17+ messages in thread From: Ingo Molnar @ 2009-01-27 13:15 UTC (permalink / raw) To: Rusty Russell Cc: Takashi Iwai, Ingo Molnar, Tejun Heo, Kyle McMartin, linux-kernel, Eric Dumazet * Rusty Russell <rusty@rustcorp.com.au> wrote: > @@ -344,8 +339,11 @@ struct module > /* Destruction function. */ > void (*exit)(void); > > - /* Reference counts */ > - struct module_ref ref[NR_CPUS]; > +#ifdef CONFIG_SMP > + char *refptr; > +#else > + local_t ref; > +#endif hm, that construct looks rather ugly. Is there no way to provide a clean data type and APIs for this that just work symmetrically on both SMP and UP? Ingo ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-27 13:15 ` Ingo Molnar @ 2009-01-28 12:24 ` Rusty Russell 0 siblings, 0 replies; 17+ messages in thread From: Rusty Russell @ 2009-01-28 12:24 UTC (permalink / raw) To: Ingo Molnar Cc: Takashi Iwai, Ingo Molnar, Tejun Heo, Kyle McMartin, linux-kernel, Eric Dumazet On Tuesday 27 January 2009 23:45:34 Ingo Molnar wrote: > > * Rusty Russell <rusty@rustcorp.com.au> wrote: > > > @@ -344,8 +339,11 @@ struct module > > /* Destruction function. */ > > void (*exit)(void); > > > > - /* Reference counts */ > > - struct module_ref ref[NR_CPUS]; > > +#ifdef CONFIG_SMP > > + char *refptr; > > +#else > > + local_t ref; > > +#endif > > hm, that construct looks rather ugly. Is there no way to provide a clean > data type and APIs for this that just work symmetrically on both SMP and > UP? Part of me agreed when I first read it, but unification means UP would do an alloc as well. Neater, but less efficient. But maybe these days UP means embedded, and hence no modules anyway :) Hope that clarifies, Rusty. ^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] module: kzalloc mod->ref 2009-01-14 3:35 [PATCH] module: kzalloc mod->ref Kyle McMartin 2009-01-14 5:54 ` [PATCHv2] " Kyle McMartin 2009-01-14 9:09 ` [PATCH] " Takashi Iwai @ 2009-01-14 10:15 ` richard kennedy 2 siblings, 0 replies; 17+ messages in thread From: richard kennedy @ 2009-01-14 10:15 UTC (permalink / raw) To: Kyle McMartin; +Cc: rusty, linux-kernel Kyle McMartin wrote: > From: Kyle McMartin <kyle@redhat.com> > > Dynamically allocate mod->ref instead of fixing it in the struct module. > Reduces on disk space wasted in the module .ko, and kills a loop > initializing the local_t it contains since we can just kzalloc it. > > This matters when we're talking about large NR_CPUS. > > Signed-off-by: Kyle McMartin <kyle@redhat.com> > --- > The patch removing cacheline_aligned from struct module_ref should be > applied as well to cut down on the amount of memory we allocate. This > patch makes a nice stopgap until we have per_cpu module references. > > cheers, Kyle > Hi Kyle, As module_get/ put use module_ref as a per_cpu variable won't removing the cacheline_aligned be a performance hit due to cacheline sharing on machines with reasonable or small number of cpus? (some of the network code seems to call module_get/put a lot!) regards Richard ^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2009-01-28 12:25 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-14 3:35 [PATCH] module: kzalloc mod->ref Kyle McMartin
2009-01-14 5:54 ` [PATCHv2] " Kyle McMartin
2009-01-14 9:09 ` [PATCH] " Takashi Iwai
2009-01-14 15:58 ` Kyle McMartin
2009-01-14 16:25 ` Takashi Iwai
2009-01-14 19:29 ` Valdis.Kletnieks
2009-01-14 20:37 ` Takashi Iwai
2009-01-14 20:53 ` Kyle McMartin
2009-01-14 17:51 ` richard kennedy
2009-01-14 18:17 ` Kyle McMartin
[not found] ` <200901151303.20006.rusty@rustcorp.com.au>
2009-01-25 15:52 ` Richard Kennedy
[not found] ` <200901151521.47326.rusty@rustcorp.com.au>
2009-01-15 6:03 ` Kyle McMartin
[not found] ` <200901261751.21817.rusty@rustcorp.com.au>
2009-01-27 1:36 ` Rusty Russell
2009-01-27 1:58 ` Tejun Heo
2009-01-27 13:15 ` Ingo Molnar
2009-01-28 12:24 ` Rusty Russell
2009-01-14 10:15 ` richard kennedy
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox