Date: Mon, 12 Jun 2006 13:48:57 +0200
From: Ingo Molnar
To: Christoph Lameter
Cc: Michal Piotrowski, Andrew Morton, linux-kernel@vger.kernel.org
Subject: Re: 2.6.16-rc6-mm2
Message-ID: <20060612114857.GA14616@elte.hu>
In-Reply-To: <20060612110537.GA11358@elte.hu>

* Ingo Molnar wrote:

> > * Christoph Lameter wrote:
> >
> > > Sorry, that patch was still against mm1. Here is a fixed-up version
> > > that applies cleanly against mm2:
>
> i have applied both patches you sent in this thread but it still
> triggers tons of messages:
>
> trying to fix it i realized that i'd have to touch tons of
> architectures, which all duplicate this piece of code:

below is an updated patch that includes fixups for i386 - but the real
fix should be to reduce the per-arch local.h footprint to the bare
minimum possible, and to do this fix in the asm-generic headers. (a
short usage sketch of the reworked wrappers follows the patch.)

	Ingo

---
 include/asm-generic/local.h |   20 ++++++++++----------
 include/asm-i386/local.h    |   25 ++++++++++++++-----------
 include/linux/page-flags.h  |   14 +++++---------
 mm/page_alloc.c             |    8 ++++----
 4 files changed, 33 insertions(+), 34 deletions(-)

Index: linux/include/asm-generic/local.h
===================================================================
--- linux.orig/include/asm-generic/local.h
+++ linux/include/asm-generic/local.h
@@ -44,19 +44,19 @@ typedef struct
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations. Note they take
  * a variable (eg. mystruct.foo), not an address.
  */
-#define cpu_local_read(v)	local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i)	local_set(&__get_cpu_var(v), (i))
-#define cpu_local_inc(v)	local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v)	local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v)	local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v)	local_sub((i), &__get_cpu_var(v))
+#define cpu_local_read(v)	local_read(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_set(v, i)	local_set(&per_cpu(v, raw_smp_processor_id()), (i))
+#define cpu_local_inc(v)	local_inc(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_dec(v)	local_dec(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_add(i, v)	local_add((i), &per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_sub(i, v)	local_sub((i), &per_cpu(v, raw_smp_processor_id()))
 
 /* Non-atomic increments, ie. preemption disabled and won't be touched
  * in interrupt, etc. Some archs can optimize this case well. */
-#define __cpu_local_inc(v)	__local_inc(&__get_cpu_var(v))
-#define __cpu_local_dec(v)	__local_dec(&__get_cpu_var(v))
-#define __cpu_local_add(i, v)	__local_add((i), &__get_cpu_var(v))
-#define __cpu_local_sub(i, v)	__local_sub((i), &__get_cpu_var(v))
+#define __cpu_local_inc(v)	__local_inc(&per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_dec(v)	__local_dec(&per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_add(i, v)	__local_add((i), &per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_sub(i, v)	__local_sub((i), &per_cpu(v, raw_smp_processor_id()))
 
 #endif /* _ASM_GENERIC_LOCAL_H */

Index: linux/include/asm-i386/local.h
===================================================================
--- linux.orig/include/asm-i386/local.h
+++ linux/include/asm-i386/local.h
@@ -53,18 +53,21 @@ static __inline__ void local_sub(long i,
 
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations. Note they take
- * a variable, not an address.
+ * a variable (eg. mystruct.foo), not an address.
  */
-#define cpu_local_read(v)	local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i)	local_set(&__get_cpu_var(v), (i))
-#define cpu_local_inc(v)	local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v)	local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v)	local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v)	local_sub((i), &__get_cpu_var(v))
+#define cpu_local_read(v)	local_read(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_set(v, i)	local_set(&per_cpu(v, raw_smp_processor_id()), (i))
+#define cpu_local_inc(v)	local_inc(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_dec(v)	local_dec(&per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_add(i, v)	local_add((i), &per_cpu(v, raw_smp_processor_id()))
+#define cpu_local_sub(i, v)	local_sub((i), &per_cpu(v, raw_smp_processor_id()))
 
-#define __cpu_local_inc(v)	cpu_local_inc(v)
-#define __cpu_local_dec(v)	cpu_local_dec(v)
-#define __cpu_local_add(i, v)	cpu_local_add((i), (v))
-#define __cpu_local_sub(i, v)	cpu_local_sub((i), (v))
+/* Non-atomic increments, ie. preemption disabled and won't be touched
+ * in interrupt, etc. Some archs can optimize this case well.
+ */
+#define __cpu_local_inc(v)	__local_inc(&per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_dec(v)	__local_dec(&per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_add(i, v)	__local_add((i), &per_cpu(v, raw_smp_processor_id()))
+#define __cpu_local_sub(i, v)	__local_sub((i), &per_cpu(v, raw_smp_processor_id()))
 
 #endif /* _ARCH_I386_LOCAL_H */

Index: linux/include/linux/page-flags.h
===================================================================
--- linux.orig/include/linux/page-flags.h
+++ linux/include/linux/page-flags.h
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 #include <linux/cache.h>
-
+#include <asm/local.h>
 #include <asm/pgtable.h>
 
 /*
@@ -108,10 +108,6 @@
 /*
  * Light weight per cpu counter implementation.
  *
- * Note that these can race. We do not bother to enable preemption
- * or care about interrupt races. All we care about is to have some
- * approximate count of events.
- *
  * Counters should only be incremented and no critical kernel component
  * should rely on the counter values.
  *
@@ -134,24 +130,24 @@ enum vm_event_item { PGPGIN, PGPGOUT, PS
 };
 
 struct vm_event_state {
-	unsigned long event[NR_VM_EVENT_ITEMS];
+	local_t event[NR_VM_EVENT_ITEMS];
 };
 
 DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
 
 static inline unsigned long get_cpu_vm_events(enum vm_event_item item)
 {
-	return __get_cpu_var(vm_event_states).event[item];
+	return cpu_local_read(vm_event_states.event[item]);
 }
 
 static inline void count_vm_event(enum vm_event_item item)
 {
-	__get_cpu_var(vm_event_states).event[item]++;
+	cpu_local_inc(vm_event_states.event[item]);
 }
 
 static inline void count_vm_events(enum vm_event_item item, long delta)
 {
-	__get_cpu_var(vm_event_states).event[item] += delta;
+	cpu_local_add(delta, vm_event_states.event[item]);
 }
 
 extern void all_vm_events(unsigned long *);

Index: linux/mm/page_alloc.c
===================================================================
--- linux.orig/mm/page_alloc.c
+++ linux/mm/page_alloc.c
@@ -1583,7 +1583,7 @@ static void show_node(struct zone *zone)
 #endif
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
-DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{LOCAL_INIT(0)}};
 EXPORT_PER_CPU_SYMBOL(vm_event_states);
 
 static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
@@ -1604,7 +1604,7 @@ static void sum_vm_events(unsigned long
 	for_each_cpu_mask(cpu, *cpumask) {
 		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
 
 		for (i=0; i< NR_VM_EVENT_ITEMS; i++)
-			ret[i] += this->event[i];
+			ret[i] += local_read(&this->event[i]);
 	}
 }
@@ -2881,8 +2881,8 @@ static void vm_events_fold_cpu(int cpu)
 	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
 	int i;
 
 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
-		count_vm_events(i, fold_state->event[i]);
-		fold_state->event[i] = 0;
+		count_vm_events(i, local_read(&fold_state->event[i]));
+		local_set(&fold_state->event[i], 0);
 	}
 }
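
and for completeness, a minimal usage sketch of the reworked wrappers
from the caller side. (note: the demo_count counter and the demo_*()
functions are made up for illustration and are not part of the patch;
DEFINE_PER_CPU, LOCAL_INIT and the cpu_local_*() wrappers are the ones
defined above. i'm also assuming the "tons of messages" were the
smp_processor_id()-in-preemptible-context debug warnings, which
raw_smp_processor_id() sidesteps.)

#include <linux/percpu.h>
#include <linux/smp.h>
#include <asm/local.h>

/* illustration only: a per-CPU local_t counter, initialized the
 * same way the patch initializes vm_event_states: */
static DEFINE_PER_CPU(local_t, demo_count) = LOCAL_INIT(0);

static void demo_count_event(void)
{
	/*
	 * with the patch this expands to
	 * local_inc(&per_cpu(demo_count, raw_smp_processor_id())),
	 * so it can be called from preemptible context without
	 * triggering the smp_processor_id() debug checks. if we get
	 * migrated mid-update the increment can race, which is
	 * acceptable for approximate event counters.
	 */
	cpu_local_inc(demo_count);
}

static long demo_count_read(void)
{
	/* reads the calling CPU's copy only - a global sum has to
	 * iterate with per_cpu(), the way sum_vm_events() does. */
	return cpu_local_read(demo_count);
}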