public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* x86: A fast way to check capabilities of the current cpu
@ 2010-12-15 20:07 Christoph Lameter
  2010-12-15 20:11 ` x86: Avoid passing struct cpuinfo pointer to mce_available Christoph Lameter
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Christoph Lameter @ 2010-12-15 20:07 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Tejun Heo, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm


Subject: x86: A fast way to check capabilities of the current cpu

Add this_cpu_has() which determines if the current cpu has a certain
ability using a segment prefix and a bit test operation.

For that we need to add bit operations to x86's percpu.h.

Many uses of cpu_has use a pointer passed to a function to determine
the current flags. That is no longer necessary after this patch.

However, this patch only converts the straightforward cases where
cpu_has is used with this_cpu_ptr. The rest is work for later.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 arch/x86/include/asm/cpufeature.h |   13 +++++++++----
 arch/x86/include/asm/percpu.h     |   27 +++++++++++++++++++++++++++
 arch/x86/kernel/apic/apic.c       |    2 +-
 arch/x86/kernel/process.c         |    4 ++--
 arch/x86/kernel/smpboot.c         |    4 ++--
 5 files changed, 41 insertions(+), 9 deletions(-)

Index: linux-2.6/arch/x86/include/asm/cpufeature.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/cpufeature.h	2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/cpufeature.h	2010-12-15 12:54:48.000000000 -0600
@@ -206,8 +206,7 @@ extern const char * const x86_power_flag
 #define test_cpu_cap(c, bit)						\
 	 test_bit(bit, (unsigned long *)((c)->x86_capability))

-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) &&					\
+#define REQUIRED_MASK_BIT_SET(bit)					\
 	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
 	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
 	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
@@ -217,10 +216,16 @@ extern const char * const x86_power_flag
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\
 	   (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )	\
-	  ? 1 :								\
+	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
 	 test_cpu_cap(c, bit))

+#define this_cpu_has(bit)						\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
+	 this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)

 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
Index: linux-2.6/arch/x86/kernel/apic/apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic/apic.c	2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/apic/apic.c	2010-12-15 12:38:53.000000000 -0600
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(v
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);

-	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h	2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h	2010-12-15 13:06:27.000000000 -0600
@@ -545,6 +545,33 @@ do {									\
 	old__;								\
 })

+static __always_inline int this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read_stable(*a)) != 0;
+}
+
+static inline int this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+	int oldbit;
+
+	asm volatile("bt "__percpu_arg(2)",%1\n\t"
+			"sbb %0,%0"
+			: "=r" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+#define this_cpu_test_bit(nr, addr)			\
+	(__builtin_constant_p((nr))			\
+	 ? this_cpu_constant_test_bit((nr), (addr))	\
+	 : this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>

 /* We can use this directly for local CPU (faster). */
Index: linux-2.6/arch/x86/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/process.c	2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/process.c	2010-12-15 12:38:53.000000000 -0600
@@ -445,7 +445,7 @@ void mwait_idle_with_hints(unsigned long
 {
 	trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
 	if (!need_resched()) {
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);

 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -460,7 +460,7 @@ static void mwait_idle(void)
 {
 	if (!need_resched()) {
 		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);

 		__monitor((void *)&current_thread_info()->flags, 0, 0);
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c	2010-12-15 12:38:52.000000000 -0600
+++ linux-2.6/arch/x86/kernel/smpboot.c	2010-12-15 12:38:53.000000000 -0600
@@ -1397,9 +1397,9 @@ static inline void mwait_play_dead(void)
 	int i;
 	void *mwait_ptr;

-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
+	if (!this_cpu_has(X86_FEATURE_MWAIT))
 		return;
-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;
 	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
 		return;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* x86: Avoid passing struct cpuinfo pointer to mce_available
  2010-12-15 20:07 x86: A fast way to check capabilities of the current cpu Christoph Lameter
@ 2010-12-15 20:11 ` Christoph Lameter
  2010-12-15 20:56 ` x86: A fast way to check capabilities of the current cpu Andrew Morton
  2011-01-21 17:11 ` Tejun Heo
  2 siblings, 0 replies; 22+ messages in thread
From: Christoph Lameter @ 2010-12-15 20:11 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Tejun Heo, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm


Subject: x86: Avoid passing struct cpuinfo pointer to mce_available

If we do not pass the pointer to cpuinfo to mce_available then it's possible
to use this_cpu_has.

There are two use cases of mce_available: One with the current processor
and one with the boot cpu. Define a function for both cases. However, there
is only one case in which boot_mce_available is used. If we somehow can
get rid of that then the patch could be simplified.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 arch/x86/include/asm/mce.h             |    3 +-
 arch/x86/kernel/cpu/mcheck/mce.c       |   41 +++++++++++++++++++--------------
 arch/x86/kernel/cpu/mcheck/mce_intel.c |    2 -
 3 files changed, 27 insertions(+), 19 deletions(-)

Index: linux-2.6/arch/x86/include/asm/mce.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/mce.h	2010-12-15 13:25:37.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/mce.h	2010-12-15 13:25:57.000000000 -0600
@@ -177,7 +177,8 @@ void mce_amd_feature_init(struct cpuinfo
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif

-int mce_available(struct cpuinfo_x86 *c);
+int this_cpu_mce_available(void);
+int boot_mce_available(void);

 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
Index: linux-2.6/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mcheck/mce.c	2010-12-15 13:20:48.000000000 -0600
+++ linux-2.6/arch/x86/kernel/cpu/mcheck/mce.c	2010-12-15 13:33:19.000000000 -0600
@@ -434,11 +434,19 @@ static int mce_ring_add(unsigned long pf
 	return 0;
 }

-int mce_available(struct cpuinfo_x86 *c)
+int this_cpu_mce_available(void)
 {
 	if (mce_disabled)
 		return 0;
-	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+	return this_cpu_has(X86_FEATURE_MCE) && this_cpu_has(X86_FEATURE_MCA);
+}
+
+int boot_mce_available(struct cpuinfo_x86 *c)
+{
+	if (mce_disabled)
+		return 0;
+	return cpu_has(boot_cpu_data, X86_FEATURE_MCE) &&
+			cpu_has(boot_cpu_data, X86_FEATURE_MCA);
 }

 static void mce_schedule_work(void)
@@ -1159,7 +1167,7 @@ static void mce_start_timer(unsigned lon

 	WARN_ON(smp_processor_id() != data);

-	if (mce_available(__this_cpu_ptr(&cpu_info))) {
+	if (this_cpu_mce_available()) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
 	}
@@ -1373,9 +1381,9 @@ static int __cpuinit __mcheck_cpu_apply_

 static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
-	if (c->x86 != 5)
+	if (this_cpu_read(cpu_info.x86) != 5)
 		return;
-	switch (c->x86_vendor) {
+	switch (this_cpu_read(cpu_info.x86_vendor)) {
 	case X86_VENDOR_INTEL:
 		intel_p5_mcheck_init(c);
 		break;
@@ -1402,17 +1410,16 @@ static void __mcheck_cpu_init_vendor(str
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(mce_next_interval);

 	setup_timer(t, mce_start_timer, smp_processor_id());

 	if (mce_ignore_ce)
 		return;

-	*n = check_interval * HZ;
-	if (!*n)
+	this_cpu_write(mce_next_interval, check_interval * HZ);
+	if (!this_cpu_read(mce_next_interval))
 		return;
-	t->expires = round_jiffies(jiffies + *n);
+	t->expires = round_jiffies(jiffies + this_cpu_read(mce_next_interval));
 	add_timer_on(t, smp_processor_id());
 }

@@ -1438,7 +1445,7 @@ void __cpuinit mcheck_cpu_init(struct cp

 	__mcheck_cpu_ancient_init(c);

-	if (!mce_available(c))
+	if (!this_cpu_mce_available())
 		return;

 	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
@@ -1775,7 +1782,7 @@ static int mce_resume(struct sys_device
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
-	if (!mce_available(__this_cpu_ptr(&cpu_info)))
+	if (!this_cpu_mce_available())
 		return;
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_timer();
@@ -1790,7 +1797,7 @@ static void mce_restart(void)
 /* Toggle features for corrected errors */
 static void mce_disable_ce(void *all)
 {
-	if (!mce_available(__this_cpu_ptr(&cpu_info)))
+	if (!this_cpu_mce_available())
 		return;
 	if (all)
 		del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1806,7 @@ static void mce_disable_ce(void *all)

 static void mce_enable_ce(void *all)
 {
-	if (!mce_available(__this_cpu_ptr(&cpu_info)))
+	if (!this_cpu_mce_available())
 		return;
 	cmci_reenable();
 	cmci_recheck();
@@ -1962,7 +1969,7 @@ static __cpuinit int mce_create_device(u
 	int err;
 	int i, j;

-	if (!mce_available(&boot_cpu_data))
+	if (!boot_mce_available())
 		return -EIO;

 	memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
@@ -2022,7 +2029,7 @@ static void __cpuinit mce_disable_cpu(vo
 	unsigned long action = *(unsigned long *)h;
 	int i;

-	if (!mce_available(__this_cpu_ptr(&cpu_info)))
+	if (!this_cpu_mce_available())
 		return;

 	if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2047,7 @@ static void __cpuinit mce_reenable_cpu(v
 	unsigned long action = *(unsigned long *)h;
 	int i;

-	if (!mce_available(__this_cpu_ptr(&cpu_info)))
+	if (!this_cpu_mce_available())
 		return;

 	if (!(action & CPU_TASKS_FROZEN))
@@ -2122,7 +2129,7 @@ static __init int mcheck_init_device(voi
 	int err;
 	int i = 0;

-	if (!mce_available(&boot_cpu_data))
+	if (!boot_mce_available())
 		return -EIO;

 	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
Index: linux-2.6/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c	2010-12-15 13:24:41.000000000 -0600
+++ linux-2.6/arch/x86/kernel/cpu/mcheck/mce_intel.c	2010-12-15 13:25:23.000000000 -0600
@@ -130,7 +130,7 @@ void cmci_recheck(void)
 	unsigned long flags;
 	int banks;

-	if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
+	if (!this_cpu_mce_available() || !cmci_supported(&banks))
 		return;
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 20:07 x86: A fast way to check capabilities of the current cpu Christoph Lameter
  2010-12-15 20:11 ` x86: Avoid passing struct cpuinfo pointer to mce_available Christoph Lameter
@ 2010-12-15 20:56 ` Andrew Morton
  2010-12-15 21:03   ` H. Peter Anvin
  2011-01-21 17:11 ` Tejun Heo
  2 siblings, 1 reply; 22+ messages in thread
From: Andrew Morton @ 2010-12-15 20:56 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: H. Peter Anvin, Tejun Heo, Pekka Enbeerg, linux-kernel,
	Eric Dumazet, Mathieu Desnoyers

On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
Christoph Lameter <cl@linux.com> wrote:

> +#define cpu_has(c, bit)							\
> +	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
>  	 test_cpu_cap(c, bit))
> 
> +#define this_cpu_has(bit)						\
> +	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
> +	 this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
> +

Isn't

	a ? 1 : b

a complex way of writing

	a || b

?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 20:56 ` x86: A fast way to check capabilities of the current cpu Andrew Morton
@ 2010-12-15 21:03   ` H. Peter Anvin
  2010-12-15 21:30     ` Miguel Ojeda
  0 siblings, 1 reply; 22+ messages in thread
From: H. Peter Anvin @ 2010-12-15 21:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christoph Lameter, Tejun Heo, Pekka Enbeerg, linux-kernel,
	Eric Dumazet, Mathieu Desnoyers

On 12/15/2010 12:56 PM, Andrew Morton wrote:
> On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
> Christoph Lameter <cl@linux.com> wrote:
> 
>> +#define cpu_has(c, bit)							\
>> +	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
>>  	 test_cpu_cap(c, bit))
>>
>> +#define this_cpu_has(bit)						\
>> +	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
>> +	 this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
>> +
> 
> Isn't
> 
> 	a ? 1 : b
> 
> a complex way of writing
> 
> 	a || b
> 

Not if b is not a bool.

	-hpa


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 21:03   ` H. Peter Anvin
@ 2010-12-15 21:30     ` Miguel Ojeda
  2010-12-15 21:39       ` H. Peter Anvin
  0 siblings, 1 reply; 22+ messages in thread
From: Miguel Ojeda @ 2010-12-15 21:30 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Christoph Lameter, Tejun Heo, Pekka Enbeerg,
	linux-kernel, Eric Dumazet, Mathieu Desnoyers

On Wed, Dec 15, 2010 at 10:03 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 12/15/2010 12:56 PM, Andrew Morton wrote:
>> On Wed, 15 Dec 2010 14:07:39 -0600 (CST)
>> Christoph Lameter <cl@linux.com> wrote:
>>
>>> +#define cpu_has(c, bit)                                                     \
>>> +    (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
>>>       test_cpu_cap(c, bit))
>>>
>>> +#define this_cpu_has(bit)                                           \
>>> +    (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
>>> +     this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
>>> +
>>
>> Isn't
>>
>>       a ? 1 : b
>>
>> a complex way of writing
>>
>>       a || b
>>
>
> Not if b is not a bool.
>

In this case this_cpu_*_test_bit() return an int, but they act as a
bool and are used in if()s; where is the catch?

>        -hpa
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 21:30     ` Miguel Ojeda
@ 2010-12-15 21:39       ` H. Peter Anvin
  2010-12-15 21:48         ` Miguel Ojeda
  2010-12-16  6:25         ` Miles Bader
  0 siblings, 2 replies; 22+ messages in thread
From: H. Peter Anvin @ 2010-12-15 21:39 UTC (permalink / raw)
  To: Miguel Ojeda
  Cc: Andrew Morton, Christoph Lameter, Tejun Heo, Pekka Enbeerg,
	linux-kernel, Eric Dumazet, Mathieu Desnoyers

On 12/15/2010 01:30 PM, Miguel Ojeda wrote:
> 
> In this case it this_cpu_*_test_bit() return an int, but they act as a
> bool and are used in if()s; where is the catch?
> 

If they aren't, and are stored in a variable for whatever reason, then
the || form will generate additional instructions to booleanize the
value for no good reason.

	-hpa

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 21:39       ` H. Peter Anvin
@ 2010-12-15 21:48         ` Miguel Ojeda
  2010-12-16  6:25         ` Miles Bader
  1 sibling, 0 replies; 22+ messages in thread
From: Miguel Ojeda @ 2010-12-15 21:48 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Andrew Morton, Christoph Lameter, Tejun Heo, Pekka Enbeerg,
	linux-kernel, Eric Dumazet, Mathieu Desnoyers

On Wed, Dec 15, 2010 at 10:39 PM, H. Peter Anvin <hpa@zytor.com> wrote:
> On 12/15/2010 01:30 PM, Miguel Ojeda wrote:
>>
>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>> bool and are used in if()s; where is the catch?
>>
>
> If they aren't, and are stored in a variable for whatever reason, then
> the || form will generate additional instructions to booleanize the
> value for no good reason.

Thanks! I suppose that is the 't' of being "a fast way" ;-)

>
>        -hpa
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 21:39       ` H. Peter Anvin
  2010-12-15 21:48         ` Miguel Ojeda
@ 2010-12-16  6:25         ` Miles Bader
  2010-12-16 10:17           ` Miguel Ojeda
  1 sibling, 1 reply; 22+ messages in thread
From: Miles Bader @ 2010-12-16  6:25 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Miguel Ojeda, Andrew Morton, Christoph Lameter, Tejun Heo,
	Pekka Enbeerg, linux-kernel, Eric Dumazet, Mathieu Desnoyers

"H. Peter Anvin" <hpa@zytor.com> writes:
>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>> bool and are used in if()s; where is the catch?
>
> If they aren't, and are stored in a variable for whatever reason, then
> the || form will generate additional instructions to booleanize the
> value for no good reason.

It doesn't actually have to "booleanize" the value if it's used in a
boolean context though (and, AFAICT, usually won't).

My vague impression is that when used in a boolean context, gcc will
often generate the same or "equivalent" code for both variants -- but
sometimes a||b seems to generate better code; e.g.:

   static inline int test1a (int a, int b) { return a ? 1 : b; }
   int test1b (int a, int b) { if (test1a (a,b)) return a+b; else return 37; }

   static inline int test2a (int a, int b) { return a || b; }
   int test2b (int a, int b) { if (test2a (a,b)) return a+b; else return 37; }

=>

test1b:
	testl	%edi, %edi
	jne	.L2
	movl	$37, %eax
	testl	%esi, %esi
	jne	.L2
	rep
	ret
.L2:
	leal	(%rsi,%rdi), %eax
	ret

test2b:
	leal	(%rsi,%rdi), %edx
	movl	$37, %eax
	orl	%edi, %esi
	cmovne	%edx, %eax
	ret

	.ident	"GCC: (Debian 4.5.1-8) 4.5.1"


-Miles

-- 
Is it true that nothing can be known?  If so how do we know this?  -Woody Allen

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-16  6:25         ` Miles Bader
@ 2010-12-16 10:17           ` Miguel Ojeda
  2010-12-16 10:29             ` Miles Bader
  0 siblings, 1 reply; 22+ messages in thread
From: Miguel Ojeda @ 2010-12-16 10:17 UTC (permalink / raw)
  To: Miles Bader
  Cc: H. Peter Anvin, Andrew Morton, Christoph Lameter, Tejun Heo,
	Pekka Enbeerg, linux-kernel, Eric Dumazet, Mathieu Desnoyers

On Thu, Dec 16, 2010 at 7:25 AM, Miles Bader <miles@gnu.org> wrote:
> "H. Peter Anvin" <hpa@zytor.com> writes:
>>> In this case it this_cpu_*_test_bit() return an int, but they act as a
>>> bool and are used in if()s; where is the catch?
>>
>> If they aren't, and are stored in a variable for whatever reason, then
>> the || form will generate additional instructions to booleanize the
>> value for no good reason.
>
> It doesn't actually have to "booleanize" the value if it's used in a
> boolean context though (and, AFAICT, usually won't).
>
> My vague impression is that when used in a boolean context, gcc will
> often generate the same or "equivalent" code for both variants -- but
> sometimes a||b seems to generate better code; e.g.:
>
>   static inline int test1a (int a, int b) { return a ? 1 : b; }
>   int test1b (int a, int b) { if (test1a (a,b)) return a+b; else return 37; }
>
>   static inline int test2a (int a, int b) { return a || b; }
>   int test2b (int a, int b) { if (test2a (a,b)) return a+b; else return 37; }
>

I think hpa was talking about some code where gcc can not optimize out
the assignment (e.g. volatile, complex code, using the int outside
conditional expressions, etc.).

>=>
>
> test1b:
>        testl   %edi, %edi
>        jne     .L2
>        movl    $37, %eax
>        testl   %esi, %esi
>        jne     .L2
>        rep
>        ret
> .L2:
>        leal    (%rsi,%rdi), %eax
>        ret
>
> test2b:
>        leal    (%rsi,%rdi), %edx
>        movl    $37, %eax
>        orl     %edi, %esi
>        cmovne  %edx, %eax
>        ret
>
>        .ident  "GCC: (Debian 4.5.1-8) 4.5.1"
>
>
> -Miles
>
> --
> Is it true that nothing can be known?  If so how do we know this?  -Woody Allen
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-16 10:17           ` Miguel Ojeda
@ 2010-12-16 10:29             ` Miles Bader
  2010-12-16 15:38               ` H. Peter Anvin
  0 siblings, 1 reply; 22+ messages in thread
From: Miles Bader @ 2010-12-16 10:29 UTC (permalink / raw)
  To: Miguel Ojeda
  Cc: H. Peter Anvin, Andrew Morton, Christoph Lameter, Tejun Heo,
	Pekka Enbeerg, linux-kernel, Eric Dumazet, Mathieu Desnoyers

Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> writes:
>>> If they aren't, and are stored in a variable for whatever reason, then
>>> the || form will generate additional instructions to booleanize the
>>> value for no good reason.
>
> I think hpa was talking about some code where gcc can not optimize out
> the assignment (e.g. volatile, complex code, using the int outside
> conditional expressions, etc.).

Sure, but that seems to assume that the alternatives are otherwise
equivalent in the common case, when used in a boolean context.

If that's not true then one risks pessimizing the common case to make an
uncommon case more efficient.

-Miles

-- 
Suburbia: where they tear out the trees and then name streets after them.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-16 10:29             ` Miles Bader
@ 2010-12-16 15:38               ` H. Peter Anvin
  2010-12-17  4:26                 ` Miles Bader
  0 siblings, 1 reply; 22+ messages in thread
From: H. Peter Anvin @ 2010-12-16 15:38 UTC (permalink / raw)
  To: Miles Bader
  Cc: Miguel Ojeda, Andrew Morton, Christoph Lameter, Tejun Heo,
	Pekka Enbeerg, linux-kernel, Eric Dumazet, Mathieu Desnoyers

On 12/16/2010 02:29 AM, Miles Bader wrote:
> Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> writes:
>>>> If they aren't, and are stored in a variable for whatever reason, then
>>>> the || form will generate additional instructions to booleanize the
>>>> value for no good reason.
>>
>> I think hpa was talking about some code where gcc can not optimize out
>> the assignment (e.g. volatile, complex code, using the int outside
>> conditional expressions, etc.).
> 
> Sure, but that seems to assume that the alternatives are otherwise
> equivalent in the common case, when used in a boolean context.
> 
> If that's not true then one risks pessimizing the common case to make an
> uncommon case more efficient.
> 

The alternatives are equivalent when used in the common context.  Your
examples are bogus, because they don't account for the
__builtin_constant_p().

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-16 15:38               ` H. Peter Anvin
@ 2010-12-17  4:26                 ` Miles Bader
  0 siblings, 0 replies; 22+ messages in thread
From: Miles Bader @ 2010-12-17  4:26 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Miguel Ojeda, Andrew Morton, Christoph Lameter, Tejun Heo,
	Pekka Enbeerg, linux-kernel, Eric Dumazet, Mathieu Desnoyers

"H. Peter Anvin" <hpa@zytor.com> writes:
> The alternatives are equivalent when used in the common context.  Your
> examples are bogus, because they don't account for the
> __builtin_constant_p().

Ah, true ... :}

-miles

-- 
Abstainer, n. A weak person who yields to the temptation of denying himself a
pleasure. A total abstainer is one who abstains from everything but
abstention, and especially from inactivity in the affairs of others.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2010-12-15 20:07 x86: A fast way to check capabilities of the current cpu Christoph Lameter
  2010-12-15 20:11 ` x86: Avoid passing struct cpuinfo pointer to mce_available Christoph Lameter
  2010-12-15 20:56 ` x86: A fast way to check capabilities of the current cpu Andrew Morton
@ 2011-01-21 17:11 ` Tejun Heo
  2011-01-21 17:21   ` Christoph Lameter
  2 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2011-01-21 17:11 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

Hello, Christoph.

I was trying to forward this to x86 tree but spotted a problem.

On Wed, Dec 15, 2010 at 02:07:39PM -0600, Christoph Lameter wrote:
> +static __always_inline int this_cpu_constant_test_bit(unsigned int nr,
> +                        const unsigned long __percpu *addr)
> +{
> +	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
> +
> +	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read_stable(*a)) != 0;
> +}

I don't think percpu_read_stable() can be used here.  It's not
guaranteed to be stable across different cpus.

Also, can we just implement what's necessary on top of this_cpu_has()?
this_cpu_has() already has constant handling, so there's no need to
add this_cpu_test_bit() at this point.

Thank you.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:11 ` Tejun Heo
@ 2011-01-21 17:21   ` Christoph Lameter
  2011-01-21 17:28     ` Tejun Heo
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Lameter @ 2011-01-21 17:21 UTC (permalink / raw)
  To: Tejun Heo
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, 21 Jan 2011, Tejun Heo wrote:

> Hello, Christoph.
>
> I was trying to forward this to x86 tree but spotted a problem.
>
> On Wed, Dec 15, 2010 at 02:07:39PM -0600, Christoph Lameter wrote:
> > +static __always_inline int this_cpu_constant_test_bit(unsigned int nr,
> > +                        const unsigned long __percpu *addr)
> > +{
> > +	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
> > +
> > +	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read_stable(*a)) != 0;
> > +}
>
> I don't think percpu_read_stable() can be used here.  It's not
> guaranteed to be stable across different cpus.

Why would that matter? The caller has to disable preemption anyway since
otherwise the processor may change which means that the result of the
operation is useless.

> Also, can we just implement what's necessary on top of this_cpu_has()?
> this_cpu_has() already has constant handling, so there's no need to
> add this_cpu_test_bit() at this point.

Not sure what you mean. this_cpu_test_bit is necessary because
test_cpu_cap expects a regular pointer and performs a regular load.
this_cpu_constant_test_bit handles the segment prefix necessary for a per
cpu load.

The constant refers to the bit.



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:21   ` Christoph Lameter
@ 2011-01-21 17:28     ` Tejun Heo
  2011-01-21 17:46       ` Christoph Lameter
  0 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2011-01-21 17:28 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, Jan 21, 2011 at 11:21:02AM -0600, Christoph Lameter wrote:
> > I don't think percpu_read_stable() can be used here.  It's not
> > guaranteed to be stable across different cpus.
> 
> Why would that matter? The caller has to disabled preemption anyways since
> otherwise the processor may change which means that the result of the
> operation is useless.

Because

	preempt_disable();
	this_cpu_has();
	preempt_enable();
	preempt_disable();
	this_cpu_has();
	preempt_enable();

might malfunction.  percpu_read_stable() is pretty much applicable
only to stuff local to the thread.

> > Also, can we just implement what's necessary on top of this_cpu_has()?
> > this_cpu_has() already has constant handling, so there's no need to
> > add this_cpu_test_bit() at this point.
> 
> Not sure what you mean. this_cpu_test_bit is necessary because
> test_cpu_cap expects a regular pointer and performs a regular load.
> this_cpu_constant_test_bit handles the segment prefix necessary for a per
> cpu load.
> 
> The constant refers to the bit.

Oh, you're right.  Sorry about that.  Can you please then add a
comment noting that the operation is x86 only?  Maybe prefix it with
x86_?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:28     ` Tejun Heo
@ 2011-01-21 17:46       ` Christoph Lameter
  2011-01-21 17:48         ` Tejun Heo
  2011-01-21 22:32         ` H. Peter Anvin
  0 siblings, 2 replies; 22+ messages in thread
From: Christoph Lameter @ 2011-01-21 17:46 UTC (permalink / raw)
  To: Tejun Heo
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, 21 Jan 2011, Tejun Heo wrote:

> On Fri, Jan 21, 2011 at 11:21:02AM -0600, Christoph Lameter wrote:
> > > I don't think percpu_read_stable() can be used here.  It's not
> > > guaranteed to be stable across different cpus.
> >
> > Why would that matter? The caller has to disabled preemption anyways since
> > otherwise the processor may change which means that the result of the
> > operation is useless.
>
> Because
>
> 	preempt_disable();
> 	this_cpu_has();
> 	preempt_enable();
> 	preempt_disable();
> 	this_cpu_has();
> 	preempt_enable();
>
> might malfunction.  percpu_read_stable() is pretty much applicable
> only to stuff local to the thread.

OK, then let's change it to percpu_read.

> > > Also, can we just implement what's necessary on top of this_cpu_has()?
> > > this_cpu_has() already has constant handling, so there's no need to
> > > add this_cpu_test_bit() at this point.
> >
> > Not sure what you mean. this_cpu_test_bit is necessary because
> > test_cpu_cap expects a regular pointer and performs a regular load.
> > this_cpu_constant_test_bit handles the segment prefix necessary for a per
> > cpu load.
> >
> > The constant refers to the bit.
>
> Oh, you're right.  Sorry about that.  Can you please then add a
> comment noting that the operation is x86 only?  Maybe prefix it with
> x86_?

For a function defined in an arch-specific include file and only used in
arch-specific code?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:46       ` Christoph Lameter
@ 2011-01-21 17:48         ` Tejun Heo
  2011-01-21 17:57           ` Christoph Lameter
  2011-01-21 22:32         ` H. Peter Anvin
  1 sibling, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2011-01-21 17:48 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, Jan 21, 2011 at 11:46:04AM -0600, Christoph Lameter wrote:
> > Oh, you're right.  Sorry about that.  Can you please then add a
> > comment noting that the operation is x86 only?  Maybe prefix it with
> > x86_?
> 
> For a function defined in an specific include file and only used in arch
> specific code?

Hmm?  asm/percpu.h gets included by linux/percpu.h, so it would end up
in most .c files.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:48         ` Tejun Heo
@ 2011-01-21 17:57           ` Christoph Lameter
  2011-01-21 18:12             ` Tejun Heo
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Lameter @ 2011-01-21 17:57 UTC (permalink / raw)
  To: Tejun Heo
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, 21 Jan 2011, Tejun Heo wrote:

> On Fri, Jan 21, 2011 at 11:46:04AM -0600, Christoph Lameter wrote:
> > > Oh, you're right.  Sorry about that.  Can you please then add a
> > > comment noting that the operation is x86 only?  Maybe prefix it with
> > > x86_?
> >
> > For a function defined in an specific include file and only used in arch
> > specific code?
>
> Hmm?  asm/percpu.h gets included by linux/percpu.h, so it would end up
> in most .c files.

The function is defined in arch/x86/include/asm/percpu.h. At least my
patches here are that way.

#include "asm/percpu.h" will get you include/asm-generic/percpu.h on
other arches.



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:57           ` Christoph Lameter
@ 2011-01-21 18:12             ` Tejun Heo
  2011-01-21 18:20               ` Christoph Lameter
  0 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2011-01-21 18:12 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, Jan 21, 2011 at 11:57:09AM -0600, Christoph Lameter wrote:
> > Hmm?  asm/percpu.h gets included by linux/percpu.h, so it would end up
> > in most .c files.
> 
> The function is defined in arch/x86/include/asm/percpu.h. At least my
> patches here are that way.
> 
> #include "asm/percpu.h" will get you include/asm-generic/percpu.h on
> other arches.

Yeah, I was referring to generic code having visibility to the x86
specific this_cpu op which isn't available on other archs.  I don't
feel too strongly about it so if you don't like it, just add a comment.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 18:12             ` Tejun Heo
@ 2011-01-21 18:20               ` Christoph Lameter
  0 siblings, 0 replies; 22+ messages in thread
From: Christoph Lameter @ 2011-01-21 18:20 UTC (permalink / raw)
  To: Tejun Heo
  Cc: H. Peter Anvin, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm



On Fri, 21 Jan 2011, Tejun Heo wrote:

> On Fri, Jan 21, 2011 at 11:57:09AM -0600, Christoph Lameter wrote:
> > > Hmm?  asm/percpu.h gets included by linux/percpu.h, so it would end up
> > > in most .c files.
> >
> > The function is defined in arch/x86/include/asm/percpu.h. At least my
> > patches here are that way.
> >
> > #include "asm/percpu.h" will get you include/asm-generic/percpu.h on
> > other arches.
>
> Yeah, I was referring to generic code having visibility to the x86
> specific this_cpu op which isn't available on other archs.  I don't
> feel too strong about it so if you don't like it, just add a comment.

The code segments in generic code that use this function are in #ifdef
CONFIG_X86 sections.

Where do you want me to add a comment?

In arch/x86/include/asm/percpu.h saying that the operations defined
there are x86 specific? Or in the #ifdef CONFIG_X86 sections saying that
the this_cpu_has operations are x86 specific?



Code snippet from drivers/acpi/processor_throttling.c

...

#ifdef CONFIG_X86
/*
 * Read MSR_IA32_THERM_CONTROL on the executing CPU and store it in *value.
 *
 * Returns 0 after the read path has run, or -1 when the CPU is not an
 * Intel part or lacks X86_FEATURE_ACPI; in the -1 case an error is
 * logged and *value is left untouched.
 */
static int acpi_throttling_rdmsr(u64 *value)
{
        u64 msr_high, msr_low;
        u64 msr = 0;
        int ret = -1;

        /*
         * Both checks use the per-cpu accessors (this_cpu_read /
         * this_cpu_has) instead of a struct cpuinfo pointer.
         */
        if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
                !this_cpu_has(X86_FEATURE_ACPI)) {
                printk(KERN_ERR PREFIX
                        "HARDWARE addr space,NOT supported yet\n");
        } else {
                /*
                 * The 64-bit temporaries are zeroed first because
                 * rdmsr_safe() is handed u32 pointers into them;
                 * presumably only the low 32 bits of each are written
                 * (relies on little-endian layout -- TODO confirm).
                 */
                msr_low = 0;
                msr_high = 0;
                /*
                 * NOTE(review): whatever rdmsr_safe() returns is
                 * discarded, so ret becomes 0 unconditionally below --
                 * confirm this is intended.
                 */
                rdmsr_safe(MSR_IA32_THERM_CONTROL,
                        (u32 *)&msr_low , (u32 *) &msr_high);
                /* Combine the two halves into a single 64-bit value. */
                msr = (msr_high << 32) | msr_low;
                *value = (u64) msr;
                ret = 0;
        }
        return ret;
}



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 17:46       ` Christoph Lameter
  2011-01-21 17:48         ` Tejun Heo
@ 2011-01-21 22:32         ` H. Peter Anvin
  2011-01-24 17:05           ` Christoph Lameter
  1 sibling, 1 reply; 22+ messages in thread
From: H. Peter Anvin @ 2011-01-21 22:32 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Tejun Heo, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On 01/21/2011 09:46 AM, Christoph Lameter wrote:
>> Oh, you're right.  Sorry about that.  Can you please then add a
>> comment noting that the operation is x86 only?  Maybe prefix it with
>> x86_?
> 
> For a function defined in an specific include file and only used in arch
> specific code?

Yes, x86_ prefix to indicate it's an x86-specific interface, as opposed
to arch_ which is an architecture-specific component of the
implementation of a generic interface.

	-hpa

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: x86: A fast way to check capabilities of the current cpu
  2011-01-21 22:32         ` H. Peter Anvin
@ 2011-01-24 17:05           ` Christoph Lameter
  0 siblings, 0 replies; 22+ messages in thread
From: Christoph Lameter @ 2011-01-24 17:05 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Tejun Heo, Pekka Enbeerg, linux-kernel, Eric Dumazet,
	Mathieu Desnoyers, akpm

On Fri, 21 Jan 2011, H. Peter Anvin wrote:

> On 01/21/2011 09:46 AM, Christoph Lameter wrote:
> >> Oh, you're right.  Sorry about that.  Can you please then add a
> >> comment noting that the operation is x86 only?  Maybe prefix it with
> >> x86_?
> >
> > For a function defined in an specific include file and only used in arch
> > specific code?
>
> Yes, x86_ prefix to indicate it's an x86-specific interface, as opposed
> to arch_ which is a architecture-specific component of the
> implementation of a generic interface.

Subject: x86,percpu: Add x86_ prefix to this_cpu_test_bit and do not use percpu_read_stable

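Add an x86_ prefix to this_cpu_test_bit and friends to mark them as x86-specific
interfaces, and use percpu_read instead of percpu_read_stable for checking per
cpu features, since a percpu_read_stable result is not guaranteed to stay valid
once the task runs on a different cpu.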

Signed-off-by: Christoph Lameter <cl@linux.com>


---
 arch/x86/include/asm/cpufeature.h |    2 +-
 arch/x86/include/asm/percpu.h     |   12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

Index: linux-2.6/arch/x86/include/asm/cpufeature.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/cpufeature.h	2011-01-24 09:18:53.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/cpufeature.h	2011-01-24 09:19:02.000000000 -0600
@@ -224,7 +224,7 @@ extern const char * const x86_power_flag

 #define this_cpu_has(bit)						\
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
-	 this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))

 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)

Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h	2011-01-24 09:18:53.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h	2011-01-24 09:24:21.000000000 -0600
@@ -492,15 +492,15 @@ do {									\
 	old__;								\
 })

-static __always_inline int this_cpu_constant_test_bit(unsigned int nr,
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {
 	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;

-	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read_stable(*a)) != 0;
+	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
 }

-static inline int this_cpu_variable_test_bit(int nr,
+static inline int x86_this_cpu_variable_test_bit(int nr,
                         const unsigned long __percpu *addr)
 {
 	int oldbit;
@@ -513,10 +513,10 @@ static inline int this_cpu_variable_test
 	return oldbit;
 }

-#define this_cpu_test_bit(nr, addr)			\
+#define x86_this_cpu_test_bit(nr, addr)			\
 	(__builtin_constant_p((nr))			\
-	 ? this_cpu_constant_test_bit((nr), (addr))	\
-	 : this_cpu_variable_test_bit((nr), (addr)))
+	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
+	 : x86_this_cpu_variable_test_bit((nr), (addr)))


 #include <asm-generic/percpu.h>

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2011-01-24 17:05 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-15 20:07 x86: A fast way to check capabilities of the current cpu Christoph Lameter
2010-12-15 20:11 ` x86: Avoid passing struct cpuinfo pointer to mce_available Christoph Lameter
2010-12-15 20:56 ` x86: A fast way to check capabilities of the current cpu Andrew Morton
2010-12-15 21:03   ` H. Peter Anvin
2010-12-15 21:30     ` Miguel Ojeda
2010-12-15 21:39       ` H. Peter Anvin
2010-12-15 21:48         ` Miguel Ojeda
2010-12-16  6:25         ` Miles Bader
2010-12-16 10:17           ` Miguel Ojeda
2010-12-16 10:29             ` Miles Bader
2010-12-16 15:38               ` H. Peter Anvin
2010-12-17  4:26                 ` Miles Bader
2011-01-21 17:11 ` Tejun Heo
2011-01-21 17:21   ` Christoph Lameter
2011-01-21 17:28     ` Tejun Heo
2011-01-21 17:46       ` Christoph Lameter
2011-01-21 17:48         ` Tejun Heo
2011-01-21 17:57           ` Christoph Lameter
2011-01-21 18:12             ` Tejun Heo
2011-01-21 18:20               ` Christoph Lameter
2011-01-21 22:32         ` H. Peter Anvin
2011-01-24 17:05           ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox