Specific support for Intel Atom architecture

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* Specific support for Intel Atom architecture
@ 2009-04-30 12:08 Tobias Doerffel
  2009-04-30 15:40 ` Ingo Molnar
  2009-05-04  7:22 ` Andi Kleen
  0 siblings, 2 replies; 26+ messages in thread
From: Tobias Doerffel @ 2009-04-30 12:08 UTC (permalink / raw)
  To: LKML


[-- Attachment #1.1: Type: text/plain, Size: 817 bytes --]

Hi,

as some of you already might know, work is going on to make GCC fully support 
Intel Atom architecture specifics, i.e. make -mtune=atom generate code 
optimized for in-order architectures like Intel Atom [1].

I therefore started to make up a small patch which adds Intel Atom as a new 
processor family which can be selected upon configuration. It's nothing 
special and also requires a patched GCC. I'd just like to get some feedback on 
it, i.e. is X86_L1_CACHE_SHIFT=6 ok for Atom CPUs (I was not able to find any 
information on Atom's cacheline size)? Any chance to include this patch once 
the Atom patch went into GCC mainline (probably in GCC 4.5)? Any other 
objections?

Please Cc me, I'm not on the list.

Regards,

Tobias

[1] http://gcc.gnu.org/viewcvs/branches/ix86/atom/



[-- Attachment #1.2: 0001-x86-add-specific-support-for-Intel-Atom-architectur.patch --]
[-- Type: text/x-patch, Size: 4773 bytes --]

From 6aa86b4431619d38849d469c70904afe1e5a8ca0 Mon Sep 17 00:00:00 2001
From: Tobias Doerffel <tobias.doerffel@gmail.com>
Date: Thu, 30 Apr 2009 12:36:46 +0200
Subject: [PATCH] x86: add specific support for Intel Atom architecture

This adds another option when selecting CPU family so the kernel can
be optimized for Intel Atom CPUs. This patch requires a GCC with a
patch applied which adds specific Intel Atom support.
---
 arch/x86/Kconfig.cpu          |   19 ++++++++++++++-----
 arch/x86/Makefile_32.cpu      |    1 +
 arch/x86/include/asm/module.h |    2 ++
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8130334..8e565b7 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -262,6 +262,15 @@ config MCORE2
 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
 	  (not a typo)
 
+config MATOM
+	bool "Intel Atom"
+	depends on X86_32
+	---help---
+
+	  Select this for Intel Atom platform. Intel Atom CPUs have an in-order
+	  pipelining architecture and thus can benefit from in-order optimized
+	  code (requires Intel Atom patch in GCC).
+
 config GENERIC_CPU
 	bool "Generic-x86-64"
 	depends on X86_64
@@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 	default "7" if MPENTIUM4 || MPSC
 	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 
 config X86_XADD
 	def_bool y
@@ -355,11 +364,11 @@ config X86_ALIGNMENT_16
 
 config X86_INTEL_USERCOPY
 	def_bool y
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MATOM
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
 
 config X86_USE_3DNOW
 	def_bool y
@@ -387,7 +396,7 @@ config X86_P6_NOP
 
 config X86_TSC
 	def_bool y
-	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
+	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
 
 config X86_CMPXCHG64
 	def_bool y
@@ -397,7 +406,7 @@ config X86_CMPXCHG64
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64)
+	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 80177ec..07a11b0 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
 cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_MVIAC7)		+= -march=i686
 cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
+cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
 
 # AMD Elan support
 cflags-$(CONFIG_X86_ELAN)	+= -march=i486
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 47d6274..e959c4a 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -28,6 +28,8 @@ struct mod_arch_specific {};
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MATOM
+#define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
 #define MODULE_PROC_FAMILY "686 "
 #elif defined CONFIG_MPENTIUMII
-- 
1.6.2.4


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-04-30 12:08 Specific support for Intel Atom architecture Tobias Doerffel
@ 2009-04-30 15:40 ` Ingo Molnar
  2009-04-30 17:03   ` H. Peter Anvin
  2009-04-30 17:10   ` H. Peter Anvin
  2009-05-04  7:22 ` Andi Kleen
  1 sibling, 2 replies; 26+ messages in thread
From: Ingo Molnar @ 2009-04-30 15:40 UTC (permalink / raw)
  To: Tobias Doerffel, H. Peter Anvin, Thomas Gleixner,
	Arjan van de Ven, Suresh Siddha, Pallipadi, Venkatesh
  Cc: LKML


* Tobias Doerffel <tobias.doerffel@gmail.com> wrote:

> Hi,
> 
> as some of you already might know, work is going on to make GCC 
> fully support Intel Atom architecture specifics, i.e. make 
> -mtune=atom generate code optimized for in-order architectures 
> like Intel Atom [1].
> 
> I therefore started to make up a small patch which adds Intel Atom 
> as a new processor family which can be selected upon 
> configuration. It's nothing special and also requires a patched 
> GCC. I'd just like to get some feedback on it, i.e. is 
> X86_L1_CACHE_SHIFT=6 ok for Atom CPUs (I was not able to find any 
> information on Atom's cacheline size)? Any chance to include this 
> patch once the Atom patch went into GCC mainline (probably in GCC 
> 4.5)? Any other objections?
> 
> Please Cc me, I'm not on the list.
> 
> Regards,
> 
> Tobias
> 
> [1] http://gcc.gnu.org/viewcvs/branches/ix86/atom/
> 
> 

> From 6aa86b4431619d38849d469c70904afe1e5a8ca0 Mon Sep 17 00:00:00 2001
> From: Tobias Doerffel <tobias.doerffel@gmail.com>
> Date: Thu, 30 Apr 2009 12:36:46 +0200
> Subject: [PATCH] x86: add specific support for Intel Atom architecture
> 
> This adds another option when selecting CPU family so the kernel can
> be optimized for Intel Atom CPUs. This patch requires a GCC with a
> patch applied which adds specific Intel Atom support.
> ---
>  arch/x86/Kconfig.cpu          |   19 ++++++++++++++-----
>  arch/x86/Makefile_32.cpu      |    1 +
>  arch/x86/include/asm/module.h |    2 ++
>  3 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
> index 8130334..8e565b7 100644
> --- a/arch/x86/Kconfig.cpu
> +++ b/arch/x86/Kconfig.cpu
> @@ -262,6 +262,15 @@ config MCORE2
>  	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
>  	  (not a typo)
>  
> +config MATOM
> +	bool "Intel Atom"
> +	depends on X86_32
> +	---help---
> +
> +	  Select this for Intel Atom platform. Intel Atom CPUs have an in-order
> +	  pipelining architecture and thus can benefit from in-order optimized
> +	  code (requires Intel Atom patch in GCC).
> +
>  config GENERIC_CPU
>  	bool "Generic-x86-64"
>  	depends on X86_64
> @@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT
>  	default "7" if MPENTIUM4 || MPSC
>  	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
>  	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
> -	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
> +	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
>  
>  config X86_XADD
>  	def_bool y
> @@ -355,11 +364,11 @@ config X86_ALIGNMENT_16
>  
>  config X86_INTEL_USERCOPY
>  	def_bool y
> -	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
> +	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MATOM
>  
>  config X86_USE_PPRO_CHECKSUM
>  	def_bool y
> -	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
> +	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
>  
>  config X86_USE_3DNOW
>  	def_bool y
> @@ -387,7 +396,7 @@ config X86_P6_NOP
>  
>  config X86_TSC
>  	def_bool y
> -	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
> +	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
>  
>  config X86_CMPXCHG64
>  	def_bool y
> @@ -397,7 +406,7 @@ config X86_CMPXCHG64
>  # generates cmov.
>  config X86_CMOV
>  	def_bool y
> -	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64)
> +	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM)
>  
>  config X86_MINIMUM_CPU_FAMILY
>  	int
> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> index 80177ec..07a11b0 100644
> --- a/arch/x86/Makefile_32.cpu
> +++ b/arch/x86/Makefile_32.cpu
> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
>  cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
>  cflags-$(CONFIG_MVIAC7)		+= -march=i686
>  cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
> +cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
>  
>  # AMD Elan support
>  cflags-$(CONFIG_X86_ELAN)	+= -march=i486
> diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
> index 47d6274..e959c4a 100644
> --- a/arch/x86/include/asm/module.h
> +++ b/arch/x86/include/asm/module.h
> @@ -28,6 +28,8 @@ struct mod_arch_specific {};
>  #define MODULE_PROC_FAMILY "586MMX "
>  #elif defined CONFIG_MCORE2
>  #define MODULE_PROC_FAMILY "CORE2 "
> +#elif defined CONFIG_MATOM
> +#define MODULE_PROC_FAMILY "ATOM "
>  #elif defined CONFIG_M686
>  #define MODULE_PROC_FAMILY "686 "
>  #elif defined CONFIG_MPENTIUMII

Makes sense. One question would be X86_L1_CACHE_SHIFT - you set it 
to 2^6 == 64 - that's correct i think, most Atoms come with 64 byte 
cache lines AFAIK.

I've Cc:-ed Intel folks - is this assumption about 64 bytes correct?

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-04-30 15:40 ` Ingo Molnar
@ 2009-04-30 17:03   ` H. Peter Anvin
  2009-04-30 17:10   ` H. Peter Anvin
  1 sibling, 0 replies; 26+ messages in thread
From: H. Peter Anvin @ 2009-04-30 17:03 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Tobias Doerffel, Thomas Gleixner, Arjan van de Ven, Suresh Siddha,
	Pallipadi, Venkatesh, LKML

Ingo Molnar wrote:
> 
> Makes sense. One question would be X86_L1_CACHE_SHIFT - you set it 
> to 2^6 == 64 - that's correct i think, most Atoms come with 64 byte 
> L2 cache AFAIK.
> 
> I've Cc:-ed Intel folks - is this assumption about 64 bytes correct?
> 

Seems to be.  At least that's what CPUID reports.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-04-30 15:40 ` Ingo Molnar
  2009-04-30 17:03   ` H. Peter Anvin
@ 2009-04-30 17:10   ` H. Peter Anvin
  2009-05-03  5:38     ` Willy Tarreau
  1 sibling, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2009-04-30 17:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Tobias Doerffel, Thomas Gleixner, Arjan van de Ven, Suresh Siddha,
	Pallipadi, Venkatesh, LKML

Ingo Molnar wrote:
>> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
>> index 80177ec..07a11b0 100644
>> --- a/arch/x86/Makefile_32.cpu
>> +++ b/arch/x86/Makefile_32.cpu
>> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
>>  cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
>>  cflags-$(CONFIG_MVIAC7)		+= -march=i686
>>  cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
>> +cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
>>  

There should be a fallback option used here rather than requiring a new
gcc, e.g. something like:

$(call cc-option,-march=atom,-march=i686)

	-hpa
-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-04-30 17:10   ` H. Peter Anvin
@ 2009-05-03  5:38     ` Willy Tarreau
  2009-05-03  6:48       ` H. Peter Anvin
  2009-05-03 14:53       ` Arjan van de Ven
  0 siblings, 2 replies; 26+ messages in thread
From: Willy Tarreau @ 2009-05-03  5:38 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Tobias Doerffel, Thomas Gleixner, Arjan van de Ven,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

On Thu, Apr 30, 2009 at 10:10:08AM -0700, H. Peter Anvin wrote:
> Ingo Molnar wrote:
> >> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> >> index 80177ec..07a11b0 100644
> >> --- a/arch/x86/Makefile_32.cpu
> >> +++ b/arch/x86/Makefile_32.cpu
> >> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
> >>  cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
> >>  cflags-$(CONFIG_MVIAC7)		+= -march=i686
> >>  cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
> >> +cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
> >>  
> 
> There should be a fallback option used here rather than requiring a new
> gcc, e.g. something like:
> 
> $(call cc-option,-march=atom,-march=i686)

if it's an in-order architecture, wouldn't it be better to tune for i386
or i486 instead ?

Willy


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03  5:38     ` Willy Tarreau
@ 2009-05-03  6:48       ` H. Peter Anvin
  2009-05-03 11:08         ` Tobias Doerffel
  2009-05-03 14:53       ` Arjan van de Ven
  1 sibling, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2009-05-03  6:48 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Ingo Molnar, Tobias Doerffel, Thomas Gleixner, Arjan van de Ven,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

Willy Tarreau wrote:
>>
>> $(call cc-option,-march=atom,-march=i686)
> 
> if it's an in-order architecture, wouldn't it be better to tune for i386
> or i486 instead ?
> 

Possibly.  It would be worth measuring.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03  6:48       ` H. Peter Anvin
@ 2009-05-03 11:08         ` Tobias Doerffel
  2009-05-04 13:14           ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Tobias Doerffel @ 2009-05-03 11:08 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Willy Tarreau, Ingo Molnar, Thomas Gleixner, Arjan van de Ven,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

[-- Attachment #1: Type: text/plain, Size: 356 bytes --]

Am Sonntag, 3. Mai 2009 08:48:54 schrieb H. Peter Anvin:
> Willy Tarreau wrote:
> >> $(call cc-option,-march=atom,-march=i686)
> >
> > if it's an in-order architecture, wouldn't it be better to tune for i386
> > or i486 instead ?
>
> Possibly.  It would be worth measuring.
How would one do that (never benchmarked kernel stuff before)?

Regards,

Tobias


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03  5:38     ` Willy Tarreau
  2009-05-03  6:48       ` H. Peter Anvin
@ 2009-05-03 14:53       ` Arjan van de Ven
  2009-05-03 18:30         ` Willy Tarreau
  1 sibling, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2009-05-03 14:53 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: H. Peter Anvin, Ingo Molnar, Tobias Doerffel, Thomas Gleixner,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

On Sun, 3 May 2009 07:38:23 +0200
Willy Tarreau <w@1wt.eu> wrote:

> On Thu, Apr 30, 2009 at 10:10:08AM -0700, H. Peter Anvin wrote:
> > Ingo Molnar wrote:
> > >> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> > >> index 80177ec..07a11b0 100644
> > >> --- a/arch/x86/Makefile_32.cpu
> > >> +++ b/arch/x86/Makefile_32.cpu
> > >> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call
> > >> cc-option,-march=c3,-march=i486) $(align)-f
> > >> cflags-$(CONFIG_MVIAC3_2)	+= $(call
> > >> cc-option,-march=c3-2,-march=i686)
> > >> cflags-$(CONFIG_MVIAC7)		+= -march=i686
> > >> cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call
> > >> tune,core2) +cflags-$(CONFIG_MATOM)		+=
> > >> -march=atom $(call tune,atom) 
> > 
> > There should be a fallback option used here rather than requiring a
> > new gcc, e.g. something like:
> > 
> > $(call cc-option,-march=atom,-march=i686)
> 
> if it's an in-order architecture, wouldn't it be better to tune for
> i386 or i486 instead ?

-march isn't about tuning, it's about supported instructions.
The right line is
$(call cc-option,-march=atom,-march=core2)

For tuning, our experience is that currently -mtune=generic works best.
Not sure about the gcc's that have complete atom tuning support yet.

Please don't do something like "oh it's in order, so was the Pentium,
so let's use that"; it actually gives really really bad results.


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03 14:53       ` Arjan van de Ven
@ 2009-05-03 18:30         ` Willy Tarreau
  2009-05-03 18:37           ` H. Peter Anvin
  0 siblings, 1 reply; 26+ messages in thread
From: Willy Tarreau @ 2009-05-03 18:30 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: H. Peter Anvin, Ingo Molnar, Tobias Doerffel, Thomas Gleixner,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

On Sun, May 03, 2009 at 07:53:46AM -0700, Arjan van de Ven wrote:
> On Sun, 3 May 2009 07:38:23 +0200
> Willy Tarreau <w@1wt.eu> wrote:
> 
> > On Thu, Apr 30, 2009 at 10:10:08AM -0700, H. Peter Anvin wrote:
> > > Ingo Molnar wrote:
> > > >> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> > > >> index 80177ec..07a11b0 100644
> > > >> --- a/arch/x86/Makefile_32.cpu
> > > >> +++ b/arch/x86/Makefile_32.cpu
> > > >> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call
> > > >> cc-option,-march=c3,-march=i486) $(align)-f
> > > >> cflags-$(CONFIG_MVIAC3_2)	+= $(call
> > > >> cc-option,-march=c3-2,-march=i686)
> > > >> cflags-$(CONFIG_MVIAC7)		+= -march=i686
> > > >> cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call
> > > >> tune,core2) +cflags-$(CONFIG_MATOM)		+=
> > > >> -march=atom $(call tune,atom) 
> > > 
> > > There should be a fallback option used here rather than requiring a
> > > new gcc, e.g. something like:
> > > 
> > > $(call cc-option,-march=atom,-march=i686)
> > 
> > if it's an in-order architecture, wouldn't it be better to tune for
> > i386 or i486 instead ?
> 
> -march isn't about tuning, it's about supported instructions.

agreed, but unless specified otherwise using -mtune, -march also sets
default tuning for the indicated CPU. At least in my experience.

> The right line is
> $(call cc-option,-march=atom,-march=core2)

OK thanks.

> For tuning, our experience is that currently -mtune=generic works best.

OK.

> Not sure about the gcc's that have complete atom tuning support yet.
> 
> Please don't do something like "oh it's in order, so was the Pentium,
> so lets use that"; it actually gives really really bad results.

I know, I was not thinking about tuning for an "advanced" CPU such as the
pentium, but rather for something generic, hence my proposal of i486 or
i386. I did not know about the "generic" target. In my experience, tuning
for i386/i486 often shows best overall performance on recent CPUs such as
core2. I should try "generic" to compare.

Regards,
Willy


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03 18:30         ` Willy Tarreau
@ 2009-05-03 18:37           ` H. Peter Anvin
  2009-05-03 19:38             ` Måns Rullgård
  0 siblings, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2009-05-03 18:37 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Arjan van de Ven, Ingo Molnar, Tobias Doerffel, Thomas Gleixner,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

Willy Tarreau wrote:
>>>>
>>>> $(call cc-option,-march=atom,-march=i686)
>>> if it's an in-order architecture, wouldn't it be better to tune for
>>> i386 or i486 instead ?
>> -march isn't about tuning, it's about supported instructions.
> 
> agreed, but unless specified otherwise using -mtune, -march also sets
> default tuning for the indicated CPU. At least in my experience.
> 
>> The right line is
>> $(call cc-option,-march=atom,-march=core2)

For really old gcc's (we support all the way back to gcc 3.2 still)
-march=core2 might not work either.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03 18:37           ` H. Peter Anvin
@ 2009-05-03 19:38             ` Måns Rullgård
  0 siblings, 0 replies; 26+ messages in thread
From: Måns Rullgård @ 2009-05-03 19:38 UTC (permalink / raw)
  To: linux-kernel

"H. Peter Anvin" <hpa@zytor.com> writes:

> Willy Tarreau wrote:
>>>>>
>>>>> $(call cc-option,-march=atom,-march=i686)
>>>> if it's an in-order architecture, wouldn't it be better to tune for
>>>> i386 or i486 instead ?
>>> -march isn't about tuning, it's about supported instructions.
>> 
>> agreed, but unless specified otherwise using -mtune, -march also sets
>> default tuning for the indicated CPU. At least in my experience.
>> 
>>> The right line is
>>> $(call cc-option,-march=atom,-march=core2)
>
> For really old gcc's (we support all the way back to gcc 3.2 still)
> -march=core2 might not work either.

-march=core2 support was added in gcc 4.3.

-- 
Måns Rullgård
mans@mansr.com


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-04-30 12:08 Specific support for Intel Atom architecture Tobias Doerffel
  2009-04-30 15:40 ` Ingo Molnar
@ 2009-05-04  7:22 ` Andi Kleen
  2009-05-11 21:30   ` Tobias Doerffel
  2009-05-12 14:20   ` Ulrich Drepper
  1 sibling, 2 replies; 26+ messages in thread
From: Andi Kleen @ 2009-05-04  7:22 UTC (permalink / raw)
  To: Tobias Doerffel; +Cc: LKML

Tobias Doerffel <tobias.doerffel@gmail.com> writes:

> Hi,
>
> as some of you already might know, work is going on to make GCC fully support 
> Intel Atom architecture specifics, i.e. make -mtune=atom generate code 
> optimized for in-order architectures like Intel Atom [1].
>
> I therefore started to make up a small patch which adds Intel Atom as a new 
> processor family which can be selected upon configuration. It's nothing 
> special and also requires a patched GCC. I'd just like to get some feedback on 
> it, i.e. is X86_L1_CACHE_SHIFT=6 ok for Atom CPUs (I was not able to find any 
> information on Atom's cacheline size)?

64 bytes.

> Any chance to include this patch once 
> the Atom patch went into GCC mainline (probably in GCC 4.5)? Any other 

atom support already went into gcc mainline.

> objections?
>
> Please Cc me, I'm not on the list.

FWIW I have a similar patch, but I haven't submitted it yet due
to lack of benchmark numbers.

Some comments on yours.

> diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
> index 8130334..8e565b7 100644
> --- a/arch/x86/Kconfig.cpu
> +++ b/arch/x86/Kconfig.cpu
> @@ -262,6 +262,15 @@ config MCORE2
>  	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
>  	  (not a typo)
>  
> +config MATOM
> +	bool "Intel Atom"
> +	depends on X86_32

This is wrong. There are Atom CPUs which support 64-bit code too.

> +
>  config GENERIC_CPU
>  	bool "Generic-x86-64"
>  	depends on X86_64
> @@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT
>  	default "7" if MPENTIUM4 || MPSC
>  	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
>  	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
> -	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
> +	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
>  
>  config X86_XADD
>  	def_bool y
> @@ -355,11 +364,11 @@ config X86_ALIGNMENT_16
>  
>  config X86_INTEL_USERCOPY
>  	def_bool y
> -	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
> +	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MATOM


I don't think that's necessarily a good idea. You would need benchmarks showing
that intel user copy performs better on Atom than the original one. Do you have
some?

>  
>  config X86_USE_PPRO_CHECKSUM
>  	def_bool y
> -	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
> +	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM

Similar here. Atom is quite different from PPro/K8.

  
>  config X86_USE_3DNOW

>  config X86_MINIMUM_CPU_FAMILY
>  	int
> diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> index 80177ec..07a11b0 100644
> --- a/arch/x86/Makefile_32.cpu
> +++ b/arch/x86/Makefile_32.cpu
> @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
>  cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
>  cflags-$(CONFIG_MVIAC7)		+= -march=i686
>  cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
> +cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
>  
>  # AMD Elan support
>  cflags-$(CONFIG_X86_ELAN)	+= -march=i486

That needs to be in the 64-bit version too.


> diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
> index 47d6274..e959c4a 100644
> --- a/arch/x86/include/asm/module.h
> +++ b/arch/x86/include/asm/module.h
> @@ -28,6 +28,8 @@ struct mod_arch_specific {};
>  #define MODULE_PROC_FAMILY "586MMX "
>  #elif defined CONFIG_MCORE2
>  #define MODULE_PROC_FAMILY "CORE2 "
> +#elif defined CONFIG_MATOM
> +#define MODULE_PROC_FAMILY "ATOM "

This should be obsolete anyway; you can just use CORE2. They have compatible ISAs.


-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-03 11:08         ` Tobias Doerffel
@ 2009-05-04 13:14           ` Ingo Molnar
  2009-05-04 13:32             ` Arjan van de Ven
  0 siblings, 1 reply; 26+ messages in thread
From: Ingo Molnar @ 2009-05-04 13:14 UTC (permalink / raw)
  To: Tobias Doerffel
  Cc: H. Peter Anvin, Willy Tarreau, Thomas Gleixner, Arjan van de Ven,
	Suresh Siddha, Pallipadi, Venkatesh, LKML


* Tobias Doerffel <tobias.doerffel@gmail.com> wrote:

> Am Sonntag, 3. Mai 2009 08:48:54 schrieb H. Peter Anvin:
> > Willy Tarreau wrote:
> > >> $(call cc-option,-march=atom,-march=i686)
> > >
> > > if it's an in-order architecture, wouldn't it be better to tune for i386
> > > or i486 instead ?
> >
> > Possibly.  It would be worth measuring.
>
> How would one do that (never benchmarked kernel stuff before)?

A standard method is to run lmbench and compare the results - 
lmbench has a built-in 'report comparison between two runs' feature.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-04 13:14           ` Ingo Molnar
@ 2009-05-04 13:32             ` Arjan van de Ven
  2009-05-04 17:55               ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Arjan van de Ven @ 2009-05-04 13:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Tobias Doerffel, H. Peter Anvin, Willy Tarreau, Thomas Gleixner,
	Suresh Siddha, Pallipadi, Venkatesh, LKML

On Mon, 4 May 2009 15:14:57 +0200
Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Tobias Doerffel <tobias.doerffel@gmail.com> wrote:
> 
> > Am Sonntag, 3. Mai 2009 08:48:54 schrieb H. Peter Anvin:
> > > Willy Tarreau wrote:
> > > >> $(call cc-option,-march=atom,-march=i686)
> > > >
> > > > if it's an in-order architecture, wouldn't it be better to tune
> > > > for i386 or i486 instead ?
> > >
> > > Possibly.  It would be worth measuring.
> >
> > How would one do that (never benchmarked kernel stuff before)?
> 
> A standard method is to run lmbench and compare the results - 
> lmbench has a built-in 'report comparison between two runs' feature.

well... you're normally REALLY hard pressed to measure compiler
differences this way..... 

normally compiler options get benchmarked using speccpu and the like....


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-04 13:32             ` Arjan van de Ven
@ 2009-05-04 17:55               ` Ingo Molnar
  0 siblings, 0 replies; 26+ messages in thread
From: Ingo Molnar @ 2009-05-04 17:55 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Tobias Doerffel, H. Peter Anvin, Willy Tarreau, Thomas Gleixner,
	Suresh Siddha, Pallipadi, Venkatesh, LKML


* Arjan van de Ven <arjan@infradead.org> wrote:

> On Mon, 4 May 2009 15:14:57 +0200
> Ingo Molnar <mingo@elte.hu> wrote:
> 
> > 
> > * Tobias Doerffel <tobias.doerffel@gmail.com> wrote:
> > 
> > > Am Sonntag, 3. Mai 2009 08:48:54 schrieb H. Peter Anvin:
> > > > Willy Tarreau wrote:
> > > > >> $(call cc-option,-march=atom,-march=i686)
> > > > >
> > > > > if it's an in-order architecture, wouldn't it be better to tune
> > > > > for i386 or i486 instead ?
> > > >
> > > > Possibly.  It would be worth measuring.
> > >
> > > How would one do that (never benchmarked kernel stuff before)?
> > 
> > A standard method is to run lmbench and compare the results - 
> > lmbench has a built-in 'report comparison between two runs' 
> > feature.
> 
> well... you're normally REALLY hard pressed to measure compiler 
> differences this way.....
> 
> normally compiler options get benchmarked using speccpu and the 
> like....

Well, if there's no measurable difference in lmbench at all then the 
options probably don't matter that much. If some workload is found 
where compiler options show a difference then that matters. Speccpu 
only matters if those compiler options also help the kernel, in a 
measurable way.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-04  7:22 ` Andi Kleen
@ 2009-05-11 21:30   ` Tobias Doerffel
  2009-05-12  6:53     ` Andi Kleen
  2009-05-12 14:20   ` Ulrich Drepper
  1 sibling, 1 reply; 26+ messages in thread
From: Tobias Doerffel @ 2009-05-11 21:30 UTC (permalink / raw)
  To: Andi Kleen
  Cc: LKML, Thomas Gleixner, Arjan van de Ven, Suresh Siddha,
	Pallipadi, Venkatesh, Ingo Molnar, Willy Tarreau


[-- Attachment #1.1: Type: text/plain, Size: 3222 bytes --]

Hi,

thanks for your comments. I addressed some of your remarks and attached a new patch.

Am Montag, 4. Mai 2009 09:22:46 schrieb Andi Kleen:
> This is wrong, There are Atom CPUs which support 64bit code too.
Fixed.

> >  config X86_XADD
> >  	def_bool y
> > @@ -355,11 +364,11 @@ config X86_ALIGNMENT_16
> >
> >  config X86_INTEL_USERCOPY
> >  	def_bool y
> > -	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII ||
> > M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 +	depends on
> > MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX ||
> > X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MATOM
>
> I don't think that's necessarily a good idea. You would need benchmarks
> showing that intel user copy performs better on Atom than the original one.
> Do you have some?
You're right here. I made some quick benchmarks of 
__copy_user[_intel[_nocache]]() and __copy_zeroing[_intel[_nocache]]() in 
userspace and the generic ones indeed were about 15% faster.

> >  config X86_USE_PPRO_CHECKSUM
> >  	def_bool y
> > -	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 ||
> > MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 ||
> > MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 +	depends on MWINCHIP3D ||
> > MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM ||
> > MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON ||
> > MGEODE_LX || MCORE2 || MATOM
>
> Similar here. Atom is quite different from PPro/K8.
Made some benchmarks of csum_partial() and csum_partial_copy_generic() as 
well. Here the PPro version of csum_partial() performed 10-15% better 
(depending on buffer len) while both implementations of 
csum_partial_copy_generic() performed equal.


> > diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
> > index 80177ec..07a11b0 100644
> > --- a/arch/x86/Makefile_32.cpu
> > +++ b/arch/x86/Makefile_32.cpu
> > @@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call
> > cc-option,-march=c3,-march=i486) $(align)-f cflags-$(CONFIG_MVIAC3_2)	+=
> > $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7)		+=
> > -march=i686
> >  cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
> > +cflags-$(CONFIG_MATOM)		+= -march=atom $(call tune,atom)
> >
> >  # AMD Elan support
> >  cflags-$(CONFIG_X86_ELAN)	+= -march=i486
>
> That needs to be in the 64bit version too.
Fixed as well. Also included changes to call cc-option as recommended by hpa.

> > diff --git a/arch/x86/include/asm/module.h
> > b/arch/x86/include/asm/module.h index 47d6274..e959c4a 100644
> > --- a/arch/x86/include/asm/module.h
> > +++ b/arch/x86/include/asm/module.h
> > @@ -28,6 +28,8 @@ struct mod_arch_specific {};
> >  #define MODULE_PROC_FAMILY "586MMX "
> >  #elif defined CONFIG_MCORE2
> >  #define MODULE_PROC_FAMILY "CORE2 "
> > +#elif defined CONFIG_MATOM
> > +#define MODULE_PROC_FAMILY "ATOM "
>
> This should be obsolete anyways, you can just uses CORE2. They have
> compatible ISAs.
So you would recommend writing

#elif defined CONFIG_MCORE2 || defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "CORE2 "

?

Regards,

Tobias

[-- Attachment #1.2: 0001-x86-add-specific-support-for-Intel-Atom-architectur.patch --]
[-- Type: text/x-patch, Size: 5068 bytes --]

From bd9378b21f86a783dc17a741d2167e7158070d97 Mon Sep 17 00:00:00 2001
From: Tobias Doerffel <tobias.doerffel@gmail.com>
Date: Mon, 11 May 2009 23:20:54 +0200
Subject: [PATCH] x86: add specific support for Intel Atom architecture

This adds another option when selecting CPU family so the kernel can
be optimized for Intel Atom CPUs. If GCC supports tuning options for
Intel Atom they will be used.
---
 arch/x86/Kconfig.cpu          |   17 +++++++++++++----
 arch/x86/Makefile             |    2 ++
 arch/x86/Makefile_32.cpu      |    1 +
 arch/x86/include/asm/module.h |    2 ++
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8130334..f88a7f6 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -262,6 +262,15 @@ config MCORE2
 	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
 	  (not a typo)
 
+config MATOM
+	bool "Intel Atom"
+	---help---
+
+	  Select this for Intel Atom platform. Intel Atom CPUs have an in-order
+	  pipelining architecture and thus can benefit from in-order optimized
+	  code. Use a recent GCC with specific Intel Atom support in order to
+	  fully benefit from selecting this option.
+
 config GENERIC_CPU
 	bool "Generic-x86-64"
 	depends on X86_64
@@ -310,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 	default "7" if MPENTIUM4 || MPSC
 	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 
 config X86_XADD
 	def_bool y
@@ -359,7 +368,7 @@ config X86_INTEL_USERCOPY
 
 config X86_USE_PPRO_CHECKSUM
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
 
 config X86_USE_3DNOW
 	def_bool y
@@ -387,7 +396,7 @@ config X86_P6_NOP
 
 config X86_TSC
 	def_bool y
-	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
+	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
 
 config X86_CMPXCHG64
 	def_bool y
@@ -397,7 +406,7 @@ config X86_CMPXCHG64
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64)
+	depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8c86b72..3cfbd74 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -57,6 +57,8 @@ else
 
         cflags-$(CONFIG_MCORE2) += \
                 $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
+	cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
+		$(call cc-option,-mtune=atom)
         cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
         KBUILD_CFLAGS += $(cflags-y)
 
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 80177ec..4470fa0 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -33,6 +33,7 @@ cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-f
 cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_MVIAC7)		+= -march=i686
 cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call tune,core2)
+cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,-march=core2) $(call cc-option,-mtune=atom)
 
 # AMD Elan support
 cflags-$(CONFIG_X86_ELAN)	+= -march=i486
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 47d6274..e959c4a 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -28,6 +28,8 @@ struct mod_arch_specific {};
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MATOM
+#define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
 #define MODULE_PROC_FAMILY "686 "
 #elif defined CONFIG_MPENTIUMII
-- 
1.6.2.4


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-11 21:30   ` Tobias Doerffel
@ 2009-05-12  6:53     ` Andi Kleen
  0 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2009-05-12  6:53 UTC (permalink / raw)
  To: Tobias Doerffel
  Cc: Andi Kleen, LKML, Thomas Gleixner, Arjan van de Ven,
	Suresh Siddha, Pallipadi, Venkatesh, Ingo Molnar, Willy Tarreau

On Mon, May 11, 2009 at 11:30:19PM +0200, Tobias Doerffel wrote:
> > > diff --git a/arch/x86/include/asm/module.h
> > > b/arch/x86/include/asm/module.h index 47d6274..e959c4a 100644
> > > --- a/arch/x86/include/asm/module.h
> > > +++ b/arch/x86/include/asm/module.h
> > > @@ -28,6 +28,8 @@ struct mod_arch_specific {};
> > >  #define MODULE_PROC_FAMILY "586MMX "
> > >  #elif defined CONFIG_MCORE2
> > >  #define MODULE_PROC_FAMILY "CORE2 "
> > > +#elif defined CONFIG_MATOM
> > > +#define MODULE_PROC_FAMILY "ATOM "
> >
> > This should be obsolete anyways, you can just uses CORE2. They have
> > compatible ISAs.
> So you would recommend writing
> 
> #elif defined CONFIG_MCORE2 || defined CONFIG_MATOM
> #define MODULE_PROC_FAMILY "CORE2 "
> 
> ?

Yes.  Or maybe you can find a better name.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-04  7:22 ` Andi Kleen
  2009-05-11 21:30   ` Tobias Doerffel
@ 2009-05-12 14:20   ` Ulrich Drepper
  2009-05-12 15:04     ` Andi Kleen
  1 sibling, 1 reply; 26+ messages in thread
From: Ulrich Drepper @ 2009-05-12 14:20 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Tobias Doerffel, LKML

On Mon, May 4, 2009 at 12:22 AM, Andi Kleen <andi@firstfloor.org> wrote:
> This should be obsolete anyways, you can just uses CORE2. They have compatible ISAs.

Only correct if you don't plan to use the movbe instruction.  The
kernel would be the one place where I can imagine this to make sense.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-12 14:20   ` Ulrich Drepper
@ 2009-05-12 15:04     ` Andi Kleen
  2009-05-12 17:45       ` Ulrich Drepper
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2009-05-12 15:04 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Andi Kleen, Tobias Doerffel, LKML

On Tue, May 12, 2009 at 07:20:14AM -0700, Ulrich Drepper wrote:
> On Mon, May 4, 2009 at 12:22 AM, Andi Kleen <andi@firstfloor.org> wrote:
> > This should be obsolete anyways, you can just uses CORE2. They have compatible ISAs.
> 
> Only correct if you don't plan to use the movbe instruction.  The
> kernel would be the one place where I can imagine this to make sense.

The problem is that you can't express the situations where
movbe is better than bswap (you need both the old and the new
value) in inline assembler in a way that gcc decides automatically.

I also doubt there are many (any?) situations in the kernel where
the destruction of the old register is a problem;
e.g. the network stack normally doesn't care.

My understanding is that movbe is really mainly useful for 
some special situations where you run an emulator/JIT for 
a BE ISA, but that's not something the kernel does.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-12 15:04     ` Andi Kleen
@ 2009-05-12 17:45       ` Ulrich Drepper
  2009-05-12 18:13         ` Andi Kleen
  2009-05-14  5:04         ` Harvey Harrison
  0 siblings, 2 replies; 26+ messages in thread
From: Ulrich Drepper @ 2009-05-12 17:45 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Tobias Doerffel, LKML

On Tue, May 12, 2009 at 8:04 AM, Andi Kleen <andi@firstfloor.org> wrote:
> The problem is that you can't express the situations where
> movbe is better than bswap (you need both and the old and the new
> value) in inline assembler in a way that gcc decides automatically.

True.  But I was mostly thinking about loads from memory.  A quick
search for ntoh*/hton* shows code like

        u_int16_t queue_num = ntohs(nfmsg->res_id);

If there were an ntohs_load() macro, movbe could be used.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-12 17:45       ` Ulrich Drepper
@ 2009-05-12 18:13         ` Andi Kleen
  2009-05-14  5:04         ` Harvey Harrison
  1 sibling, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2009-05-12 18:13 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Andi Kleen, Tobias Doerffel, LKML

On Tue, May 12, 2009 at 10:45:00AM -0700, Ulrich Drepper wrote:
> On Tue, May 12, 2009 at 8:04 AM, Andi Kleen <andi@firstfloor.org> wrote:
> > The problem is that you can't express the situations where
> > movbe is better than bswap (you need both and the old and the new
> > value) in inline assembler in a way that gcc decides automatically.
> 
> True.  But I was mostly thinking about loads from memory.  A quick
> search for ntoh*/hton* shows code like
> 
>         u_int16_t queue_num = ntohs(nfmsg->res_id);
> 
> If there would be a ntohs_load() macro movbe could be used.

It wouldn't surprise me if

	movbe memory,%reg

generates the same uops sequence internally as

	mov memory,%reg
	bswap %reg

I doubt there's any dedicated hardware for this in Atom (but I don't
know for sure).

So unless you're really decode-constrained it would only
save a few bytes of code size. That is probably not worth having 
incompatible modules for, or adding special code to the source.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-12 17:45       ` Ulrich Drepper
  2009-05-12 18:13         ` Andi Kleen
@ 2009-05-14  5:04         ` Harvey Harrison
  2009-05-14 13:38           ` Ulrich Drepper
  1 sibling, 1 reply; 26+ messages in thread
From: Harvey Harrison @ 2009-05-14  5:04 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Andi Kleen, Tobias Doerffel, LKML

On Tue, 2009-05-12 at 10:45 -0700, Ulrich Drepper wrote:
> On Tue, May 12, 2009 at 8:04 AM, Andi Kleen <andi@firstfloor.org> wrote:
> > The problem is that you can't express the situations where
> > movbe is better than bswap (you need both and the old and the new
> > value) in inline assembler in a way that gcc decides automatically.
> 
> True.  But I was mostly thinking about loads from memory.  A quick
> search for ntoh*/hton* shows code like
> 
>         u_int16_t queue_num = ntohs(nfmsg->res_id);
> 
> If there would be a ntohs_load() macro movbe could be used.

It's called be16_to_cpup, or on x86, swab16p()


Cheers,

Harvey


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-14  5:04         ` Harvey Harrison
@ 2009-05-14 13:38           ` Ulrich Drepper
  2009-05-14 14:01             ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Ulrich Drepper @ 2009-05-14 13:38 UTC (permalink / raw)
  To: Harvey Harrison; +Cc: Andi Kleen, Tobias Doerffel, LKML

On Wed, May 13, 2009 at 10:04 PM, Harvey Harrison
<harvey.harrison@gmail.com> wrote:
> It's called be16_to_cpup, or on x86, swab16p()

Indeed.  If now somebody with an Atom could test whether using movbe
has an advantage (my guess is that there is a slight advantage) then
one could define a special version of the __beXX_to_cpup and
__cpu_to_beXXp functions for Atom and start using these functions more
rigorously in the tree.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-14 13:38           ` Ulrich Drepper
@ 2009-05-14 14:01             ` Andi Kleen
  2009-05-14 16:19               ` Ulrich Drepper
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2009-05-14 14:01 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Harvey Harrison, Andi Kleen, Tobias Doerffel, LKML

On Thu, May 14, 2009 at 06:38:48AM -0700, Ulrich Drepper wrote:
> On Wed, May 13, 2009 at 10:04 PM, Harvey Harrison
> <harvey.harrison@gmail.com> wrote:
> > It's called be16_to_cpup, or on x86, swab16p()
> 
> Indeed.  If now somebody with an Atom could test whether using movbe
> has an advantage (my guess is that there is a slight advantage) then

How would you test that?

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-14 14:01             ` Andi Kleen
@ 2009-05-14 16:19               ` Ulrich Drepper
  2009-05-14 17:29                 ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: Ulrich Drepper @ 2009-05-14 16:19 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Harvey Harrison, Tobias Doerffel, LKML

On Thu, May 14, 2009 at 7:01 AM, Andi Kleen <andi@firstfloor.org> wrote:
> How would you test that?

Compare runtimes with mov+bswap for some simple code which uses the
value after the conversion (e.g., just add to something).

Or in your case: get the Atom designers to comment.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: Specific support for Intel Atom architecture
  2009-05-14 16:19               ` Ulrich Drepper
@ 2009-05-14 17:29                 ` Andi Kleen
  0 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2009-05-14 17:29 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Andi Kleen, Harvey Harrison, Tobias Doerffel, LKML

On Thu, May 14, 2009 at 09:19:38AM -0700, Ulrich Drepper wrote:
> On Thu, May 14, 2009 at 7:01 AM, Andi Kleen <andi@firstfloor.org> wrote:
> > How would you test that?
> 
> Compare runtimes with mov+bswap for some simple code which uses the
> value after the conversion (e.g., just add to something).
> 
> Or in your case: get the Atom designers to comment.

Don't really need Atom designers; you can prove or disprove my theory
(that they generate the same uops sequence) by checking the uops performance
counter for a micro benchmark.

However even if that was not the case I have some doubts the
kernel is doing enough endian conversions that it really matters.

For example the network stack is doing maybe 4-5 endian conversions
(very conservative estimate) per packet and processing a packet
takes tens of thousands of cycles. But at best you could save 1-2 cycles
this way, so any savings will very likely be lost
in the noise.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2009-05-14 17:23 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-30 12:08 Specific support for Intel Atom architecture Tobias Doerffel
2009-04-30 15:40 ` Ingo Molnar
2009-04-30 17:03   ` H. Peter Anvin
2009-04-30 17:10   ` H. Peter Anvin
2009-05-03  5:38     ` Willy Tarreau
2009-05-03  6:48       ` H. Peter Anvin
2009-05-03 11:08         ` Tobias Doerffel
2009-05-04 13:14           ` Ingo Molnar
2009-05-04 13:32             ` Arjan van de Ven
2009-05-04 17:55               ` Ingo Molnar
2009-05-03 14:53       ` Arjan van de Ven
2009-05-03 18:30         ` Willy Tarreau
2009-05-03 18:37           ` H. Peter Anvin
2009-05-03 19:38             ` Måns Rullgård
2009-05-04  7:22 ` Andi Kleen
2009-05-11 21:30   ` Tobias Doerffel
2009-05-12  6:53     ` Andi Kleen
2009-05-12 14:20   ` Ulrich Drepper
2009-05-12 15:04     ` Andi Kleen
2009-05-12 17:45       ` Ulrich Drepper
2009-05-12 18:13         ` Andi Kleen
2009-05-14  5:04         ` Harvey Harrison
2009-05-14 13:38           ` Ulrich Drepper
2009-05-14 14:01             ` Andi Kleen
2009-05-14 16:19               ` Ulrich Drepper
2009-05-14 17:29                 ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox