public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] __cacheline_aligned always in own section
@ 2004-02-26  6:44 Rusty Russell
  2004-02-26  6:53 ` viro
  2004-02-26 22:09 ` Denis Vlasenko
  0 siblings, 2 replies; 5+ messages in thread
From: Rusty Russell @ 2004-02-26  6:44 UTC (permalink / raw)
  To: akpm, torvalds; +Cc: viro, linux-kernel

Name: Always Put Cache Aligned Code in Own Section: Even Modules
Status: Tested on 2.6.3-bk7

We put ____cacheline_aligned things in their own section, simply
because we waste less space that way.  Otherwise we end up padding
innocent variables to the next cacheline to get the required
alignment.

There's no reason not to do this in modules, too.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .16274-linux-2.6.3-bk7/include/linux/cache.h .16274-linux-2.6.3-bk7.updated/include/linux/cache.h
--- .16274-linux-2.6.3-bk7/include/linux/cache.h	2003-09-22 09:47:16.000000000 +1000
+++ .16274-linux-2.6.3-bk7.updated/include/linux/cache.h	2004-02-26 16:43:49.000000000 +1100
@@ -26,13 +26,9 @@
 #endif
 
 #ifndef __cacheline_aligned
-#ifdef MODULE
-#define __cacheline_aligned ____cacheline_aligned
-#else
 #define __cacheline_aligned					\
   __attribute__((__aligned__(SMP_CACHE_BYTES),			\
 		 __section__(".data.cacheline_aligned")))
-#endif
 #endif /* __cacheline_aligned */
 
 #ifndef __cacheline_aligned_in_smp

--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] __cacheline_aligned always in own section
  2004-02-26  6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
@ 2004-02-26  6:53 ` viro
  2004-02-26 22:09 ` Denis Vlasenko
  1 sibling, 0 replies; 5+ messages in thread
From: viro @ 2004-02-26  6:53 UTC (permalink / raw)
  To: Rusty Russell; +Cc: akpm, torvalds, linux-kernel

On Thu, Feb 26, 2004 at 05:44:47PM +1100, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
> 
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way.  Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
> 
> There's no reason not to do this in modules, too.

[snip]

while we are at it, arm-26, ppc, sparc, sparc64 and sh have per-arch
definitions of __cacheline_aligned that are identical to default.
And yes, removal is safe - all users of __cacheline_aligned actually
pull linux/cache.h in.

diff -urN RC3-bk1/include/asm-arm26/cache.h RC3-bk1-current/include/asm-arm26/cache.h
--- RC3-bk1/include/asm-arm26/cache.h	Sun Jun 15 03:00:39 2003
+++ RC3-bk1-current/include/asm-arm26/cache.h	Thu Feb 26 01:37:23 2004
@@ -8,12 +8,4 @@
 #define        L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 #define        SMP_CACHE_BYTES L1_CACHE_BYTES
 
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned					\
-  __attribute__((__aligned__(L1_CACHE_BYTES),			\
-		 __section__(".data.cacheline_aligned")))
-#endif
-
 #endif
diff -urN RC3-bk1/include/asm-ppc/cache.h RC3-bk1-current/include/asm-ppc/cache.h
--- RC3-bk1/include/asm-ppc/cache.h	Sat Sep 27 22:04:59 2003
+++ RC3-bk1-current/include/asm-ppc/cache.h	Thu Feb 26 01:39:41 2004
@@ -30,14 +30,6 @@
 #define	L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 #define	L1_CACHE_PAGES		8
 
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned					\
-  __attribute__((__aligned__(L1_CACHE_BYTES),			\
-		 __section__(".data.cacheline_aligned")))
-#endif
-
 #ifndef __ASSEMBLY__
 extern void clean_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
diff -urN RC3-bk1/include/asm-sh/cache.h RC3-bk1-current/include/asm-sh/cache.h
--- RC3-bk1/include/asm-sh/cache.h	Wed Feb  4 05:23:24 2004
+++ RC3-bk1-current/include/asm-sh/cache.h	Thu Feb 26 01:39:50 2004
@@ -21,14 +21,6 @@
 
 #define L1_CACHE_ALIGN(x)	(((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned					\
-  __attribute__((__aligned__(L1_CACHE_BYTES),			\
-		 __section__(".data.cacheline_aligned")))
-#endif
-
 #define L1_CACHE_SHIFT_MAX 	5	/* largest L1 which this arch supports */
 
 struct cache_info {
diff -urN RC3-bk1/include/asm-sparc/cache.h RC3-bk1-current/include/asm-sparc/cache.h
--- RC3-bk1/include/asm-sparc/cache.h	Mon Sep  2 09:14:46 2002
+++ RC3-bk1-current/include/asm-sparc/cache.h	Thu Feb 26 01:37:00 2004
@@ -17,14 +17,6 @@
 
 #define SMP_CACHE_BYTES 32
 
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned					\
-  __attribute__((__aligned__(SMP_CACHE_BYTES),			\
-		 __section__(".data.cacheline_aligned")))
-#endif
-
 /* Direct access to the instruction cache is provided through and
  * alternate address space.  The IDC bit must be off in the ICCR on
  * HyperSparcs for these accesses to work.  The code below does not do
diff -urN RC3-bk1/include/asm-sparc64/cache.h RC3-bk1-current/include/asm-sparc64/cache.h
--- RC3-bk1/include/asm-sparc64/cache.h	Mon Sep  2 09:14:48 2002
+++ RC3-bk1-current/include/asm-sparc64/cache.h	Thu Feb 26 01:37:06 2004
@@ -14,12 +14,4 @@
 #define        SMP_CACHE_BYTES_SHIFT	6
 #define        SMP_CACHE_BYTES		(1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
 
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned					\
-  __attribute__((__aligned__(SMP_CACHE_BYTES),			\
-		 __section__(".data.cacheline_aligned")))
-#endif
-
 #endif

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] __cacheline_aligned always in own section
  2004-02-26  6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
  2004-02-26  6:53 ` viro
@ 2004-02-26 22:09 ` Denis Vlasenko
  2004-02-26 23:21   ` Andrew Morton
  1 sibling, 1 reply; 5+ messages in thread
From: Denis Vlasenko @ 2004-02-26 22:09 UTC (permalink / raw)
  To: Rusty Russell, akpm, torvalds; +Cc: viro, linux-kernel

On Thursday 26 February 2004 08:44, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
>
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way.  Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
>
> There's no reason not to do this in modules, too.

On a related matter,

I compile my kernels for 486 but the build system aligns
functions and labels to 16 bytes, with results like this:

00000730 <islpci_eth_tx_timeout>:
     730:       55                      push   %ebp
     731:       89 e5                   mov    %esp,%ebp
     733:       8b 45 08                mov    0x8(%ebp),%eax
     736:       8b 40 64                mov    0x64(%eax),%eax
     739:       05 14 03 00 00          add    $0x314,%eax
     73e:       ff 40 14                incl   0x14(%eax)
     741:       5d                      pop    %ebp
     742:       c3                      ret
     743:       90                      nop
     744:       90                      nop
     745:       90                      nop
     746:       90                      nop
     747:       90                      nop
     748:       90                      nop
     749:       90                      nop
     74a:       90                      nop
     74b:       90                      nop
     74c:       90                      nop
     74d:       90                      nop
     74e:       90                      nop
     74f:       90                      nop

Losing on average 15/2 bytes to alignment, my kernel loses
# echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
149632
bytes only due to function alignment, not counting jump target
alignment.

Is there any way to prevent this?
--
vda


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] __cacheline_aligned always in own section
  2004-02-26 22:09 ` Denis Vlasenko
@ 2004-02-26 23:21   ` Andrew Morton
  2004-02-27  1:01     ` Matt Mackall
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2004-02-26 23:21 UTC (permalink / raw)
  To: Denis Vlasenko; +Cc: rusty, torvalds, viro, linux-kernel

Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> wrote:
>
> I compile my kernels for 486 but the build system aligns
> functions and labels to 16 bytes, with results like this:
> 
> 00000730 <islpci_eth_tx_timeout>:
>      730:       55                      push   %ebp
>      731:       89 e5                   mov    %esp,%ebp
>      733:       8b 45 08                mov    0x8(%ebp),%eax
>      736:       8b 40 64                mov    0x64(%eax),%eax
>      739:       05 14 03 00 00          add    $0x314,%eax
>      73e:       ff 40 14                incl   0x14(%eax)
>      741:       5d                      pop    %ebp
>      742:       c3                      ret
>      743:       90                      nop
>      744:       90                      nop
>      745:       90                      nop
>      746:       90                      nop
>      747:       90                      nop
>      748:       90                      nop
>      749:       90                      nop
>      74a:       90                      nop
>      74b:       90                      nop
>      74c:       90                      nop
>      74d:       90                      nop
>      74e:       90                      nop
>      74f:       90                      nop
> 
> Losing on average 15/2 bytes to alignment, my kernel loses
> # echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
> 149632
> bytes only due to function alignment, not counting jump target
> alignment.
> 
> Is there any way to prevent this?

Yes, there are ways of turning off a lot of this alignment padding and it
makes a significant difference in code size.  You need to dig around in the
gcc documentation for the several -*align* options.

IIRC, not all of the padding could be turned off (the gcc options were not
complete) but it's a couple of years since I investigated this.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] __cacheline_aligned always in own section
  2004-02-26 23:21   ` Andrew Morton
@ 2004-02-27  1:01     ` Matt Mackall
  0 siblings, 0 replies; 5+ messages in thread
From: Matt Mackall @ 2004-02-27  1:01 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Denis Vlasenko, linux-kernel

On Thu, Feb 26, 2004 at 03:21:39PM -0800, Andrew Morton wrote:
> Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> wrote:
> >
> > I compile my kernels for 486 but the build system aligns
> > functions and labels to 16 bytes, with results like this:
> > 
> > 00000730 <islpci_eth_tx_timeout>:
> >      730:       55                      push   %ebp
> >      731:       89 e5                   mov    %esp,%ebp
> >      733:       8b 45 08                mov    0x8(%ebp),%eax
> >      736:       8b 40 64                mov    0x64(%eax),%eax
> >      739:       05 14 03 00 00          add    $0x314,%eax
> >      73e:       ff 40 14                incl   0x14(%eax)
> >      741:       5d                      pop    %ebp
> >      742:       c3                      ret
> >      743:       90                      nop
> >      744:       90                      nop
> >      745:       90                      nop
> >      746:       90                      nop
> >      747:       90                      nop
> >      748:       90                      nop
> >      749:       90                      nop
> >      74a:       90                      nop
> >      74b:       90                      nop
> >      74c:       90                      nop
> >      74d:       90                      nop
> >      74e:       90                      nop
> >      74f:       90                      nop
> > 
> > Losing on average 15/2 bytes to alignment, my kernel loses
> > # echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
> > 149632
> > bytes only due to function alignment, not counting jump target
> > alignment.
> > 
> > Is there any way to prevent this?
> 
> Yes, there are ways of turning off a lot of this alignment padding and it
> makes a significant difference in code size.  You need to dig around in the
> gcc documentation for the several -*align* options.
> 
> IIRC, not all of the padding could be turned off (the gcc options were not
> complete) but it's a couple of years since I investigated this.

Denis, there's a patch in -tiny to let you experiment with overriding
CFLAGS. See http://selenic.com/tiny-about/

You probably want something like:

-falign-functions=1 -falign-jumps=1 -falign-labels=1 -falign-loops=1

-- 
Matt Mackall : http://www.selenic.com : Linux development and consulting

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2004-02-27  1:06 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-02-26  6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
2004-02-26  6:53 ` viro
2004-02-26 22:09 ` Denis Vlasenko
2004-02-26 23:21   ` Andrew Morton
2004-02-27  1:01     ` Matt Mackall

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox