* [PATCH] __cacheline_aligned always in own section
@ 2004-02-26 6:44 Rusty Russell
2004-02-26 6:53 ` viro
2004-02-26 22:09 ` Denis Vlasenko
0 siblings, 2 replies; 5+ messages in thread
From: Rusty Russell @ 2004-02-26 6:44 UTC (permalink / raw)
To: akpm, torvalds; +Cc: viro, linux-kernel
Name: Always Put Cache Aligned Code in Own Section: Even Modules
Status: Tested on 2.6.3-bk7
We put ____cacheline_aligned things in their own section, simply
because we waste less space that way. Otherwise we end up padding
innocent variables to the next cacheline to get the required
alignment.
There's no reason not to do this in modules, too.
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .16274-linux-2.6.3-bk7/include/linux/cache.h .16274-linux-2.6.3-bk7.updated/include/linux/cache.h
--- .16274-linux-2.6.3-bk7/include/linux/cache.h 2003-09-22 09:47:16.000000000 +1000
+++ .16274-linux-2.6.3-bk7.updated/include/linux/cache.h 2004-02-26 16:43:49.000000000 +1100
@@ -26,13 +26,9 @@
#endif
#ifndef __cacheline_aligned
-#ifdef MODULE
-#define __cacheline_aligned ____cacheline_aligned
-#else
#define __cacheline_aligned \
__attribute__((__aligned__(SMP_CACHE_BYTES), \
__section__(".data.cacheline_aligned")))
-#endif
#endif /* __cacheline_aligned */
#ifndef __cacheline_aligned_in_smp
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
@ 2004-02-26 6:53 ` viro
2004-02-26 22:09 ` Denis Vlasenko
1 sibling, 0 replies; 5+ messages in thread
From: viro @ 2004-02-26 6:53 UTC (permalink / raw)
To: Rusty Russell; +Cc: akpm, torvalds, linux-kernel
On Thu, Feb 26, 2004 at 05:44:47PM +1100, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
>
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way. Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
>
> There's no reason not to do this in modules, too.
[snip]
while we are at it, arm-26, ppc, sparc, sparc64 and sh have per-arch
definitions of __cacheline_aligned that are identical to default.
And yes, removal is safe - all users of __cacheline_aligned actually
pull linux/cache.h in.
diff -urN RC3-bk1/include/asm-arm26/cache.h RC3-bk1-current/include/asm-arm26/cache.h
--- RC3-bk1/include/asm-arm26/cache.h Sun Jun 15 03:00:39 2003
+++ RC3-bk1-current/include/asm-arm26/cache.h Thu Feb 26 01:37:23 2004
@@ -8,12 +8,4 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#endif
diff -urN RC3-bk1/include/asm-ppc/cache.h RC3-bk1-current/include/asm-ppc/cache.h
--- RC3-bk1/include/asm-ppc/cache.h Sat Sep 27 22:04:59 2003
+++ RC3-bk1-current/include/asm-ppc/cache.h Thu Feb 26 01:39:41 2004
@@ -30,14 +30,6 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
#define L1_CACHE_PAGES 8
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#ifndef __ASSEMBLY__
extern void clean_dcache_range(unsigned long start, unsigned long stop);
extern void flush_dcache_range(unsigned long start, unsigned long stop);
diff -urN RC3-bk1/include/asm-sh/cache.h RC3-bk1-current/include/asm-sh/cache.h
--- RC3-bk1/include/asm-sh/cache.h Wed Feb 4 05:23:24 2004
+++ RC3-bk1-current/include/asm-sh/cache.h Thu Feb 26 01:39:50 2004
@@ -21,14 +21,6 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
struct cache_info {
diff -urN RC3-bk1/include/asm-sparc/cache.h RC3-bk1-current/include/asm-sparc/cache.h
--- RC3-bk1/include/asm-sparc/cache.h Mon Sep 2 09:14:46 2002
+++ RC3-bk1-current/include/asm-sparc/cache.h Thu Feb 26 01:37:00 2004
@@ -17,14 +17,6 @@
#define SMP_CACHE_BYTES 32
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(SMP_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
/* Direct access to the instruction cache is provided through and
* alternate address space. The IDC bit must be off in the ICCR on
* HyperSparcs for these accesses to work. The code below does not do
diff -urN RC3-bk1/include/asm-sparc64/cache.h RC3-bk1-current/include/asm-sparc64/cache.h
--- RC3-bk1/include/asm-sparc64/cache.h Mon Sep 2 09:14:48 2002
+++ RC3-bk1-current/include/asm-sparc64/cache.h Thu Feb 26 01:37:06 2004
@@ -14,12 +14,4 @@
#define SMP_CACHE_BYTES_SHIFT 6
#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(SMP_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#endif
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
2004-02-26 6:53 ` viro
@ 2004-02-26 22:09 ` Denis Vlasenko
2004-02-26 23:21 ` Andrew Morton
1 sibling, 1 reply; 5+ messages in thread
From: Denis Vlasenko @ 2004-02-26 22:09 UTC (permalink / raw)
To: Rusty Russell, akpm, torvalds; +Cc: viro, linux-kernel
On Thursday 26 February 2004 08:44, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
>
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way. Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
>
> There's no reason not to do this in modules, too.
On a related matter,
I compile my kernels for 486 but the build system aligns
functions and labels to 16 bytes, with results like this:
00000730 <islpci_eth_tx_timeout>:
730: 55 push %ebp
731: 89 e5 mov %esp,%ebp
733: 8b 45 08 mov 0x8(%ebp),%eax
736: 8b 40 64 mov 0x64(%eax),%eax
739: 05 14 03 00 00 add $0x314,%eax
73e: ff 40 14 incl 0x14(%eax)
741: 5d pop %ebp
742: c3 ret
743: 90 nop
744: 90 nop
745: 90 nop
746: 90 nop
747: 90 nop
748: 90 nop
749: 90 nop
74a: 90 nop
74b: 90 nop
74c: 90 nop
74d: 90 nop
74e: 90 nop
74f: 90 nop
Losing on average 15/2 bytes to alignment, my kernel loses
# echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
149632
bytes only due to function alignment, not counting jump target
alignment.
Is there any way to prevent this?
--
vda
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 22:09 ` Denis Vlasenko
@ 2004-02-26 23:21 ` Andrew Morton
2004-02-27 1:01 ` Matt Mackall
0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2004-02-26 23:21 UTC (permalink / raw)
To: Denis Vlasenko; +Cc: rusty, torvalds, viro, linux-kernel
Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> wrote:
>
> I compile my kernels for 486 but buils system aligns
> functions and labels to 16 bytes, with results like this:
>
> 00000730 <islpci_eth_tx_timeout>:
> 730: 55 push %ebp
> 731: 89 e5 mov %esp,%ebp
> 733: 8b 45 08 mov 0x8(%ebp),%eax
> 736: 8b 40 64 mov 0x64(%eax),%eax
> 739: 05 14 03 00 00 add $0x314,%eax
> 73e: ff 40 14 incl 0x14(%eax)
> 741: 5d pop %ebp
> 742: c3 ret
> 743: 90 nop
> 744: 90 nop
> 745: 90 nop
> 746: 90 nop
> 747: 90 nop
> 748: 90 nop
> 749: 90 nop
> 74a: 90 nop
> 74b: 90 nop
> 74c: 90 nop
> 74d: 90 nop
> 74e: 90 nop
> 74f: 90 nop
>
> Losing on average 15/2 bytes to alignment, my kernel lose
> # echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
> 149632
> bytes only due to function alignment, not counting jump target
> alighment.
>
> Is there any way to prevent this?
Yes, there are ways of turning off a lot of this alignment padding and it
makes a significant difference in code size. You need to dig around in the
gcc documentation for the several -*align* options.
IIRC, not all of the padding could be turned off (the gcc options were not
complete) but it's a couple of years since I investigated this.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 23:21 ` Andrew Morton
@ 2004-02-27 1:01 ` Matt Mackall
0 siblings, 0 replies; 5+ messages in thread
From: Matt Mackall @ 2004-02-27 1:01 UTC (permalink / raw)
To: Andrew Morton; +Cc: Denis Vlasenko, linux-kernel
On Thu, Feb 26, 2004 at 03:21:39PM -0800, Andrew Morton wrote:
> Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> wrote:
> >
> > I compile my kernels for 486 but buils system aligns
> > functions and labels to 16 bytes, with results like this:
> >
> > 00000730 <islpci_eth_tx_timeout>:
> > 730: 55 push %ebp
> > 731: 89 e5 mov %esp,%ebp
> > 733: 8b 45 08 mov 0x8(%ebp),%eax
> > 736: 8b 40 64 mov 0x64(%eax),%eax
> > 739: 05 14 03 00 00 add $0x314,%eax
> > 73e: ff 40 14 incl 0x14(%eax)
> > 741: 5d pop %ebp
> > 742: c3 ret
> > 743: 90 nop
> > 744: 90 nop
> > 745: 90 nop
> > 746: 90 nop
> > 747: 90 nop
> > 748: 90 nop
> > 749: 90 nop
> > 74a: 90 nop
> > 74b: 90 nop
> > 74c: 90 nop
> > 74d: 90 nop
> > 74e: 90 nop
> > 74f: 90 nop
> >
> > Losing on average 15/2 bytes to alignment, my kernel lose
> > # echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
> > 149632
> > bytes only due to function alignment, not counting jump target
> > alighment.
> >
> > Is there any way to prevent this?
>
> Yes, there are ways of turning off a lot of this alignment padding and it
> makes a significant different in code size. You need to dig around in the
> gcc documentation for the several -*align* options.
>
> IIRC, not all of the padding could be turned off (the gcc options were not
> complete) but it's a couple of years since I investigated this.
Denis, there's a patch in -tiny to let you experiment with overriding
CFLAGS. See http://selenic.com/tiny-about/
You probably want something like:
-falign-functions=1 -falign-jumps=1 -falign-labels=1 -falign-loops=1
--
Matt Mackall : http://www.selenic.com : Linux development and consulting
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2004-02-27 1:06 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-02-26 6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
2004-02-26 6:53 ` viro
2004-02-26 22:09 ` Denis Vlasenko
2004-02-26 23:21 ` Andrew Morton
2004-02-27 1:01 ` Matt Mackall
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox