* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
@ 2004-02-26 6:53 ` viro
2004-02-26 22:09 ` Denis Vlasenko
1 sibling, 0 replies; 5+ messages in thread
From: viro @ 2004-02-26 6:53 UTC (permalink / raw)
To: Rusty Russell; +Cc: akpm, torvalds, linux-kernel
On Thu, Feb 26, 2004 at 05:44:47PM +1100, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
>
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way. Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
>
> There's no reason not to do this in modules, too.
[snip]
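In concrete terms, the waste looks like this (a minimal, hypothetical
sketch; the symbol names are made up). When a cacheline-aligned object
sits among ordinary data, whatever the linker placed just before it is
padded out to the next cacheline boundary; collecting all such objects
into .data.cacheline_aligned pays the alignment cost once, at the start
of the section:

	/* Hypothetical layout: 'flags' is 4 bytes, but the 32-byte
	 * alignment of 'stats' below pads it out to a full cacheline
	 * (28 wasted bytes) if both land in plain .data. */
	static int flags;
	static struct { long ctr[4]; } stats
		__attribute__((__aligned__(32)));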
While we are at it: arm26, ppc, sparc, sparc64 and sh have per-arch
definitions of __cacheline_aligned that are identical to the default.
And yes, removal is safe - all users of __cacheline_aligned actually
pull linux/cache.h in.
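For reference, the surviving default in include/linux/cache.h is the
same construct the hunks below delete; reconstructed from those
per-arch copies (so treat it as a sketch), it looks roughly like:

	/* include/linux/cache.h, default definition (sketch): modules
	 * get plain alignment, the kernel proper additionally places
	 * the object in the dedicated section. */
	#ifndef __cacheline_aligned
	#ifdef MODULE
	#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
	#else
	#define __cacheline_aligned				\
		__attribute__((__aligned__(SMP_CACHE_BYTES),	\
			       __section__(".data.cacheline_aligned")))
	#endif
	#endif /* __cacheline_aligned */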
diff -urN RC3-bk1/include/asm-arm26/cache.h RC3-bk1-current/include/asm-arm26/cache.h
--- RC3-bk1/include/asm-arm26/cache.h Sun Jun 15 03:00:39 2003
+++ RC3-bk1-current/include/asm-arm26/cache.h Thu Feb 26 01:37:23 2004
@@ -8,12 +8,4 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#endif
diff -urN RC3-bk1/include/asm-ppc/cache.h RC3-bk1-current/include/asm-ppc/cache.h
--- RC3-bk1/include/asm-ppc/cache.h Sat Sep 27 22:04:59 2003
+++ RC3-bk1-current/include/asm-ppc/cache.h Thu Feb 26 01:39:41 2004
@@ -30,14 +30,6 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
#define L1_CACHE_PAGES 8
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#ifndef __ASSEMBLY__
extern void clean_dcache_range(unsigned long start, unsigned long stop);
extern void flush_dcache_range(unsigned long start, unsigned long stop);
diff -urN RC3-bk1/include/asm-sh/cache.h RC3-bk1-current/include/asm-sh/cache.h
--- RC3-bk1/include/asm-sh/cache.h Wed Feb 4 05:23:24 2004
+++ RC3-bk1-current/include/asm-sh/cache.h Thu Feb 26 01:39:50 2004
@@ -21,14 +21,6 @@
#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(L1_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#define L1_CACHE_SHIFT_MAX 5 /* largest L1 which this arch supports */
struct cache_info {
diff -urN RC3-bk1/include/asm-sparc/cache.h RC3-bk1-current/include/asm-sparc/cache.h
--- RC3-bk1/include/asm-sparc/cache.h Mon Sep 2 09:14:46 2002
+++ RC3-bk1-current/include/asm-sparc/cache.h Thu Feb 26 01:37:00 2004
@@ -17,14 +17,6 @@
#define SMP_CACHE_BYTES 32
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(SMP_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
/* Direct access to the instruction cache is provided through and
* alternate address space. The IDC bit must be off in the ICCR on
* HyperSparcs for these accesses to work. The code below does not do
diff -urN RC3-bk1/include/asm-sparc64/cache.h RC3-bk1-current/include/asm-sparc64/cache.h
--- RC3-bk1/include/asm-sparc64/cache.h Mon Sep 2 09:14:48 2002
+++ RC3-bk1-current/include/asm-sparc64/cache.h Thu Feb 26 01:37:06 2004
@@ -14,12 +14,4 @@
#define SMP_CACHE_BYTES_SHIFT 6
#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
-#ifdef MODULE
-#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-#else
-#define __cacheline_aligned \
- __attribute__((__aligned__(SMP_CACHE_BYTES), \
- __section__(".data.cacheline_aligned")))
-#endif
-
#endif
^ permalink raw reply [flat|nested] 5+ messages in thread

* Re: [PATCH] __cacheline_aligned always in own section
2004-02-26 6:44 [PATCH] __cacheline_aligned always in own section Rusty Russell
2004-02-26 6:53 ` viro
@ 2004-02-26 22:09 ` Denis Vlasenko
2004-02-26 23:21 ` Andrew Morton
1 sibling, 1 reply; 5+ messages in thread
From: Denis Vlasenko @ 2004-02-26 22:09 UTC (permalink / raw)
To: Rusty Russell, akpm, torvalds; +Cc: viro, linux-kernel
On Thursday 26 February 2004 08:44, Rusty Russell wrote:
> Name: Always Put Cache Aligned Code in Own Section: Even Modules
> Status: Tested on 2.6.3-bk7
>
> We put ____cacheline_aligned things in their own section, simply
> because we waste less space that way. Otherwise we end up padding
> innocent variables to the next cacheline to get the required
> alignment.
>
> There's no reason not to do this in modules, too.
On a related matter: I compile my kernels for 486, but the build
system aligns functions and labels to 16 bytes, with results like
this:
00000730 <islpci_eth_tx_timeout>:
730: 55 push %ebp
731: 89 e5 mov %esp,%ebp
733: 8b 45 08 mov 0x8(%ebp),%eax
736: 8b 40 64 mov 0x64(%eax),%eax
739: 05 14 03 00 00 add $0x314,%eax
73e: ff 40 14 incl 0x14(%eax)
741: 5d pop %ebp
742: c3 ret
743: 90 nop
744: 90 nop
745: 90 nop
746: 90 nop
747: 90 nop
748: 90 nop
749: 90 nop
74a: 90 nop
74b: 90 nop
74c: 90 nop
74d: 90 nop
74e: 90 nop
74f: 90 nop
Losing on average 15/2 bytes per function to alignment, my kernel loses
# echo $((`cat System.map | grep '0 ' | wc -l`*15/2))
149632
bytes just to function alignment, not counting jump target
alignment.
Is there any way to prevent this?
--
vda
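One plausible mitigation, sketched under the assumption of gcc 3.x
option spellings (gcc 2.95 used -malign-functions= and friends on
x86): override the default padding in the build flags, e.g.

	# Makefile fragment (sketch): stop gcc padding functions,
	# labels, jumps and loops out to 16-byte boundaries.
	CFLAGS += -falign-functions=1 -falign-labels=1 \
		  -falign-jumps=1 -falign-loops=1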
^ permalink raw reply [flat|nested] 5+ messages in thread