* [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions @ 2018-01-09 6:57 Christophe Leroy 2019-05-03 14:14 ` Christophe Leroy 2019-05-03 14:14 ` Christophe Leroy 0 siblings, 2 replies; 6+ messages in thread From: Christophe Leroy @ 2018-01-09 6:57 UTC (permalink / raw) To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, Scott Wood Cc: linux-kernel, linuxppc-dev Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst() clobber memory, tell it what it clobbers: * dcbz(), dcbi() and dcbf() clobbers one cacheline as output * dcbf() and dcbst() clobbers one cacheline as input Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> --- arch/powerpc/include/asm/cache.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index c1d257aa4c2d..fc8fe18acf8c 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long); static inline void dcbz(void *addr) { - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbz 0, %1" : + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : + "r"(addr) :); } static inline void dcbi(void *addr) { - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbi 0, %1" : + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : + "r"(addr) :); } static inline void dcbf(void *addr) { - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbf 0, %1" : + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) : + ); } static inline void dcbst(void *addr) { - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbst 0, %0" : : + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) : + ); } #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ -- 2.13.3 ^ permalink raw reply related [flat|nested] 
6+ messages in thread
* Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions 2018-01-09 6:57 [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions Christophe Leroy @ 2019-05-03 14:14 ` Christophe Leroy 2019-05-03 18:15 ` Segher Boessenkool 2019-05-03 14:14 ` Christophe Leroy 1 sibling, 1 reply; 6+ messages in thread From: Christophe Leroy @ 2019-05-03 14:14 UTC (permalink / raw) Cc: linux-kernel, Scott Wood, Paul Mackerras, linuxppc-dev Segher, A while ago I proposed the following patch, and didn't get any comment back on it. Do you have any opinion on it ? Is it good and worth it ? Thanks Christophe Le 09/01/2018 à 07:57, Christophe Leroy a écrit : > Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst() > clobber memory, tell it what it clobbers: > * dcbz(), dcbi() and dcbf() clobbers one cacheline as output > * dcbf() and dcbst() clobbers one cacheline as input > > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> > --- > arch/powerpc/include/asm/cache.h | 17 +++++++++++++---- > 1 file changed, 13 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h > index c1d257aa4c2d..fc8fe18acf8c 100644 > --- a/arch/powerpc/include/asm/cache.h > +++ b/arch/powerpc/include/asm/cache.h > @@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long); > > static inline void dcbz(void *addr) > { > - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbz 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + "r"(addr) :); > } > > static inline void dcbi(void *addr) > { > - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbi 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + "r"(addr) :); > } > > static inline void dcbf(void *addr) > { > - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbf 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + 
"r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) : > + ); > } > > static inline void dcbst(void *addr) > { > - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbst 0, %0" : : > + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) : > + ); > } > #endif /* !__ASSEMBLY__ */ > #endif /* __KERNEL__ */ > ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions 2019-05-03 14:14 ` Christophe Leroy @ 2019-05-03 18:15 ` Segher Boessenkool 2019-05-06 16:31 ` Christophe Leroy 0 siblings, 1 reply; 6+ messages in thread From: Segher Boessenkool @ 2019-05-03 18:15 UTC (permalink / raw) To: Christophe Leroy; +Cc: Scott Wood, linuxppc-dev, Paul Mackerras, linux-kernel Hi Christophe, On Fri, May 03, 2019 at 04:14:13PM +0200, Christophe Leroy wrote: > A while ago I proposed the following patch, and didn't get any comment > back on it. I didn't see it. Maybe because of holiday :-) > Do you have any opinion on it ? Is it good and worth it ? > Le 09/01/2018 à 07:57, Christophe Leroy a écrit : > >Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst() > >clobber memory, tell it what it clobbers: > >* dcbz(), dcbi() and dcbf() clobbers one cacheline as output > >* dcbf() and dcbst() clobbers one cacheline as input You cannot "clobber input". Seen another way, only dcbi clobbers anything; dcbz zeroes it instead, and dcbf and dcbst only change in what caches the data hangs out. > >--- a/arch/powerpc/include/asm/cache.h > >+++ b/arch/powerpc/include/asm/cache.h > >@@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long); > > > > static inline void dcbz(void *addr) > > { > >- __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); > >+ __asm__ __volatile__ ("dcbz 0, %1" : > >+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > >+ "r"(addr) :); > > } The instruction does *not* work on the memory pointed to by addr. It works on the cache line containing the address addr. If you want to have addr always aligned, you need to document this, and check all callers, etc. > > static inline void dcbf(void *addr) > > { > >- __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); > >+ __asm__ __volatile__ ("dcbf 0, %1" : > >+ "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > >+ "r"(addr), "m"(*(char > >(*)[L1_CACHE_BYTES])addr) : > >+ ); > > } Newline damage... 
Was that your mailer? Also, you may want a "memory" clobber anyway, to get ordering correct for the synchronisation instructions. I think your changes make things less robust than they were before. [ Btw. Instead of __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); you can do __asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory"); to save some insns here and there. ] Segher ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions 2019-05-03 18:15 ` Segher Boessenkool @ 2019-05-06 16:31 ` Christophe Leroy 2019-05-06 16:53 ` Segher Boessenkool 0 siblings, 1 reply; 6+ messages in thread From: Christophe Leroy @ 2019-05-06 16:31 UTC (permalink / raw) To: Segher Boessenkool; +Cc: Scott Wood, linuxppc-dev, Paul Mackerras, linux-kernel Hi Segher, On 05/03/2019 06:15 PM, Segher Boessenkool wrote: > Hi Christophe, > > On Fri, May 03, 2019 at 04:14:13PM +0200, Christophe Leroy wrote: >> A while ago I proposed the following patch, and didn't get any comment >> back on it. > > I didn't see it. Maybe because of holiday :-) Thanks for this answer, I guess I'll drop it for the time being. However, I've tried your suggestion below and got an unexpected result. [...] > > > [ Btw. Instead of > > __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); > > you can do > > __asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory"); > > to save some insns here and there. 
] Tried that change on dcbz() and checked function clear_page() static inline void clear_page(void *addr) { unsigned int i; for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES) dcbz(addr); } void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { clear_page(page); /* * We shouldn't have to do this, but some versions of glibc * require it (ld.so assumes zero filled pages are icache clean) * - Anton */ flush_dcache_page(pg); } EXPORT_SYMBOL(clear_user_page); Before the change, clear_user_page: mflr 0 stw 0,4(1) bl _mcount stwu 1,-16(1) li 9,128 mflr 0 mtctr 9 stw 0,20(1) .L46: #APP # 88 "./arch/powerpc/include/asm/cache.h" 1 dcbz 0, 3 # 0 "" 2 #NO_APP addi 3,3,32 bdnz .L46 lwz 0,20(1) mr 3,5 mtlr 0 addi 1,1,16 b flush_dcache_page After the change clear_user_page: mflr 0 stw 0,4(1) bl _mcount stwu 1,-32(1) li 9,128 mflr 0 mtctr 9 stw 0,36(1) .L46: stw 3,8(1) addi 9,1,8 #APP # 88 "./arch/powerpc/include/asm/cache.h" 1 dcbz 0(9) # 0 "" 2 #NO_APP addi 3,3,32 bdnz .L46 mr 3,5 bl flush_dcache_page lwz 0,36(1) addi 1,1,32 mtlr 0 blr So first of all it uses a nonexistent form of dcbz : "dcbz 0(9)" And in addition, it stores r3 somewhere and I guess expects to read it with dcbz ??? Looks like 'Z' is not the right constraint to use. Christophe ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions 2019-05-06 16:31 ` Christophe Leroy @ 2019-05-06 16:53 ` Segher Boessenkool 0 siblings, 0 replies; 6+ messages in thread From: Segher Boessenkool @ 2019-05-06 16:53 UTC (permalink / raw) To: Christophe Leroy; +Cc: Scott Wood, linuxppc-dev, Paul Mackerras, linux-kernel Hi! On Mon, May 06, 2019 at 04:31:38PM +0000, Christophe Leroy wrote: > However, I've tried your suggestion below and get unnexpected result. > >you can do > > > > __asm__ __volatile__ ("dcbf %0" : : "Z"(addr) : "memory"); > > > >to save some insns here and there. ] This should be "dcbf %y0". Sorry. And not addr -- it needs a mem there, so deref addr as usual. Segher ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions 2018-01-09 6:57 [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions Christophe Leroy 2019-05-03 14:14 ` Christophe Leroy @ 2019-05-03 14:14 ` Christophe Leroy 1 sibling, 0 replies; 6+ messages in thread From: Christophe Leroy @ 2019-05-03 14:14 UTC (permalink / raw) To: Segher Boessenkool; +Cc: linux-kernel, Scott Wood, Paul Mackerras, linuxppc-dev Segher, A while ago I proposed the following patch, and didn't get any comment back on it. Do you have any opinion on it ? Is it good and worth it ? Thanks Christophe Le 09/01/2018 à 07:57, Christophe Leroy a écrit : > Instead of just telling GCC that dcbz(), dcbi(), dcbf() and dcbst() > clobber memory, tell it what it clobbers: > * dcbz(), dcbi() and dcbf() clobbers one cacheline as output > * dcbf() and dcbst() clobbers one cacheline as input > > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> > --- > arch/powerpc/include/asm/cache.h | 17 +++++++++++++---- > 1 file changed, 13 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/cache.h > b/arch/powerpc/include/asm/cache.h > index c1d257aa4c2d..fc8fe18acf8c 100644 > --- a/arch/powerpc/include/asm/cache.h > +++ b/arch/powerpc/include/asm/cache.h > @@ -82,22 +82,31 @@ extern void _set_L3CR(unsigned long); > static inline void dcbz(void *addr) > { > - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbz 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + "r"(addr) :); > } > static inline void dcbi(void *addr) > { > - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbi 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + "r"(addr) :); > } > static inline void dcbf(void *addr) > { > - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbf 0, %1" : > + "=m"(*(char (*)[L1_CACHE_BYTES])addr) : > + "r"(addr), 
"m"(*(char (*)[L1_CACHE_BYTES])addr) : > + ); > } > static inline void dcbst(void *addr) > { > - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); > + __asm__ __volatile__ ("dcbst 0, %0" : : > + "r"(addr), "m"(*(char (*)[L1_CACHE_BYTES])addr) : > + ); > } > #endif /* !__ASSEMBLY__ */ > #endif /* __KERNEL__ */ > ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-05-06 16:54 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2018-01-09 6:57 [PATCH] powerpc/32: Remove memory clobber asm constraint on dcbX() functions Christophe Leroy 2019-05-03 14:14 ` Christophe Leroy 2019-05-03 18:15 ` Segher Boessenkool 2019-05-06 16:31 ` Christophe Leroy 2019-05-06 16:53 ` Segher Boessenkool 2019-05-03 14:14 ` Christophe Leroy
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).