From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757516AbZCEXcV (ORCPT ); Thu, 5 Mar 2009 18:32:21 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1757726AbZCEXbi (ORCPT ); Thu, 5 Mar 2009 18:31:38 -0500 Received: from smtp.polymtl.ca ([132.207.4.11]:48438 "EHLO smtp.polymtl.ca" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758226AbZCEXbg (ORCPT ); Thu, 5 Mar 2009 18:31:36 -0500 Message-Id: <20090305225513.917363040@polymtl.ca> References: <20090305224728.947235917@polymtl.ca> User-Agent: quilt/0.46-1 Date: Thu, 05 Mar 2009 17:47:38 -0500 From: Mathieu Desnoyers To: Linus Torvalds , Ingo Molnar , linux-kernel@vger.kernel.org, Andrew Morton , Steven Rostedt , ltt-dev@lists.casi.polymtl.ca, Peter Zijlstra , Frederic Weisbecker , Arjan van de Ven , Pekka Paalanen , Arnaldo Carvalho de Melo , "H. Peter Anvin" , Martin Bligh , "Frank Ch. Eigler" , Tom Zanussi , Masami Hiramatsu , KOSAKI Motohiro , Jason Baron , Christoph Hellwig , Jiaying Zhang , Eduard - Gabriel Munteanu , mrubin@google.com, md@google.com Cc: Mathieu Desnoyers , Zhaolei Subject: [RFC patch 10/41] lttng-optimize-write-to-page-function-remove-some-memcpy-calls Content-Disposition: inline; filename=lttng-optimize-write-to-page-function-remove-some-memcpy-calls.patch X-Poly-FromMTA: (test.casi.polymtl.ca [132.207.72.60]) at Thu, 5 Mar 2009 23:14:10 +0000 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Zhaolei : > Hello, Mathieu > > Why not use instructions generated by gcc instead of memcpy on arch without > 64bit write as: > case 4: *(u32 *)dest = *(const u32 *)src; > break; > case 8: *(u64 *)dest = *(const u64 *)src; > break; > > IMHO, even on arch without 64bit write, memcpy is more complex. 
#include <stdint.h> char dest[100]; char src[100]; typedef uint64_t u64; typedef uint32_t u32; void gcc_u64(void) { asm("/* begin */"); *(u64 *)dest = *(const u64 *)src; asm("/* end */"); } movl src, %eax movl src+4, %edx movl %eax, dest movl %edx, dest+4 void twice_u32(void) { asm("/* begin */"); ((u32 *)dest)[0] = ((const u32 *)src)[0]; ((u32 *)dest)[1] = ((const u32 *)src)[1]; asm("/* end */"); } movl src, %eax movl %eax, dest movl src+4, %eax movl %eax, dest+4 gcc seems to do better register scheduling than my code, so I think it's not so bad. I will take your proposal. Signed-off-by: Mathieu Desnoyers CC: Zhaolei --- include/linux/ltt-relay.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) Index: linux-2.6-lttng/include/linux/ltt-relay.h =================================================================== --- linux-2.6-lttng.orig/include/linux/ltt-relay.h 2009-03-05 15:40:02.000000000 -0500 +++ linux-2.6-lttng/include/linux/ltt-relay.h 2009-03-05 15:40:42.000000000 -0500 @@ -215,13 +215,16 @@ static inline void ltt_relay_do_copy(voi case 4: *(u32 *)dest = *(const u32 *)src; break; -#if (BITS_PER_LONG == 64) case 8: *(u64 *)dest = *(const u64 *)src; break; -#endif default: - memcpy(dest, src, len); + /* + * What we really want here is an inline memcpy, but we don't + * have constants, so gcc generally uses a function call. + */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; } } #else @@ -256,19 +259,19 @@ static inline void ltt_relay_do_copy(voi goto memcpy_fallback; *(u32 *)dest = *(const u32 *)src; break; -#if (BITS_PER_LONG == 64) case 8: if (unlikely(!addr_aligned(dest, src, 8))) goto memcpy_fallback; *(u64 *)dest = *(const u64 *)src; break; -#endif default: - goto memcpy_fallback; + /* + * What we really want here is an inline memcpy, but we don't + * have constants, so gcc generally uses a function call. 
+ */ + for (; len > 0; len--) + *(u8 *)dest++ = *(const u8 *)src++; } - return; -memcpy_fallback: - memcpy(dest, src, len); } #endif -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68