From mboxrd@z Thu Jan 1 00:00:00 1970 From: Junjie Chen Subject: [PATCH] eal: force gcc to inline rte_movX function Date: Thu, 12 Apr 2018 13:16:36 +0800 Message-ID: <20180412051636.240746-1-junjie.j.chen@intel.com> Cc: dev@dpdk.org, "Chen, Junjie" , Chen@dpdk.org To: bruce.richardson@intel.com, konstantin.ananyev@intel.com Return-path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 9634A1B6D6 for ; Thu, 12 Apr 2018 08:01:52 +0200 (CEST) List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Chen, Junjie" Sometimes gcc does not inline the function despite keyword *inline*, we obeserve rte_movX is not inline when doing performance profiling, so use *always_inline* keyword to force gcc to inline the function. Signed-off-by: Chen, Junjie --- .../common/include/arch/x86/rte_memcpy.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h index cc140ecca..5ead68ab2 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -52,7 +52,7 @@ rte_memcpy(void *dst, const void *src, size_t n); * Copy 16 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -65,7 +65,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { __m256i ymm0; @@ -78,7 +78,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { __m512i zmm0; @@ -91,7 +91,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src) * Copy 128 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov128(uint8_t *dst, const uint8_t *src) { rte_mov64(dst + 0 * 64, src + 0 * 64); @@ -102,7 +102,7 @@ rte_mov128(uint8_t *dst, const uint8_t *src) * Copy 256 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov256(uint8_t *dst, const uint8_t *src) { rte_mov64(dst + 0 * 64, src + 0 * 64); @@ -293,7 +293,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) * Copy 16 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -306,7 +306,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { __m256i ymm0; @@ -319,7 +319,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); @@ -486,7 +486,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) * Copy 16 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov16(uint8_t *dst, const uint8_t *src) { __m128i xmm0; @@ -499,7 +499,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src) * Copy 32 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov32(uint8_t *dst, const uint8_t *src) { rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); @@ -510,7 +510,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src) * Copy 64 bytes from one location to another, * locations should not overlap. */ -static inline void +static __rte_always_inline void rte_mov64(uint8_t *dst, const uint8_t *src) { rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); -- 2.16.0