linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Yanteng Si <siyanteng@loongson.cn>
Cc: peterz@infradead.org, mingo@redhat.com, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	linux-perf-users@vger.kernel.org,
	loongson-kernel@lists.loongnix.cn
Subject: Re: [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 with the kernel sources
Date: Wed, 10 May 2023 12:48:17 -0300	[thread overview]
Message-ID: <ZFu8wUIl+GbEjWhn@kernel.org> (raw)
In-Reply-To: <c1d2cde32b89c3bdb822ab5d1638f2b0f5513902.1683712945.git.siyanteng@loongson.cn>

Em Wed, May 10, 2023 at 06:24:50PM +0800, Yanteng Si escreveu:
> Picking the changes from:
> 
>   commit 68674f94ffc9dddc ("x86: don't use REP_GOOD or ERMS for small
>    memory copies")
> 
> Silencing these perf build warnings:
> 
>   Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs
>    from latest version at 'arch/x86/lib/memcpy_64.S'
>   diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
>   Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs
>    from latest version at 'arch/x86/lib/memset_64.S'
>   diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

Dropping this one:

In file included from bench/mem-memcpy-x86-64-asm.S:14:
bench/../../arch/x86/lib/memcpy_64.S:5:10: fatal error: linux/cfi_types.h: No such file or directory
    5 | #include <linux/cfi_types.h>
      |          ^~~~~~~~~~~~~~~~~~~
compilation terminated.

 
> Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
> ---
>  tools/arch/x86/lib/memcpy_64.S | 35 ++++++++-----------------
>  tools/arch/x86/lib/memset_64.S | 47 ++++++++--------------------------
>  2 files changed, 22 insertions(+), 60 deletions(-)
> 
> diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
> index a91ac666f758..8f95fb267caa 100644
> --- a/tools/arch/x86/lib/memcpy_64.S
> +++ b/tools/arch/x86/lib/memcpy_64.S
> @@ -2,6 +2,7 @@
>  /* Copyright 2002 Andi Kleen */
>  
>  #include <linux/linkage.h>
> +#include <linux/cfi_types.h>
>  #include <asm/errno.h>
>  #include <asm/cpufeatures.h>
>  #include <asm/alternative.h>
> @@ -9,13 +10,6 @@
>  
>  .section .noinstr.text, "ax"
>  
> -/*
> - * We build a jump to memcpy_orig by default which gets NOPped out on
> - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
> - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
> - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
> - */
> -
>  /*
>   * memcpy - Copy a memory block.
>   *
> @@ -26,17 +20,21 @@
>   *
>   * Output:
>   * rax original destination
> + *
> + * The FSRM alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep movsb' itself is small enough to replace the call, but the
> + * two register moves blow up the code. And one of them is "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
>   */
>  SYM_TYPED_FUNC_START(__memcpy)
> -	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
> -		      "jmp memcpy_erms", X86_FEATURE_ERMS
> +	ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
>  
>  	movq %rdi, %rax
>  	movq %rdx, %rcx
> -	shrq $3, %rcx
> -	andl $7, %edx
> -	rep movsq
> -	movl %edx, %ecx
>  	rep movsb
>  	RET
>  SYM_FUNC_END(__memcpy)
> @@ -45,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
>  SYM_FUNC_ALIAS(memcpy, __memcpy)
>  EXPORT_SYMBOL(memcpy)
>  
> -/*
> - * memcpy_erms() - enhanced fast string memcpy. This is faster and
> - * simpler than memcpy. Use memcpy_erms when possible.
> - */
> -SYM_FUNC_START_LOCAL(memcpy_erms)
> -	movq %rdi, %rax
> -	movq %rdx, %rcx
> -	rep movsb
> -	RET
> -SYM_FUNC_END(memcpy_erms)
> -
>  SYM_FUNC_START_LOCAL(memcpy_orig)
>  	movq %rdi, %rax
>  
> diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
> index 6143b1a6fa2c..7c59a704c458 100644
> --- a/tools/arch/x86/lib/memset_64.S
> +++ b/tools/arch/x86/lib/memset_64.S
> @@ -18,27 +18,22 @@
>   * rdx   count (bytes)
>   *
>   * rax   original destination
> + *
> + * The FSRS alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep stosb' itself is small enough to replace the call, but all
> + * the register moves blow up the code. And two of them are "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
>   */
>  SYM_FUNC_START(__memset)
> -	/*
> -	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
> -	 * to use it when possible. If not available, use fast string instructions.
> -	 *
> -	 * Otherwise, use original memset function.
> -	 */
> -	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
> -		      "jmp memset_erms", X86_FEATURE_ERMS
> +	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
>  
>  	movq %rdi,%r9
> +	movb %sil,%al
>  	movq %rdx,%rcx
> -	andl $7,%edx
> -	shrq $3,%rcx
> -	/* expand byte value  */
> -	movzbl %sil,%esi
> -	movabs $0x0101010101010101,%rax
> -	imulq %rsi,%rax
> -	rep stosq
> -	movl %edx,%ecx
>  	rep stosb
>  	movq %r9,%rax
>  	RET
> @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
>  SYM_FUNC_ALIAS(memset, __memset)
>  EXPORT_SYMBOL(memset)
>  
> -/*
> - * ISO C memset - set a memory block to a byte value. This function uses
> - * enhanced rep stosb to override the fast string function.
> - * The code is simpler and shorter than the fast string function as well.
> - *
> - * rdi   destination
> - * rsi   value (char)
> - * rdx   count (bytes)
> - *
> - * rax   original destination
> - */
> -SYM_FUNC_START_LOCAL(memset_erms)
> -	movq %rdi,%r9
> -	movb %sil,%al
> -	movq %rdx,%rcx
> -	rep stosb
> -	movq %r9,%rax
> -	RET
> -SYM_FUNC_END(memset_erms)
> -
>  SYM_FUNC_START_LOCAL(memset_orig)
>  	movq %rdi,%r10
>  
> -- 
> 2.31.4
> 

-- 

- Arnaldo

  reply	other threads:[~2023-05-10 15:48 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-10 10:21 [PATCH v2 00/17] tools perf: fix build warnings Yanteng Si
2023-05-10 10:21 ` [PATCH v2 01/17] tools headers UAPI: Sync the linux/in.h with the kernel sources Yanteng Si
2023-05-10 10:21 ` [PATCH v2 02/17] tools headers UAPI: Sync the linux/prctl.h " Yanteng Si
2023-05-10 10:21 ` [PATCH v2 03/17] tools headers UAPI: Sync the drm/drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 04/17] tools headers UAPI: Sync the i915_drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 05/17] tools headers UAPI: Sync the coresight-pmu headers copy " Yanteng Si
2023-05-10 15:41   ` Arnaldo Carvalho de Melo
2023-05-22  9:56     ` James Clark
2023-05-10 10:22 ` [PATCH v2 06/17] tools headers UAPI: Sync the linux/const.h with the kernel headers Yanteng Si
2023-05-10 10:22 ` [PATCH v2 07/17] tools headers UAPI: Sync the linux/perf_event.h " Yanteng Si
2023-05-10 15:44   ` Arnaldo Carvalho de Melo
2023-05-10 10:24 ` [PATCH v2 08/17] tools include UAPI: Sync the sound/asound.h copy with the kernel sources Yanteng Si
2023-05-10 10:24 ` [PATCH v2 09/17] tools headers UAPI: Sync the linux/mman.h " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 10/17] tools headers UAPI: Sync the unistd " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 11/17] tools headers kvm: Sync uapi/{asm/linux} kvm.h headers " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 12/17] tools arch x86: Sync the disabled-features " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 " Yanteng Si
2023-05-10 15:48   ` Arnaldo Carvalho de Melo [this message]
2023-05-10 10:24 ` [PATCH v2 14/17] tools arch x86: Sync the cpufeatures " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 15/17] tools arch x86: Sync the msr-index.h copy " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 16/17] tools arch x86: Sync the prctl headers " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 17/17] tools arch arm64: Sync the perf_regs " Yanteng Si
2023-05-10 15:26   ` Arnaldo Carvalho de Melo
2023-05-15  4:08     ` Leo Yan
2023-05-20  3:11       ` Leo Yan
2023-05-10 17:06 ` [PATCH v2 00/17] tools perf: fix build warnings Arnaldo Carvalho de Melo
2023-05-15 11:08   ` Yanteng Si

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZFu8wUIl+GbEjWhn@kernel.org \
    --to=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=loongson-kernel@lists.loongnix.cn \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=siyanteng@loongson.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).