From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Yanteng Si <siyanteng@loongson.cn>
Cc: peterz@infradead.org, mingo@redhat.com, mark.rutland@arm.com,
alexander.shishkin@linux.intel.com, jolsa@kernel.org,
namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
linux-perf-users@vger.kernel.org,
loongson-kernel@lists.loongnix.cn
Subject: Re: [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 with the kernel sources
Date: Wed, 10 May 2023 12:48:17 -0300 [thread overview]
Message-ID: <ZFu8wUIl+GbEjWhn@kernel.org> (raw)
In-Reply-To: <c1d2cde32b89c3bdb822ab5d1638f2b0f5513902.1683712945.git.siyanteng@loongson.cn>
Em Wed, May 10, 2023 at 06:24:50PM +0800, Yanteng Si escreveu:
> Picking the changes from:
>
> commit 68674f94ffc9dddc ("x86: don't use REP_GOOD or ERMS for small
> memory copies")
>
> Silencing these perf build warnings:
>
> Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs
> from latest version at 'arch/x86/lib/memcpy_64.S'
> diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
> Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs
> from latest version at 'arch/x86/lib/memset_64.S'
> diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S
Dropping this one:
In file included from bench/mem-memcpy-x86-64-asm.S:14:
bench/../../arch/x86/lib/memcpy_64.S:5:10: fatal error: linux/cfi_types.h: No such file or directory
5 | #include <linux/cfi_types.h>
| ^~~~~~~~~~~~~~~~~~~
compilation terminated.
> Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
> ---
> tools/arch/x86/lib/memcpy_64.S | 35 ++++++++-----------------
> tools/arch/x86/lib/memset_64.S | 47 ++++++++--------------------------
> 2 files changed, 22 insertions(+), 60 deletions(-)
>
> diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
> index a91ac666f758..8f95fb267caa 100644
> --- a/tools/arch/x86/lib/memcpy_64.S
> +++ b/tools/arch/x86/lib/memcpy_64.S
> @@ -2,6 +2,7 @@
> /* Copyright 2002 Andi Kleen */
>
> #include <linux/linkage.h>
> +#include <linux/cfi_types.h>
> #include <asm/errno.h>
> #include <asm/cpufeatures.h>
> #include <asm/alternative.h>
> @@ -9,13 +10,6 @@
>
> .section .noinstr.text, "ax"
>
> -/*
> - * We build a jump to memcpy_orig by default which gets NOPped out on
> - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
> - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
> - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
> - */
> -
> /*
> * memcpy - Copy a memory block.
> *
> @@ -26,17 +20,21 @@
> *
> * Output:
> * rax original destination
> + *
> + * The FSRM alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep movsb' itself is small enough to replace the call, but the
> + * two register moves blow up the code. And one of them is "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
> */
> SYM_TYPED_FUNC_START(__memcpy)
> - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
> - "jmp memcpy_erms", X86_FEATURE_ERMS
> + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
>
> movq %rdi, %rax
> movq %rdx, %rcx
> - shrq $3, %rcx
> - andl $7, %edx
> - rep movsq
> - movl %edx, %ecx
> rep movsb
> RET
> SYM_FUNC_END(__memcpy)
> @@ -45,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
> SYM_FUNC_ALIAS(memcpy, __memcpy)
> EXPORT_SYMBOL(memcpy)
>
> -/*
> - * memcpy_erms() - enhanced fast string memcpy. This is faster and
> - * simpler than memcpy. Use memcpy_erms when possible.
> - */
> -SYM_FUNC_START_LOCAL(memcpy_erms)
> - movq %rdi, %rax
> - movq %rdx, %rcx
> - rep movsb
> - RET
> -SYM_FUNC_END(memcpy_erms)
> -
> SYM_FUNC_START_LOCAL(memcpy_orig)
> movq %rdi, %rax
>
> diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
> index 6143b1a6fa2c..7c59a704c458 100644
> --- a/tools/arch/x86/lib/memset_64.S
> +++ b/tools/arch/x86/lib/memset_64.S
> @@ -18,27 +18,22 @@
> * rdx count (bytes)
> *
> * rax original destination
> + *
> + * The FSRS alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep stosb' itself is small enough to replace the call, but all
> + * the register moves blow up the code. And two of them are "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
> */
> SYM_FUNC_START(__memset)
> - /*
> - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
> - * to use it when possible. If not available, use fast string instructions.
> - *
> - * Otherwise, use original memset function.
> - */
> - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
> - "jmp memset_erms", X86_FEATURE_ERMS
> + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
>
> movq %rdi,%r9
> + movb %sil,%al
> movq %rdx,%rcx
> - andl $7,%edx
> - shrq $3,%rcx
> - /* expand byte value */
> - movzbl %sil,%esi
> - movabs $0x0101010101010101,%rax
> - imulq %rsi,%rax
> - rep stosq
> - movl %edx,%ecx
> rep stosb
> movq %r9,%rax
> RET
> @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
> SYM_FUNC_ALIAS(memset, __memset)
> EXPORT_SYMBOL(memset)
>
> -/*
> - * ISO C memset - set a memory block to a byte value. This function uses
> - * enhanced rep stosb to override the fast string function.
> - * The code is simpler and shorter than the fast string function as well.
> - *
> - * rdi destination
> - * rsi value (char)
> - * rdx count (bytes)
> - *
> - * rax original destination
> - */
> -SYM_FUNC_START_LOCAL(memset_erms)
> - movq %rdi,%r9
> - movb %sil,%al
> - movq %rdx,%rcx
> - rep stosb
> - movq %r9,%rax
> - RET
> -SYM_FUNC_END(memset_erms)
> -
> SYM_FUNC_START_LOCAL(memset_orig)
> movq %rdi,%r10
>
> --
> 2.31.4
>
--
- Arnaldo
next prev parent reply other threads:[~2023-05-10 15:48 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-10 10:21 [PATCH v2 00/17] tools perf: fix build warnings Yanteng Si
2023-05-10 10:21 ` [PATCH v2 01/17] tools headers UAPI: Sync the linux/in.h with the kernel sources Yanteng Si
2023-05-10 10:21 ` [PATCH v2 02/17] tools headers UAPI: Sync the linux/prctl.h " Yanteng Si
2023-05-10 10:21 ` [PATCH v2 03/17] tools headers UAPI: Sync the drm/drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 04/17] tools headers UAPI: Sync the i915_drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 05/17] tools headers UAPI: Sync the coresight-pmu headers copy " Yanteng Si
2023-05-10 15:41 ` Arnaldo Carvalho de Melo
2023-05-22 9:56 ` James Clark
2023-05-10 10:22 ` [PATCH v2 06/17] tools headers UAPI: Sync the linux/const.h with the kernel headers Yanteng Si
2023-05-10 10:22 ` [PATCH v2 07/17] tools headers UAPI: Sync the linux/perf_event.h " Yanteng Si
2023-05-10 15:44 ` Arnaldo Carvalho de Melo
2023-05-10 10:24 ` [PATCH v2 08/17] tools include UAPI: Sync the sound/asound.h copy with the kernel sources Yanteng Si
2023-05-10 10:24 ` [PATCH v2 09/17] tools headers UAPI: Sync the linux/mman.h " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 10/17] tools headers UAPI: Sync the unistd " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 11/17] tools headers kvm: Sync uapi/{asm/linux} kvm.h headers " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 12/17] tools arch x86: Sync the disabled-features " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 " Yanteng Si
2023-05-10 15:48 ` Arnaldo Carvalho de Melo [this message]
2023-05-10 10:24 ` [PATCH v2 14/17] tools arch x86: Sync the cpufeatures " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 15/17] tools arch x86: Sync the msr-index.h copy " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 16/17] tools arch x86: Sync the prctl headers " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 17/17] tools arch arm64: Sync the perf_regs " Yanteng Si
2023-05-10 15:26 ` Arnaldo Carvalho de Melo
2023-05-15 4:08 ` Leo Yan
2023-05-20 3:11 ` Leo Yan
2023-05-10 17:06 ` [PATCH v2 00/17] tools perf: fix build warnings Arnaldo Carvalho de Melo
2023-05-15 11:08 ` Yanteng Si
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZFu8wUIl+GbEjWhn@kernel.org \
--to=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=loongson-kernel@lists.loongnix.cn \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=siyanteng@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).