From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Yanteng Si <siyanteng@loongson.cn>
Cc: peterz@infradead.org, mingo@redhat.com, mark.rutland@arm.com,
alexander.shishkin@linux.intel.com, jolsa@kernel.org,
namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
linux-perf-users@vger.kernel.org,
loongson-kernel@lists.loongnix.cn
Subject: Re: [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 with the kernel sources
Date: Wed, 10 May 2023 12:48:17 -0300 [thread overview]
Message-ID: <ZFu8wUIl+GbEjWhn@kernel.org> (raw)
In-Reply-To: <c1d2cde32b89c3bdb822ab5d1638f2b0f5513902.1683712945.git.siyanteng@loongson.cn>
Em Wed, May 10, 2023 at 06:24:50PM +0800, Yanteng Si escreveu:
> Picking the changes from:
>
> commit 68674f94ffc9dddc ("x86: don't use REP_GOOD or ERMS for small
> memory copies")
>
> Silencing these perf build warnings:
>
> Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs
> from latest version at 'arch/x86/lib/memcpy_64.S'
> diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
> Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs
> from latest version at 'arch/x86/lib/memset_64.S'
> diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S
Dropping this one:
In file included from bench/mem-memcpy-x86-64-asm.S:14:
bench/../../arch/x86/lib/memcpy_64.S:5:10: fatal error: linux/cfi_types.h: No such file or directory
5 | #include <linux/cfi_types.h>
| ^~~~~~~~~~~~~~~~~~~
compilation terminated.
> Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
> ---
> tools/arch/x86/lib/memcpy_64.S | 35 ++++++++-----------------
> tools/arch/x86/lib/memset_64.S | 47 ++++++++--------------------------
> 2 files changed, 22 insertions(+), 60 deletions(-)
>
> diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
> index a91ac666f758..8f95fb267caa 100644
> --- a/tools/arch/x86/lib/memcpy_64.S
> +++ b/tools/arch/x86/lib/memcpy_64.S
> @@ -2,6 +2,7 @@
> /* Copyright 2002 Andi Kleen */
>
> #include <linux/linkage.h>
> +#include <linux/cfi_types.h>
> #include <asm/errno.h>
> #include <asm/cpufeatures.h>
> #include <asm/alternative.h>
> @@ -9,13 +10,6 @@
>
> .section .noinstr.text, "ax"
>
> -/*
> - * We build a jump to memcpy_orig by default which gets NOPped out on
> - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
> - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
> - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
> - */
> -
> /*
> * memcpy - Copy a memory block.
> *
> @@ -26,17 +20,21 @@
> *
> * Output:
> * rax original destination
> + *
> + * The FSRM alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep movsb' itself is small enough to replace the call, but the
> + * two register moves blow up the code. And one of them is "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
> */
> SYM_TYPED_FUNC_START(__memcpy)
> - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
> - "jmp memcpy_erms", X86_FEATURE_ERMS
> + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
>
> movq %rdi, %rax
> movq %rdx, %rcx
> - shrq $3, %rcx
> - andl $7, %edx
> - rep movsq
> - movl %edx, %ecx
> rep movsb
> RET
> SYM_FUNC_END(__memcpy)
> @@ -45,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
> SYM_FUNC_ALIAS(memcpy, __memcpy)
> EXPORT_SYMBOL(memcpy)
>
> -/*
> - * memcpy_erms() - enhanced fast string memcpy. This is faster and
> - * simpler than memcpy. Use memcpy_erms when possible.
> - */
> -SYM_FUNC_START_LOCAL(memcpy_erms)
> - movq %rdi, %rax
> - movq %rdx, %rcx
> - rep movsb
> - RET
> -SYM_FUNC_END(memcpy_erms)
> -
> SYM_FUNC_START_LOCAL(memcpy_orig)
> movq %rdi, %rax
>
> diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
> index 6143b1a6fa2c..7c59a704c458 100644
> --- a/tools/arch/x86/lib/memset_64.S
> +++ b/tools/arch/x86/lib/memset_64.S
> @@ -18,27 +18,22 @@
> * rdx count (bytes)
> *
> * rax original destination
> + *
> + * The FSRS alternative should be done inline (avoiding the call and
> + * the disgusting return handling), but that would require some help
> + * from the compiler for better calling conventions.
> + *
> + * The 'rep stosb' itself is small enough to replace the call, but all
> + * the register moves blow up the code. And two of them are "needed"
> + * only for the return value that is the same as the source input,
> + * which the compiler could/should do much better anyway.
> */
> SYM_FUNC_START(__memset)
> - /*
> - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
> - * to use it when possible. If not available, use fast string instructions.
> - *
> - * Otherwise, use original memset function.
> - */
> - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
> - "jmp memset_erms", X86_FEATURE_ERMS
> + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
>
> movq %rdi,%r9
> + movb %sil,%al
> movq %rdx,%rcx
> - andl $7,%edx
> - shrq $3,%rcx
> - /* expand byte value */
> - movzbl %sil,%esi
> - movabs $0x0101010101010101,%rax
> - imulq %rsi,%rax
> - rep stosq
> - movl %edx,%ecx
> rep stosb
> movq %r9,%rax
> RET
> @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
> SYM_FUNC_ALIAS(memset, __memset)
> EXPORT_SYMBOL(memset)
>
> -/*
> - * ISO C memset - set a memory block to a byte value. This function uses
> - * enhanced rep stosb to override the fast string function.
> - * The code is simpler and shorter than the fast string function as well.
> - *
> - * rdi destination
> - * rsi value (char)
> - * rdx count (bytes)
> - *
> - * rax original destination
> - */
> -SYM_FUNC_START_LOCAL(memset_erms)
> - movq %rdi,%r9
> - movb %sil,%al
> - movq %rdx,%rcx
> - rep stosb
> - movq %r9,%rax
> - RET
> -SYM_FUNC_END(memset_erms)
> -
> SYM_FUNC_START_LOCAL(memset_orig)
> movq %rdi,%r10
>
> --
> 2.31.4
>
--
- Arnaldo
next prev parent reply other threads:[~2023-05-10 15:48 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-10 10:21 [PATCH v2 00/17] tools perf: fix build warnings Yanteng Si
2023-05-10 10:21 ` [PATCH v2 01/17] tools headers UAPI: Sync the linux/in.h with the kernel sources Yanteng Si
2023-05-10 10:21 ` [PATCH v2 02/17] tools headers UAPI: Sync the linux/prctl.h " Yanteng Si
2023-05-10 10:21 ` [PATCH v2 03/17] tools headers UAPI: Sync the drm/drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 04/17] tools headers UAPI: Sync the i915_drm.h " Yanteng Si
2023-05-10 10:22 ` [PATCH v2 05/17] tools headers UAPI: Sync the coresight-pmu headers copy " Yanteng Si
2023-05-10 15:41 ` Arnaldo Carvalho de Melo
2023-05-22 9:56 ` James Clark
2023-05-10 10:22 ` [PATCH v2 06/17] tools headers UAPI: Sync the linux/const.h with the kernel headers Yanteng Si
2023-05-10 10:22 ` [PATCH v2 07/17] tools headers UAPI: Sync the linux/perf_event.h " Yanteng Si
2023-05-10 15:44 ` Arnaldo Carvalho de Melo
2023-05-10 10:24 ` [PATCH v2 08/17] tools include UAPI: Sync the sound/asound.h copy with the kernel sources Yanteng Si
2023-05-10 10:24 ` [PATCH v2 09/17] tools headers UAPI: Sync the linux/mman.h " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 10/17] tools headers UAPI: Sync the unistd " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 11/17] tools headers kvm: Sync uapi/{asm/linux} kvm.h headers " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 12/17] tools arch x86: Sync the disabled-features " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 13/17] tools arch x86: Sync the memcpy_64 " Yanteng Si
2023-05-10 15:48 ` Arnaldo Carvalho de Melo [this message]
2023-05-10 10:24 ` [PATCH v2 14/17] tools arch x86: Sync the cpufeatures " Yanteng Si
2023-05-10 10:24 ` [PATCH v2 15/17] tools arch x86: Sync the msr-index.h copy " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 16/17] tools arch x86: Sync the prctl headers " Yanteng Si
2023-05-10 10:25 ` [PATCH v2 17/17] tools arch arm64: Sync the perf_regs " Yanteng Si
2023-05-10 15:26 ` Arnaldo Carvalho de Melo
2023-05-15 4:08 ` Leo Yan
2023-05-20 3:11 ` Leo Yan
2023-05-10 17:06 ` [PATCH v2 00/17] tools perf: fix build warnings Arnaldo Carvalho de Melo
2023-05-15 11:08 ` Yanteng Si
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZFu8wUIl+GbEjWhn@kernel.org \
--to=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=loongson-kernel@lists.loongnix.cn \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=siyanteng@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.