public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Laight <david.laight.linux@gmail.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: Re: [PATCH RESEND] x86/asm/32: Modernize _memcpy()
Date: Tue, 16 Dec 2025 13:14:06 +0000	[thread overview]
Message-ID: <20251216131406.43bfbbaa@pumpkin> (raw)
In-Reply-To: <20251216103750.229347-1-ubizjak@gmail.com>

On Tue, 16 Dec 2025 11:37:33 +0100
Uros Bizjak <ubizjak@gmail.com> wrote:

> Use inout "+" constraint modifier where appropriate, declare
> temporary variables as unsigned long and rewrite parts of assembly
> in plain C. The memcpy() function shrinks by 10 bytes, from:
> 
> 00e778d0 <memcpy>:
>   e778d0:	55                   	push   %ebp
>   e778d1:	89 e5                	mov    %esp,%ebp
>   e778d3:	83 ec 0c             	sub    $0xc,%esp
>   e778d6:	89 5d f4             	mov    %ebx,-0xc(%ebp)
>   e778d9:	89 c3                	mov    %eax,%ebx
>   e778db:	89 c8                	mov    %ecx,%eax
>   e778dd:	89 75 f8             	mov    %esi,-0x8(%ebp)
>   e778e0:	c1 e9 02             	shr    $0x2,%ecx
>   e778e3:	89 d6                	mov    %edx,%esi
>   e778e5:	89 7d fc             	mov    %edi,-0x4(%ebp)
>   e778e8:	89 df                	mov    %ebx,%edi
>   e778ea:	f3 a5                	rep movsl %ds:(%esi),%es:(%edi)
>   e778ec:	89 c1                	mov    %eax,%ecx
>   e778ee:	83 e1 03             	and    $0x3,%ecx
>   e778f1:	74 02                	je     e778f5 <memcpy+0x25>
>   e778f3:	f3 a4                	rep movsb %ds:(%esi),%es:(%edi)
>   e778f5:	8b 75 f8             	mov    -0x8(%ebp),%esi
>   e778f8:	89 d8                	mov    %ebx,%eax
>   e778fa:	8b 5d f4             	mov    -0xc(%ebp),%ebx
>   e778fd:	8b 7d fc             	mov    -0x4(%ebp),%edi
>   e77900:	89 ec                	mov    %ebp,%esp
>   e77902:	5d                   	pop    %ebp
>   e77903:	c3                   	ret
> 
> to:
> 
> 00e778b0 <memcpy>:
>   e778b0:	55                   	push   %ebp
>   e778b1:	89 e5                	mov    %esp,%ebp
>   e778b3:	83 ec 08             	sub    $0x8,%esp
>   e778b6:	89 75 f8             	mov    %esi,-0x8(%ebp)
>   e778b9:	89 d6                	mov    %edx,%esi
>   e778bb:	89 ca                	mov    %ecx,%edx
>   e778bd:	89 7d fc             	mov    %edi,-0x4(%ebp)
>   e778c0:	c1 e9 02             	shr    $0x2,%ecx
>   e778c3:	89 c7                	mov    %eax,%edi
>   e778c5:	f3 a5                	rep movsl %ds:(%esi),%es:(%edi)
>   e778c7:	83 e2 03             	and    $0x3,%edx
>   e778ca:	74 04                	je     e778d0 <memcpy+0x20>
>   e778cc:	89 d1                	mov    %edx,%ecx
>   e778ce:	f3 a4                	rep movsb %ds:(%esi),%es:(%edi)
>   e778d0:	8b 75 f8             	mov    -0x8(%ebp),%esi
>   e778d3:	8b 7d fc             	mov    -0x4(%ebp),%edi
>   e778d6:	89 ec                	mov    %ebp,%esp
>   e778d8:	5d                   	pop    %ebp
>   e778d9:	c3                   	ret
> 
> due to a better register allocation, avoiding the call-saved
> %ebx register.

That'll might be semi-random.

> 
> No functional changes intended.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> ---
>  arch/x86/include/asm/string_32.h | 22 ++++++++++++----------
>  1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
> index e9cce169bb4c..16cdda93e437 100644
> --- a/arch/x86/include/asm/string_32.h
> +++ b/arch/x86/include/asm/string_32.h
> @@ -32,16 +32,18 @@ extern size_t strlen(const char *s);
>  
>  static __always_inline void *__memcpy(void *to, const void *from, size_t n)
>  {
> -	int d0, d1, d2;
> -	asm volatile("rep movsl\n\t"
> -		     "movl %4,%%ecx\n\t"
> -		     "andl $3,%%ecx\n\t"
> -		     "jz 1f\n\t"
> -		     "rep movsb\n\t"
> -		     "1:"
> -		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
> -		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
> -		     : "memory");
> +	unsigned long esi = (unsigned long)from;
> +	unsigned long edi = (unsigned long)to;

There is no need to cast to 'unsigned long', the same code should be
generated if 'void *' is used.

> +	unsigned long ecx = n >> 2;
> +
> +	asm volatile("rep movsl"
> +		     : "+D" (edi), "+S" (esi), "+c" (ecx)
> +		     : : "memory");
> +	ecx = n & 3;
> +	if (ecx)
> +		asm volatile("rep movsb"
> +			     : "+D" (edi), "+S" (esi), "+c" (ecx)
> +			     : : "memory");
>  	return to;
>  }
> 

This version seems to generate better code still:
see https://godbolt.org/z/78cq97PPj

void *__memcpy(void *to, const void *from, unsigned long n)
{
	unsigned long ecx = n >> 2;

	asm volatile("rep movsl"
		     : "+D" (to), "+S" (from), "+c" (ecx)
		     : : "memory");
	ecx = n & 3;
	if (ecx)
		asm volatile("rep movsb"
			     : "+D" (to), "+S" (from), "+c" (ecx)
			     : : "memory");
	return (char *)to - n;
}

Note that the performance is horrid on a lot of cpu.
(In particular the copy of the remaining 1-3 bytes.)

	David


  reply	other threads:[~2025-12-16 13:14 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-16 10:37 [PATCH RESEND] x86/asm/32: Modernize _memcpy() Uros Bizjak
2025-12-16 13:14 ` David Laight [this message]
2025-12-16 16:30   ` Uros Bizjak
2025-12-16 16:38     ` Dave Hansen
2025-12-16 19:18     ` David Laight

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251216131406.43bfbbaa@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=ubizjak@gmail.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox