All of lore.kernel.org
 help / color / mirror / Atom feed
From: Uros Bizjak <ubizjak@gmail.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org
Cc: Uros Bizjak <ubizjak@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH RESEND] x86/asm/32: Modernize _memcpy()
Date: Tue, 16 Dec 2025 11:37:33 +0100	[thread overview]
Message-ID: <20251216103750.229347-1-ubizjak@gmail.com> (raw)

Use inout "+" constraint modifier where appropriate, declare
temporary variables as unsigned long and rewrite parts of assembly
in plain C. The memcpy() function shrinks by 10 bytes, from:

00e778d0 <memcpy>:
  e778d0:	55                   	push   %ebp
  e778d1:	89 e5                	mov    %esp,%ebp
  e778d3:	83 ec 0c             	sub    $0xc,%esp
  e778d6:	89 5d f4             	mov    %ebx,-0xc(%ebp)
  e778d9:	89 c3                	mov    %eax,%ebx
  e778db:	89 c8                	mov    %ecx,%eax
  e778dd:	89 75 f8             	mov    %esi,-0x8(%ebp)
  e778e0:	c1 e9 02             	shr    $0x2,%ecx
  e778e3:	89 d6                	mov    %edx,%esi
  e778e5:	89 7d fc             	mov    %edi,-0x4(%ebp)
  e778e8:	89 df                	mov    %ebx,%edi
  e778ea:	f3 a5                	rep movsl %ds:(%esi),%es:(%edi)
  e778ec:	89 c1                	mov    %eax,%ecx
  e778ee:	83 e1 03             	and    $0x3,%ecx
  e778f1:	74 02                	je     e778f5 <memcpy+0x25>
  e778f3:	f3 a4                	rep movsb %ds:(%esi),%es:(%edi)
  e778f5:	8b 75 f8             	mov    -0x8(%ebp),%esi
  e778f8:	89 d8                	mov    %ebx,%eax
  e778fa:	8b 5d f4             	mov    -0xc(%ebp),%ebx
  e778fd:	8b 7d fc             	mov    -0x4(%ebp),%edi
  e77900:	89 ec                	mov    %ebp,%esp
  e77902:	5d                   	pop    %ebp
  e77903:	c3                   	ret

to:

00e778b0 <memcpy>:
  e778b0:	55                   	push   %ebp
  e778b1:	89 e5                	mov    %esp,%ebp
  e778b3:	83 ec 08             	sub    $0x8,%esp
  e778b6:	89 75 f8             	mov    %esi,-0x8(%ebp)
  e778b9:	89 d6                	mov    %edx,%esi
  e778bb:	89 ca                	mov    %ecx,%edx
  e778bd:	89 7d fc             	mov    %edi,-0x4(%ebp)
  e778c0:	c1 e9 02             	shr    $0x2,%ecx
  e778c3:	89 c7                	mov    %eax,%edi
  e778c5:	f3 a5                	rep movsl %ds:(%esi),%es:(%edi)
  e778c7:	83 e2 03             	and    $0x3,%edx
  e778ca:	74 04                	je     e778d0 <memcpy+0x20>
  e778cc:	89 d1                	mov    %edx,%ecx
  e778ce:	f3 a4                	rep movsb %ds:(%esi),%es:(%edi)
  e778d0:	8b 75 f8             	mov    -0x8(%ebp),%esi
  e778d3:	8b 7d fc             	mov    -0x4(%ebp),%edi
  e778d6:	89 ec                	mov    %ebp,%esp
  e778d8:	5d                   	pop    %ebp
  e778d9:	c3                   	ret

due to a better register allocation, avoiding the call-saved
%ebx register.

No functional changes intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/string_32.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index e9cce169bb4c..16cdda93e437 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -32,16 +32,18 @@ extern size_t strlen(const char *s);
 
 static __always_inline void *__memcpy(void *to, const void *from, size_t n)
 {
-	int d0, d1, d2;
-	asm volatile("rep movsl\n\t"
-		     "movl %4,%%ecx\n\t"
-		     "andl $3,%%ecx\n\t"
-		     "jz 1f\n\t"
-		     "rep movsb\n\t"
-		     "1:"
-		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
-		     : "memory");
+	unsigned long esi = (unsigned long)from;
+	unsigned long edi = (unsigned long)to;
+	unsigned long ecx = n >> 2;
+
+	asm volatile("rep movsl"
+		     : "+D" (edi), "+S" (esi), "+c" (ecx)
+		     : : "memory");
+	ecx = n & 3;
+	if (ecx)
+		asm volatile("rep movsb"
+			     : "+D" (edi), "+S" (esi), "+c" (ecx)
+			     : : "memory");
 	return to;
 }
 
-- 
2.52.0


             reply	other threads:[~2025-12-16 10:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-16 10:37 Uros Bizjak [this message]
2025-12-16 13:14 ` [PATCH RESEND] x86/asm/32: Modernize _memcpy() David Laight
2025-12-16 16:30   ` Uros Bizjak
2025-12-16 16:38     ` Dave Hansen
2025-12-16 19:18     ` David Laight
  -- strict thread matches above, loose matches on Subject: below --
2025-12-16 22:21 kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251216103750.229347-1-ubizjak@gmail.com \
    --to=ubizjak@gmail.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.