From: "Chang S. Bae" <chang.seok.bae@intel.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com,
bp@alien8.de, dave.hansen@linux.intel.com,
chang.seok.bae@intel.com
Subject: [RFC PATCH 2/3] x86/lib: Convert repeated asm sequences in checksum copy into macros
Date: Mon, 24 Nov 2025 21:32:25 +0000 [thread overview]
Message-ID: <20251124213227.123779-3-chang.seok.bae@intel.com> (raw)
In-Reply-To: <20251124213227.123779-1-chang.seok.bae@intel.com>
Several instruction patterns are repeated in the checksum-copy function.
Replace them with small macros to make the code more concise and readable.
No functional change.
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
---
These repetitions are related to the loop unrolling, which will be
further extended using EGPRs in the next patch.
---
arch/x86/lib/csum-copy_64.S | 106 ++++++++++++++++--------------------
1 file changed, 48 insertions(+), 58 deletions(-)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 66ed849090b7..5526bdfac041 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -46,6 +46,43 @@
RET
.endm
+/*
+ * prefetch - emit a prefetcht0 for the address 5*64 bytes past %rdi.
+ *
+ * Takes no arguments; the base register and the distance are fixed.
+ * Numeric label 30 is defined before the prefetch and numeric label 2
+ * directly after it; both are handed to _ASM_EXTABLE.
+ * NOTE(review): presumably this makes a fault on the prefetch resume at
+ * label 2 - confirm against the _ASM_EXTABLE definition.
+ */
+.macro prefetch
+30:
+ /*
+ * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+ * potentially unmapped kernel address.
+ */
+ _ASM_EXTABLE(30b, 2f)
+ prefetcht0 5*64(%rdi)
+2:
+.endm
+
+/*
+ * loadregs - load consecutive quadwords from memory into registers.
+ *
+ * offset: index, in 8-byte units, of the first quadword relative to \src
+ * src:    base address register
+ * regs:   destination registers; the n-th one (counting from 0) is loaded
+ *         from 8*(\offset + n)(\src)
+ *
+ * 'source' is invoked before every load, not just the first one, so each
+ * access gets its own annotation.
+ * NOTE(review): 'source' is defined outside this macro; presumably it
+ * attaches fault handling to the instruction that follows - confirm.
+ *
+ * Redefines the assembler symbol 'i'. Emits only movq, so the flags
+ * (including a pending carry) are left untouched.
+ */
+.macro loadregs offset, src, regs:vararg
+ i = 0
+.irp r, \regs
+ source
+ movq 8*(\offset + i)(\src), \r
+ /* step to the next quadword; without this every load hits slot 0 */
+ i = i + 1
+.endr
+.endm
+
+/*
+ * storeregs - store registers to consecutive quadwords in memory.
+ *
+ * offset: index, in 8-byte units, of the first quadword relative to \dst
+ * dst:    base address register
+ * regs:   source registers; the n-th one (counting from 0) is stored to
+ *         8*(\offset + n)(\dst)
+ *
+ * 'dest' is invoked before every store, not just the first one, so each
+ * access gets its own annotation.
+ * NOTE(review): 'dest' is defined outside this macro; presumably it
+ * attaches fault handling to the instruction that follows - confirm.
+ *
+ * Redefines the assembler symbol 'i'. Emits only movq, so the flags
+ * (including a pending carry) are left untouched.
+ */
+.macro storeregs offset, dst, regs:vararg
+ i = 0
+.irp r, \regs
+ dest
+ movq \r, 8*(\offset + i)(\dst)
+ /* step to the next quadword; without this every store hits slot 0 */
+ i = i + 1
+.endr
+.endm
+
+/*
+ * sumregs - add a list of registers into \sum with add-with-carry.
+ *
+ * sum:  accumulator register
+ * regs: registers to add, processed in list order
+ *
+ * Emits one adcq per listed register. The first adcq consumes whatever
+ * the carry flag holds on entry, and the carry out of the last one is
+ * left in the carry flag for the caller.
+ */
+.macro sumregs sum, regs:vararg
+.irp reg, \regs
+ adcq \reg, \sum
+.endr
+.endm
+
+/*
+ * incr - advance \ptr by \count quadwords (8 * \count bytes).
+ *
+ * Done with leaq rather than an add, so the flags - in particular a
+ * carry that is still pending - are not modified.
+ */
+.macro incr ptr, count
+ leaq (\count)*8(\ptr), \ptr
+.endm
+
.macro _csum_partial_copy
subq $5*8, %rsp
movq %rbx, 0*8(%rsp)
@@ -87,63 +124,18 @@
.p2align 4
.Lloop\@:
- source
- movq (INP), TMP1
- source
- movq 8(INP), TMP2
- source
- movq 16(INP), TMP3
- source
- movq 24(INP), TMP4
+ loadregs 0, INP, TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8
- source
- movq 32(INP), TMP5
- source
- movq 40(INP), TMP6
- source
- movq 48(INP), TMP7
- source
- movq 56(INP), TMP8
+ prefetch
-30:
- /*
- * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
- * potentially unmapped kernel address.
- */
- _ASM_EXTABLE(30b, 2f)
- prefetcht0 5*64(%rdi)
-2:
- adcq TMP1, SUM
- adcq TMP2, SUM
- adcq TMP3, SUM
- adcq TMP4, SUM
- adcq TMP5, SUM
- adcq TMP6, SUM
- adcq TMP7, SUM
- adcq TMP8, SUM
+ sumregs SUM, TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8
decl LEN64B
- dest
- movq TMP1, (OUTP)
- dest
- movq TMP2, 8(OUTP)
- dest
- movq TMP3, 16(OUTP)
- dest
- movq TMP4, 24(OUTP)
+ storeregs 0, OUTP, TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8
- dest
- movq TMP5, 32(OUTP)
- dest
- movq TMP6, 40(OUTP)
- dest
- movq TMP7, 48(OUTP)
- dest
- movq TMP8, 56(OUTP)
-
- leaq 64(INP), INP
- leaq 64(OUTP), OUTP
+ incr INP, 8
+ incr OUTP, 8
jnz .Lloop\@
@@ -159,14 +151,12 @@
clc
.p2align 4
.Lloop_8\@:
- source
- movq (INP), TMP1
- adcq TMP1, SUM
+ loadregs 0, INP, TMP1
+ sumregs SUM, TMP1
decl LEN
- dest
- movq TMP1, (OUTP)
- leaq 8(INP), INP /* preserve carry */
- leaq 8(OUTP), OUTP
+ storeregs 0, OUTP, TMP1
+ incr INP, 1 /* preserve carry */
+ incr OUTP, 1
jnz .Lloop_8\@
adcq ZERO, SUM /* add in carry */
--
2.51.0
next prev parent reply other threads:[~2025-11-24 21:55 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-24 21:32 [DISCUSSION] x86: In-Kernel Use of Extended General-Purpose Registers Chang S. Bae
2025-11-24 21:32 ` [RFC PATCH 1/3] x86/lib: Refactor csum_partial_copy_generic() into a macro Chang S. Bae
2025-11-24 21:32 ` Chang S. Bae [this message]
2025-11-24 21:32 ` [RFC PATCH 3/3] x86/lib: Use EGPRs in 64-bit checksum copy loop Chang S. Bae
2025-11-25 10:37 ` david laight
2025-12-01 21:39 ` Chang S. Bae
2025-11-26 16:30 ` [DISCUSSION] x86: In-Kernel Use of Extended General-Purpose Registers Peter Zijlstra
2025-12-01 21:40 ` Chang S. Bae
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251124213227.123779-3-chang.seok.bae@intel.com \
--to=chang.seok.bae@intel.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.