netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86_64: inline csum_ipv6_magic()
@ 2025-11-13 15:45 Eric Dumazet
  2025-11-13 16:26 ` Dave Hansen
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2025-11-13 15:45 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H . Peter Anvin, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: linux-kernel, Simon Horman, Kuniyuki Iwashima, netdev,
	Eric Dumazet, Eric Dumazet

Inline this small helper.

This reduces register pressure, as saddr and daddr are often
back to back in memory.

For instance code inlined in tcp6_gro_receive() will look like:

 55a:	48 03 73 28          	add    0x28(%rbx),%rsi
 55e:	8b 43 70             	mov    0x70(%rbx),%eax
 561:	29 f8                	sub    %edi,%eax
 563:	0f c8                	bswap  %eax
 565:	89 c0                	mov    %eax,%eax
 567:	48 05 00 06 00 00    	add    $0x600,%rax
 56d:	48 03 46 08          	add    0x8(%rsi),%rax
 571:	48 13 46 10          	adc    0x10(%rsi),%rax
 575:	48 13 46 18          	adc    0x18(%rsi),%rax
 579:	48 13 46 20          	adc    0x20(%rsi),%rax
 57d:	48 83 d0 00          	adc    $0x0,%rax
 581:	48 89 c6             	mov    %rax,%rsi
 584:	48 c1 ee 20          	shr    $0x20,%rsi
 588:	01 f0                	add    %esi,%eax
 58a:	83 d0 00             	adc    $0x0,%eax
 58d:	89 c6                	mov    %eax,%esi
 58f:	66 31 c0             	xor    %ax,%ax

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 arch/x86/include/asm/checksum_64.h | 45 ++++++++++++++++++++++--------
 arch/x86/lib/csum-wrappers_64.c    | 22 ---------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 4d4a47a3a8ab2310d279f7e465032b1463200393..5bdfd2db2b5a573ff8193a4878d372c97b158f47 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -9,6 +9,7 @@
  */
 
 #include <linux/compiler.h>
+#include <linux/in6.h>
 #include <asm/byteorder.h>
 
 /**
@@ -145,6 +146,17 @@ extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
  */
 extern __sum16 ip_compute_csum(const void *buff, int len);
 
+static inline unsigned add32_with_carry(unsigned a, unsigned b)
+{
+	asm("addl %2,%0\n\t"
+	    "adcl $0,%0"
+	    : "=r" (a)
+	    : "0" (a), "rm" (b));
+	return a;
+}
+
+#define _HAVE_ARCH_IPV6_CSUM 1
+
 /**
  * csum_ipv6_magic - Compute checksum of an IPv6 pseudo header.
  * @saddr: source address
@@ -158,20 +170,29 @@ extern __sum16 ip_compute_csum(const void *buff, int len);
  * Returns the unfolded 32bit checksum.
  */
 
-struct in6_addr;
+static inline __sum16 csum_ipv6_magic(
+	const struct in6_addr *_saddr, const struct in6_addr *_daddr,
+	__u32 len, __u8 proto, __wsum sum)
+{
+	const unsigned long *saddr = (const unsigned long *)_saddr;
+	const unsigned long *daddr = (const unsigned long *)_daddr;
+	__u64 sum64;
 
-#define _HAVE_ARCH_IPV6_CSUM 1
-extern __sum16
-csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr,
-		__u32 len, __u8 proto, __wsum sum);
+	sum64 = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
+		(__force __u64)sum;
 
-static inline unsigned add32_with_carry(unsigned a, unsigned b)
-{
-	asm("addl %2,%0\n\t"
-	    "adcl $0,%0"
-	    : "=r" (a)
-	    : "0" (a), "rm" (b));
-	return a;
+	asm("	addq %1,%[sum64]\n"
+	    "	adcq %2,%[sum64]\n"
+	    "	adcq %3,%[sum64]\n"
+	    "	adcq %4,%[sum64]\n"
+	    "	adcq $0,%[sum64]\n"
+
+	    : [sum64] "+r" (sum64)
+	    : "m" (saddr[0]), "m" (saddr[1]),
+	      "m" (daddr[0]), "m" (daddr[1]));
+
+	return csum_fold(
+	       (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
 }
 
 #define HAVE_ARCH_CSUM_ADD
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index f4df4d241526c64a5ad2eabdcbf5f0d8d56d6fd8..831b7110b041598b9764a6647fa259e1058efef2 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -68,25 +68,3 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
 }
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
 
-__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
-			const struct in6_addr *daddr,
-			__u32 len, __u8 proto, __wsum sum)
-{
-	__u64 rest, sum64;
-
-	rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
-		(__force __u64)sum;
-
-	asm("	addq (%[saddr]),%[sum]\n"
-	    "	adcq 8(%[saddr]),%[sum]\n"
-	    "	adcq (%[daddr]),%[sum]\n"
-	    "	adcq 8(%[daddr]),%[sum]\n"
-	    "	adcq $0,%[sum]\n"
-
-	    : [sum] "=r" (sum64)
-	    : "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
-
-	return csum_fold(
-	       (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
-}
-EXPORT_SYMBOL(csum_ipv6_magic);
-- 
2.51.2.1041.gc1ab5b90ca-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-12-23  5:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-13 15:45 [PATCH] x86_64: inline csum_ipv6_magic() Eric Dumazet
2025-11-13 16:26 ` Dave Hansen
2025-11-13 18:18   ` Eric Dumazet
2025-11-13 18:40     ` Dave Hansen
2025-12-23  5:03       ` Eric Dumazet
2025-11-13 20:03     ` David Laight
2025-11-13 20:12       ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).