* [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold

From: Paul Mackerras @ 2016-11-03  5:10 UTC
To: linuxppc-dev

These functions compute an IP checksum by computing a 64-bit sum and
folding it to 32 bits (the "nofold" in their names refers to folding
down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
can produce a carry out from bit 31, which needs to be added in to
the sum to produce the correct result.

To fix this, we copy the from64to32() function from lib/checksum.c
and use that.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/checksum.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ee655ed..c16c6f8 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,19 +53,27 @@ static inline __sum16 csum_fold(__wsum sum)
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
 static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 					unsigned short len,
 					unsigned short proto,
 					__wsum sum)
 {
 #ifdef __powerpc64__
-	unsigned long s = (__force u32)sum;
+	u64 s = (__force u32)sum;
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
 	s += proto + len;
-	s += (s >> 32);
-	return (__force __wsum) s;
+	return (__force __wsum) from64to32(s);
 #else
 	__asm__("\n\
 	addc %0,%0,%1 \n\
@@ -127,8 +135,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
 	for (i = 0; i < ihl - 1; i++, ptr++)
 		s += *ptr;
 
-	s += (s >> 32);
-	return (__force __wsum)s;
+	return (__force __wsum)from64to32(s);
 #else
 	__wsum sum, tmp;
-- 
2.10.1
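The lost carry is easy to demonstrate outside the kernel.  The
following is a minimal standalone sketch, not part of the patch; the
input value is contrived purely to force a carry out of bit 31:

#include <stdio.h>
#include <stdint.h>

/* Userspace copy of the folding helper the patch adds (from lib/checksum.c). */
static uint32_t from64to32(uint64_t x)
{
	x = (x & 0xffffffff) + (x >> 32);	/* 32-bit + 32-bit -> 32+c bits */
	x = (x & 0xffffffff) + (x >> 32);	/* fold the carry back in */
	return (uint32_t)x;
}

int main(void)
{
	/* e.g. saddr = daddr = 0xffffffff and proto + len = 1 */
	uint64_t s = 0xffffffffULL + 0xffffffffULL + 1;	/* 0x1ffffffff */

	/* The old fold drops the carry out of bit 31: prints 0x00000000. */
	printf("old fold:   0x%08x\n", (unsigned)(uint32_t)(s + (s >> 32)));

	/* The fixed fold adds it back in: prints 0x00000001. */
	printf("from64to32: 0x%08x\n", (unsigned)from64to32(s));
	return 0;
}

The old expression yields 0 where the correct one's-complement partial
sum is 1.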
* [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian

From: Paul Mackerras @ 2016-11-03  5:15 UTC
To: linuxppc-dev

Currently we have optimized hand-coded assembly checksum routines for
big-endian 64-bit systems, but for little-endian we use the generic C
routines.  This modifies the optimized routines to work for
little-endian.  With this, we no longer need to enable
CONFIG_GENERIC_CSUM.  This also fixes a couple of comments in
checksum_64.S so they accurately reflect what the associated
instruction does.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/Kconfig                |  2 +-
 arch/powerpc/include/asm/checksum.h |  4 ++++
 arch/powerpc/lib/Makefile           |  2 --
 arch/powerpc/lib/checksum_64.S      | 12 ++++++++++--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..514e6dd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -164,7 +164,7 @@ config PPC
 	select HAVE_KERNEL_GZIP
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index c16c6f8..5f8297b 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -72,7 +72,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
 	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
 	return (__force __wsum) from64to32(s);
 #else
 	__asm__("\n\
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..0e649d7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index fd91766..4dd2761 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcsum_aligned
 
 	li	r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq	.Lcsum_finish
 
 	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze	r0,r0			/* add in final carry */
@@ -236,7 +240,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcopy_aligned
 
 	li	r9,4
@@ -398,8 +402,12 @@ dstnr;	sth	r6,0(r4)
 	beq	.Lcopy_finish
 
 srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 dstnr;	stb	r6,0(r4)
 
 .Lcopy_finish:
-- 
2.10.1
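Why (proto + len) << 8 is the right little-endian contribution comes
down to a one's-complement identity: shifting a 16-bit value left by 8
and folding the end-around carry back in is equivalent to byte-swapping
it, so the shift feeds the pseudo-header fields into the sum in the
same byte order as the rest of the little-endian loads.  A short
standalone check of that identity (a sketch, not kernel code):

#include <stdio.h>
#include <stdint.h>

/* Fold a 32-bit value to 16 bits with end-around carry. */
static uint16_t fold16(uint32_t x)
{
	while (x >> 16)
		x = (x & 0xffff) + (x >> 16);
	return (uint16_t)x;
}

int main(void)
{
	uint32_t v;

	/* Exhaustively verify fold16(v << 8) == swab16(v) for all
	 * 16-bit v: the end-around carry folds the byte shifted out
	 * the top back into the bottom, i.e. a byte rotation. */
	for (v = 0; v <= 0xffff; v++) {
		uint16_t swapped = (uint16_t)((v << 8) | (v >> 8));

		if (fold16(v << 8) != swapped) {
			printf("mismatch at 0x%04x\n", v);
			return 1;
		}
	}
	printf("identity holds for all 16-bit values\n");
	return 0;
}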
* Re: [2/2] powerpc/64: Use optimized checksum routines on little-endian

From: Michael Ellerman @ 2017-01-27  0:40 UTC
To: Paul Mackerras, linuxppc-dev

On Thu, 2016-11-03 at 05:15:42 UTC, Paul Mackerras wrote:
> Currently we have optimized hand-coded assembly checksum routines
> for big-endian 64-bit systems, but for little-endian we use the
> generic C routines.  This modifies the optimized routines to work
> for little-endian.  With this, we no longer need to enable
> CONFIG_GENERIC_CSUM.  This also fixes a couple of comments
> in checksum_64.S so they accurately reflect what the associated
> instruction does.
>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d4fde568a34a93897dfb9ae64cfe9d

cheers
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold

From: Michael Ellerman @ 2016-11-08  7:23 UTC
To: Paul Mackerras, linuxppc-dev

Paul Mackerras <paulus@ozlabs.org> writes:

> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.

This seems to have been broken since ~forever.  Do we just not hit
that case very often, or do we just incorrectly report checksum
failures?

Should it go to stable?

cheers
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold

From: Paul Mackerras @ 2016-11-08 22:05 UTC
To: Michael Ellerman
Cc: linuxppc-dev

On Tue, Nov 08, 2016 at 06:23:30PM +1100, Michael Ellerman wrote:
> Paul Mackerras <paulus@ozlabs.org> writes:
>
> > These functions compute an IP checksum by computing a 64-bit sum and
> > folding it to 32 bits (the "nofold" in their names refers to folding
> > down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> > sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> > can produce a carry out from bit 31, which needs to be added in to
> > the sum to produce the correct result.
> >
> > To fix this, we copy the from64to32() function from lib/checksum.c
> > and use that.
>
> This seems to have been broken since ~forever.  Do we just not hit
> that case very often, or do we just incorrectly report checksum
> failures?

I think there would be about a 1 in a billion chance of hitting it by
chance, though you could probably construct a test case that would hit
it every time.  If you did hit it in real life, it would result in a
packet being dropped and presumably retransmitted, and I expect that
the IP header of the retransmitted packet would be sufficiently
different (i.e. a different id field or something) that it wouldn't
hit the bug a second time.

> Should it go to stable?

Probably... though nobody has actually noticed a problem in real life
and pinned it down to this.

Paul.
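For illustration, a deterministic test case of the kind described
above is straightforward to construct.  The sketch below uses
contrived, hypothetical inputs to the 64-bit csum_tcpudp_nofold path,
chosen so the fold loses a carry on every call rather than one time
in a billion:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Inputs chosen so the low 32 bits of s end up close enough
	 * to 0xffffffff that adding (s >> 32) carries out of bit 31. */
	uint32_t sum      = 0xffffffff;	/* incoming partial checksum */
	uint32_t saddr    = 0xffffffff;	/* 255.255.255.255 */
	uint32_t daddr    = 0xfffffffe;	/* 255.255.255.254 */
	uint32_t protolen = 2;		/* proto + len */

	uint64_t s = (uint64_t)sum + saddr + daddr + protolen;

	uint32_t old = (uint32_t)(s + (s >> 32));	/* drops the carry */
	uint64_t x = (s & 0xffffffff) + (s >> 32);
	uint32_t fixed = (uint32_t)((x & 0xffffffff) + (x >> 32));

	/* Prints old=0x00000000 fixed=0x00000001: this input hits the
	 * bug on every call. */
	printf("old=0x%08x fixed=0x%08x\n", (unsigned)old, (unsigned)fixed);
	return 0;
}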
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold

From: Michael Ellerman @ 2016-12-02  9:44 UTC
To: Paul Mackerras, linuxppc-dev

Paul Mackerras <paulus@ozlabs.org> writes:

> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.

This collided with:

  f9d4286b9516 ("arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold")

Mind rebasing?

cheers
* Re: [1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold

From: Michael Ellerman @ 2017-01-27  0:40 UTC
To: Paul Mackerras, linuxppc-dev

On Thu, 2016-11-03 at 05:10:55 UTC, Paul Mackerras wrote:
> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.
>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/b492f7e4e07a28e706db26cf4943bb

cheers