* [PATCH] MIPS checksum fix
[not found] ` <20080919112304.GB13440@linux-mips.org>
@ 2008-09-19 11:47 ` Ralf Baechle
2008-09-19 12:07 ` Ralf Baechle
0 siblings, 1 reply; 9+ messages in thread
From: Ralf Baechle @ 2008-09-19 11:47 UTC (permalink / raw)
To: Maciej W. Rozycki; +Cc: Atsushi Nemoto, u1, linux-mips, netdev
On Fri, Sep 19, 2008 at 01:23:04PM +0200, Ralf Baechle wrote:
> > I can see you have done the microoptimisation I had in mind meanwhile --
> > thanks for saving me the effort. ;) There is a delay slot to fill left
> > though -- will you take care of it too?
>
> Will do - just couldn't be bothered (too) late last night ...
Voila.
I'm interested in test reports of this on all sorts of configurations -
32-bit, 64-bit, big / little endian, R2 processors and pre-R2. In
particular Cavium being the only MIPS64 R2 implementation would be
interesting. This definately is stuff which should go upstream for 2.6.27.
Ralf
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d77841..eac0d61 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -53,12 +53,14 @@
#define UNIT(unit) ((unit)*NBYTES)
#define ADDC(sum,reg) \
- .set push; \
- .set noat; \
ADD sum, reg; \
sltu v1, sum, reg; \
ADD sum, v1; \
- .set pop
+
+#define ADDC32(sum,reg) \
+ addu sum, reg; \
+ sltu v1, sum, reg; \
+ addu sum, v1; \
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
LOAD _t0, (offset + UNIT(0))(src); \
@@ -254,8 +256,6 @@ LEAF(csum_partial)
1: ADDC(sum, t1)
/* fold checksum */
- .set push
- .set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
@@ -263,24 +263,25 @@ LEAF(csum_partial)
dsra32 sum, sum, 0
addu sum, v1
#endif
- sll v1, sum, 16
- addu sum, v1
- sltu v1, sum, v1
- srl sum, sum, 16
- addu sum, v1
/* odd buffer alignment? */
- beqz t7, 1f
- nop
- sll v1, sum, 8
+#ifdef CPU_MIPSR2
+ wsbh sum, sum
+ movn sum, v1, t7
+#else
+ beqz t7, 1f /* odd buffer alignment? */
+ lui v1, 0x00ff
+ addu v1, 0x00ff
+ and t0, sum, v1
+ sll t0, t0, 8
srl sum, sum, 8
- or sum, v1
- andi sum, 0xffff
- .set pop
+ and sum, sum, v1
+ or sum, sum, t0
1:
+#endif
.set reorder
/* Add the passed partial csum. */
- ADDC(sum, a2)
+ ADDC32(sum, a2)
jr ra
.set noreorder
END(csum_partial)
@@ -656,8 +657,6 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
ADDC(sum, t2)
.Ldone:
/* fold checksum */
- .set push
- .set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
@@ -665,23 +664,23 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
dsra32 sum, sum, 0
addu sum, v1
#endif
- sll v1, sum, 16
- addu sum, v1
- sltu v1, sum, v1
- srl sum, sum, 16
- addu sum, v1
- /* odd buffer alignment? */
- beqz odd, 1f
- nop
- sll v1, sum, 8
+#ifdef CPU_MIPSR2
+ wsbh v1, sum
+ movn sum, v1, odd
+#else
+ beqz odd, 1f /* odd buffer alignment? */
+ lui v1, 0x00ff
+ addu v1, 0x00ff
+ and t0, sum, v1
+ sll t0, t0, 8
srl sum, sum, 8
- or sum, v1
- andi sum, 0xffff
- .set pop
+ and sum, sum, v1
+ or sum, sum, t0
1:
+#endif
.set reorder
- ADDC(sum, psum)
+ ADDC32(sum, psum)
jr ra
.set noreorder
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 11:47 ` [PATCH] MIPS checksum fix Ralf Baechle
@ 2008-09-19 12:07 ` Ralf Baechle
2008-09-19 12:15 ` Maciej W. Rozycki
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Ralf Baechle @ 2008-09-19 12:07 UTC (permalink / raw)
To: Maciej W. Rozycki; +Cc: Atsushi Nemoto, u1, linux-mips, netdev
On Fri, Sep 19, 2008 at 01:47:43PM +0200, Ralf Baechle wrote:
> I'm interested in test reports of this on all sorts of configurations -
> 32-bit, 64-bit, big / little endian, R2 processors and pre-R2. In
> particular Cavium being the only MIPS64 R2 implementation would be
> interesting. This definately is stuff which should go upstream for 2.6.27.
There was a trivial bug in the R2 code.
>From 97ad23f4696a322cb3bc379a25a8c0f6526751d6 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 19 Sep 2008 14:05:53 +0200
Subject: [PATCH] [MIPS] Fix 64-bit csum_partial, __csum_partial_copy_user and csum_partial_copy
On 64-bit machines it wouldn't handle a possible carry when adding the
32-bit folded checksum and checksum argument.
While at it, add a few trivial optimizations, also for R2 processors.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d77841..c77a7a0 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -53,12 +53,14 @@
#define UNIT(unit) ((unit)*NBYTES)
#define ADDC(sum,reg) \
- .set push; \
- .set noat; \
ADD sum, reg; \
sltu v1, sum, reg; \
ADD sum, v1; \
- .set pop
+
+#define ADDC32(sum,reg) \
+ addu sum, reg; \
+ sltu v1, sum, reg; \
+ addu sum, v1; \
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
LOAD _t0, (offset + UNIT(0))(src); \
@@ -254,8 +256,6 @@ LEAF(csum_partial)
1: ADDC(sum, t1)
/* fold checksum */
- .set push
- .set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
@@ -263,24 +263,25 @@ LEAF(csum_partial)
dsra32 sum, sum, 0
addu sum, v1
#endif
- sll v1, sum, 16
- addu sum, v1
- sltu v1, sum, v1
- srl sum, sum, 16
- addu sum, v1
/* odd buffer alignment? */
- beqz t7, 1f
- nop
- sll v1, sum, 8
+#ifdef CPU_MIPSR2
+ wsbh v1, sum
+ movn sum, v1, t7
+#else
+ beqz t7, 1f /* odd buffer alignment? */
+ lui v1, 0x00ff
+ addu v1, 0x00ff
+ and t0, sum, v1
+ sll t0, t0, 8
srl sum, sum, 8
- or sum, v1
- andi sum, 0xffff
- .set pop
+ and sum, sum, v1
+ or sum, sum, t0
1:
+#endif
.set reorder
/* Add the passed partial csum. */
- ADDC(sum, a2)
+ ADDC32(sum, a2)
jr ra
.set noreorder
END(csum_partial)
@@ -656,8 +657,6 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
ADDC(sum, t2)
.Ldone:
/* fold checksum */
- .set push
- .set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
@@ -665,23 +664,23 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
dsra32 sum, sum, 0
addu sum, v1
#endif
- sll v1, sum, 16
- addu sum, v1
- sltu v1, sum, v1
- srl sum, sum, 16
- addu sum, v1
- /* odd buffer alignment? */
- beqz odd, 1f
- nop
- sll v1, sum, 8
+#ifdef CPU_MIPSR2
+ wsbh v1, sum
+ movn sum, v1, odd
+#else
+ beqz odd, 1f /* odd buffer alignment? */
+ lui v1, 0x00ff
+ addu v1, 0x00ff
+ and t0, sum, v1
+ sll t0, t0, 8
srl sum, sum, 8
- or sum, v1
- andi sum, 0xffff
- .set pop
+ and sum, sum, v1
+ or sum, sum, t0
1:
+#endif
.set reorder
- ADDC(sum, psum)
+ ADDC32(sum, psum)
jr ra
.set noreorder
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 12:07 ` Ralf Baechle
@ 2008-09-19 12:15 ` Maciej W. Rozycki
2008-09-19 14:09 ` Atsushi Nemoto
2008-09-23 21:52 ` Bryan Phillippe
2 siblings, 0 replies; 9+ messages in thread
From: Maciej W. Rozycki @ 2008-09-19 12:15 UTC (permalink / raw)
To: Ralf Baechle; +Cc: Atsushi Nemoto, u1, linux-mips, netdev
On Fri, 19 Sep 2008, Ralf Baechle wrote:
> + beqz t7, 1f /* odd buffer alignment? */
> + lui v1, 0x00ff
Well, .set reorder to move something from before the branch would have
been a little bit better for the common aligned case. ;) There is nothing
special about branch delay slots in the whole epilogue, so one from just
before the return might simply be relocated here.
Maciej
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 12:07 ` Ralf Baechle
2008-09-19 12:15 ` Maciej W. Rozycki
@ 2008-09-19 14:09 ` Atsushi Nemoto
2008-09-19 15:02 ` Maciej W. Rozycki
2008-09-20 15:09 ` Ralf Baechle
2008-09-23 21:52 ` Bryan Phillippe
2 siblings, 2 replies; 9+ messages in thread
From: Atsushi Nemoto @ 2008-09-19 14:09 UTC (permalink / raw)
To: ralf; +Cc: macro, u1, linux-mips, netdev
On Fri, 19 Sep 2008 14:07:52 +0200, Ralf Baechle <ralf@linux-mips.org> wrote:
> From 97ad23f4696a322cb3bc379a25a8c0f6526751d6 Mon Sep 17 00:00:00 2001
> From: Ralf Baechle <ralf@linux-mips.org>
> Date: Fri, 19 Sep 2008 14:05:53 +0200
> Subject: [PATCH] [MIPS] Fix 64-bit csum_partial, __csum_partial_copy_user and csum_partial_copy
... and __csum_partial_copy_nocheck, you mean? ;)
> On 64-bit machines it wouldn't handle a possible carry when adding the
> 32-bit folded checksum and checksum argument.
>
> While at it, add a few trivial optimizations, also for R2 processors.
I think it would be better splitting bugfix and optimization. This
code is too complex to do many things at a time, isn't it?
> @@ -53,12 +53,14 @@
> #define UNIT(unit) ((unit)*NBYTES)
>
> #define ADDC(sum,reg) \
> - .set push; \
> - .set noat; \
> ADD sum, reg; \
> sltu v1, sum, reg; \
> ADD sum, v1; \
> - .set pop
Is this required? Just a cleanup?
> @@ -254,8 +256,6 @@ LEAF(csum_partial)
> 1: ADDC(sum, t1)
>
> /* fold checksum */
> - .set push
> - .set noat
> #ifdef USE_DOUBLE
> dsll32 v1, sum, 0
> daddu sum, v1
> @@ -263,24 +263,25 @@ LEAF(csum_partial)
> dsra32 sum, sum, 0
> addu sum, v1
> #endif
> - sll v1, sum, 16
> - addu sum, v1
> - sltu v1, sum, v1
> - srl sum, sum, 16
> - addu sum, v1
>
> /* odd buffer alignment? */
> - beqz t7, 1f
> - nop
> - sll v1, sum, 8
> +#ifdef CPU_MIPSR2
> + wsbh v1, sum
> + movn sum, v1, t7
> +#else
> + beqz t7, 1f /* odd buffer alignment? */
> + lui v1, 0x00ff
> + addu v1, 0x00ff
> + and t0, sum, v1
> + sll t0, t0, 8
> srl sum, sum, 8
> - or sum, v1
> - andi sum, 0xffff
> - .set pop
> + and sum, sum, v1
> + or sum, sum, t0
> 1:
> +#endif
Is this just an optimization? or contain any fixes?
---
Atsushi Nemoto
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 14:09 ` Atsushi Nemoto
@ 2008-09-19 15:02 ` Maciej W. Rozycki
2008-09-20 15:09 ` Ralf Baechle
1 sibling, 0 replies; 9+ messages in thread
From: Maciej W. Rozycki @ 2008-09-19 15:02 UTC (permalink / raw)
To: Atsushi Nemoto; +Cc: ralf, u1, linux-mips, netdev
On Fri, 19 Sep 2008, Atsushi Nemoto wrote:
> I think it would be better splitting bugfix and optimization. This
> code is too complex to do many things at a time, isn't it?
It's probably easier for people to test a single patch now and it can
then be split at the commitment time. Of course if something actually
breaks, then keeping changes combined won't help tracking down the cause.
;)
Maciej
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 14:09 ` Atsushi Nemoto
2008-09-19 15:02 ` Maciej W. Rozycki
@ 2008-09-20 15:09 ` Ralf Baechle
1 sibling, 0 replies; 9+ messages in thread
From: Ralf Baechle @ 2008-09-20 15:09 UTC (permalink / raw)
To: Atsushi Nemoto; +Cc: macro, u1, linux-mips, netdev
On Fri, Sep 19, 2008 at 11:09:52PM +0900, Atsushi Nemoto wrote:
> I think it would be better splitting bugfix and optimization. This
> code is too complex to do many things at a time, isn't it?
>
> > @@ -53,12 +53,14 @@
> > #define UNIT(unit) ((unit)*NBYTES)
> >
> > #define ADDC(sum,reg) \
> > - .set push; \
> > - .set noat; \
> > ADD sum, reg; \
> > sltu v1, sum, reg; \
> > ADD sum, v1; \
> > - .set pop
>
> Is this required? Just a cleanup?
It papers over potencially important warnings so had to go. I argue the
caller of ADDC should set noat mode, if at all.
Ralf
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-19 12:07 ` Ralf Baechle
2008-09-19 12:15 ` Maciej W. Rozycki
2008-09-19 14:09 ` Atsushi Nemoto
@ 2008-09-23 21:52 ` Bryan Phillippe
2008-09-23 22:06 ` Ralf Baechle
2008-09-29 15:28 ` Atsushi Nemoto
2 siblings, 2 replies; 9+ messages in thread
From: Bryan Phillippe @ 2008-09-23 21:52 UTC (permalink / raw)
To: Ralf Baechle, Atsushi Nemoto; +Cc: Maciej W. Rozycki, linux-mips, netdev
On Sep 19, 2008, at 5:07 AM, Ralf Baechle wrote:
> On Fri, Sep 19, 2008 at 01:47:43PM +0200, Ralf Baechle wrote:
>
>> I'm interested in test reports of this on all sorts of
>> configurations -
>> 32-bit, 64-bit, big / little endian, R2 processors and pre-R2. In
>> particular Cavium being the only MIPS64 R2 implementation would be
>> interesting. This definately is stuff which should go upstream for
>> 2.6.27.
>
> There was a trivial bug in the R2 code.
>
>> From 97ad23f4696a322cb3bc379a25a8c0f6526751d6 Mon Sep 17 00:00:00
>> 2001
> From: Ralf Baechle <ralf@linux-mips.org>
> Date: Fri, 19 Sep 2008 14:05:53 +0200
> Subject: [PATCH] [MIPS] Fix 64-bit csum_partial,
> __csum_partial_copy_user and csum_partial_copy
>
> On 64-bit machines it wouldn't handle a possible carry when adding the
> 32-bit folded checksum and checksum argument.
>
> While at it, add a few trivial optimizations, also for R2 processors.
I tried this patch (with and without Atsushi's addition, shown below)
on a MIPS64 today and the checksums were all bad (i.e. worse than the
original problem).
Note that I had to manually create the diff, because of "malformed
patch" errors at line 21 (second hunk).
If anyone would like to send me an updated unified diff for this
issue, I can re-test today within the next day.
--
-bp
> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
>
> diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/
> csum_partial.S
> index 8d77841..c77a7a0 100644
> --- a/arch/mips/lib/csum_partial.S
> +++ b/arch/mips/lib/csum_partial.S
> @@ -53,12 +53,14 @@
> #define UNIT(unit) ((unit)*NBYTES)
>
> #define ADDC(sum,reg) \
> - .set push; \
> - .set noat; \
> ADD sum, reg; \
> sltu v1, sum, reg; \
> ADD sum, v1; \
> - .set pop
> +
> +#define ADDC32(sum,reg) \
> + addu sum, reg; \
> + sltu v1, sum, reg; \
> + addu sum, v1; \
>
> #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
> LOAD _t0, (offset + UNIT(0))(src); \
> @@ -254,8 +256,6 @@ LEAF(csum_partial)
> 1: ADDC(sum, t1)
>
> /* fold checksum */
> - .set push
> - .set noat
> #ifdef USE_DOUBLE
> dsll32 v1, sum, 0
> daddu sum, v1
> @@ -263,24 +263,25 @@ LEAF(csum_partial)
> dsra32 sum, sum, 0
> addu sum, v1
> #endif
> - sll v1, sum, 16
> - addu sum, v1
> - sltu v1, sum, v1
> - srl sum, sum, 16
> - addu sum, v1
>
> /* odd buffer alignment? */
> - beqz t7, 1f
> - nop
> - sll v1, sum, 8
> +#ifdef CPU_MIPSR2
> + wsbh v1, sum
> + movn sum, v1, t7
> +#else
> + beqz t7, 1f /* odd buffer alignment? */
> + lui v1, 0x00ff
> + addu v1, 0x00ff
> + and t0, sum, v1
> + sll t0, t0, 8
> srl sum, sum, 8
> - or sum, v1
> - andi sum, 0xffff
> - .set pop
> + and sum, sum, v1
> + or sum, sum, t0
> 1:
> +#endif
> .set reorder
> /* Add the passed partial csum. */
> - ADDC(sum, a2)
> + ADDC32(sum, a2)
> jr ra
> .set noreorder
> END(csum_partial)
> @@ -656,8 +657,6 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
> ADDC(sum, t2)
> .Ldone:
> /* fold checksum */
> - .set push
> - .set noat
> #ifdef USE_DOUBLE
> dsll32 v1, sum, 0
> daddu sum, v1
> @@ -665,23 +664,23 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
> dsra32 sum, sum, 0
> addu sum, v1
> #endif
> - sll v1, sum, 16
> - addu sum, v1
> - sltu v1, sum, v1
> - srl sum, sum, 16
> - addu sum, v1
>
> - /* odd buffer alignment? */
> - beqz odd, 1f
> - nop
> - sll v1, sum, 8
> +#ifdef CPU_MIPSR2
> + wsbh v1, sum
> + movn sum, v1, odd
> +#else
> + beqz odd, 1f /* odd buffer alignment? */
> + lui v1, 0x00ff
> + addu v1, 0x00ff
> + and t0, sum, v1
> + sll t0, t0, 8
> srl sum, sum, 8
> - or sum, v1
> - andi sum, 0xffff
> - .set pop
> + and sum, sum, v1
> + or sum, sum, t0
> 1:
> +#endif
> .set reorder
> - ADDC(sum, psum)
> + ADDC32(sum, psum)
> jr ra
> .set noreorder
>
>
Begin forwarded message:
> From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
> Date: September 19, 2008 8:43:19 AM PDT
> To: u1@terran.org
> Cc: macro@linux-mips.org, linux-mips@linux-mips.org
> Subject: Re: MIPS checksum bug
>
> On Fri, 19 Sep 2008 01:17:04 +0900 (JST), Atsushi Nemoto <anemo@mba.ocn.ne.jp
> > wrote:
>> Thank you for testing. Though this patch did not fixed your problem,
>> I still have a doubt on 64-bit optimization.
>>
>> If your hardware could run 32-bit kernel, could you confirm the
>> problem can happens in 32-bit too or not?
>
> I think I found possible breakage on 64-bit path.
>
> There are some "lw" (and "ulw") used in 64-bit path and they should be
> "lwu" (and "ulwu" ... but there is no such pseudo insn) to avoid
> sign-extention.
>
> Here is a completely untested patch.
>
> diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/
> csum_partial.S
> index 8d77841..40f9174 100644
> --- a/arch/mips/lib/csum_partial.S
> +++ b/arch/mips/lib/csum_partial.S
> @@ -39,12 +39,14 @@
> #ifdef USE_DOUBLE
>
> #define LOAD ld
> +#define LOAD32 lwu
> #define ADD daddu
> #define NBYTES 8
>
> #else
>
> #define LOAD lw
> +#define LOAD32 lw
> #define ADD addu
> #define NBYTES 4
>
> @@ -60,6 +62,14 @@
> ADD sum, v1; \
> .set pop
>
> +#define ADDC32(sum,reg) \
> + .set push; \
> + .set noat; \
> + addu sum, reg; \
> + sltu v1, sum, reg; \
> + addu sum, v1; \
> + .set pop
> +
> #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
> LOAD _t0, (offset + UNIT(0))(src); \
> LOAD _t1, (offset + UNIT(1))(src); \
> @@ -132,7 +142,7 @@ LEAF(csum_partial)
> beqz t8, .Lqword_align
> andi t8, src, 0x8
>
> - lw t0, 0x00(src)
> + LOAD32 t0, 0x00(src)
> LONG_SUBU a1, a1, 0x4
> ADDC(sum, t0)
> PTR_ADDU src, src, 0x4
> @@ -211,7 +221,7 @@ LEAF(csum_partial)
> LONG_SRL t8, t8, 0x2
>
> .Lend_words:
> - lw t0, (src)
> + LOAD32 t0, (src)
> LONG_SUBU t8, t8, 0x1
> ADDC(sum, t0)
> .set reorder /* DADDI_WAR */
> @@ -229,6 +239,9 @@ LEAF(csum_partial)
>
> /* Still a full word to go */
> ulw t1, (src)
> +#ifdef USE_DOUBLE
> + add t1, zero /* clear upper 32bit */
> +#endif
> PTR_ADDIU src, 4
> ADDC(sum, t1)
>
> @@ -280,7 +293,7 @@ LEAF(csum_partial)
> 1:
> .set reorder
> /* Add the passed partial csum. */
> - ADDC(sum, a2)
> + ADDC32(sum, a2)
> jr ra
> .set noreorder
> END(csum_partial)
> @@ -681,7 +694,7 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
> .set pop
> 1:
> .set reorder
> - ADDC(sum, psum)
> + ADDC32(sum, psum)
> jr ra
> .set noreorder
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-23 21:52 ` Bryan Phillippe
@ 2008-09-23 22:06 ` Ralf Baechle
2008-09-29 15:28 ` Atsushi Nemoto
1 sibling, 0 replies; 9+ messages in thread
From: Ralf Baechle @ 2008-09-23 22:06 UTC (permalink / raw)
To: Bryan Phillippe; +Cc: Atsushi Nemoto, Maciej W. Rozycki, linux-mips, netdev
On Tue, Sep 23, 2008 at 02:52:24PM -0700, Bryan Phillippe wrote:
> If anyone would like to send me an updated unified diff for this issue, I
> can re-test today within the next day.
An updated fix is already in the lmo and kernel.org git repositories.
Ralf
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] MIPS checksum fix
2008-09-23 21:52 ` Bryan Phillippe
2008-09-23 22:06 ` Ralf Baechle
@ 2008-09-29 15:28 ` Atsushi Nemoto
1 sibling, 0 replies; 9+ messages in thread
From: Atsushi Nemoto @ 2008-09-29 15:28 UTC (permalink / raw)
To: u1; +Cc: ralf, macro, linux-mips, netdev
On Tue, 23 Sep 2008 14:52:24 -0700, Bryan Phillippe <u1@terran.org> wrote:
>
> I tried this patch (with and without Atsushi's addition, shown below)
> on a MIPS64 today and the checksums were all bad (i.e. worse than the
> original problem).
>
> Note that I had to manually create the diff, because of "malformed
> patch" errors at line 21 (second hunk).
>
> If anyone would like to send me an updated unified diff for this
> issue, I can re-test today within the next day.
I suppose your problem is still not fixed, right?
If so, could you try this patch? With this patch, checksums is always
compared with a result of "reference" implementation. If any mismatch
was found, please report the log.
arch/mips/lib/Makefile | 1 +
arch/mips/lib/csum_partial.S | 4 +-
arch/mips/lib/csum_partial_test.c | 109 +++++++++++++++++++++++++++++++++++++
3 files changed, 112 insertions(+), 2 deletions(-)
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 8810dfb..cb56e19 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_CPU_VR41XX) += dump_tlb.o
# libgcc-style stuff needed in the kernel
obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += csum_partial_test.o
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index edac989..22e9767 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -101,7 +101,7 @@
.text
.set noreorder
.align 5
-LEAF(csum_partial)
+LEAF(asm_csum_partial)
move sum, zero
move t7, zero
@@ -296,7 +296,7 @@ LEAF(csum_partial)
ADDC32(sum, a2)
jr ra
.set noreorder
- END(csum_partial)
+ END(asm_csum_partial)
/*
diff --git a/arch/mips/lib/csum_partial_test.c b/arch/mips/lib/csum_partial_test.c
new file mode 100644
index 0000000..4a4887e
--- /dev/null
+++ b/arch/mips/lib/csum_partial_test.c
@@ -0,0 +1,109 @@
+#include <net/checksum.h>
+
+static inline __sum16 ref_csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+static inline unsigned short from32to16(unsigned int x)
+{
+ /* add up 16-bit and 16-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+static unsigned int do_csum(const unsigned char * buff, int len)
+{
+ int odd, count;
+ unsigned int result = 0;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+#ifdef __BIG_ENDIAN
+ result = *buff;
+#else
+ result = *buff << 8;
+#endif
+ len--;
+ buff++;
+ }
+ count = len >> 1; /* nr of 16-bit words.. */
+ if (count) {
+ if (2 & (unsigned long) buff) {
+ result += *(unsigned short *) buff;
+ count--;
+ len -= 2;
+ buff += 2;
+ }
+ count >>= 1; /* nr of 32-bit words.. */
+ if (count) {
+ unsigned int carry = 0;
+ do {
+ unsigned int w = *(unsigned int *) buff;
+ count--;
+ buff += 4;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (count);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ if (len & 1) {
+#ifdef __BIG_ENDIAN
+ result += (*buff << 8);
+#else
+ result += *buff;
+#endif
+ }
+ result = from32to16(result);
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+
+static __wsum ref_csum_partial(const void *buff, int len, __wsum sum)
+{
+ unsigned int result = do_csum(buff, len);
+
+ /* add in old sum, and carry.. */
+ result += (__force u32)sum;
+ if ((__force u32)sum > result)
+ result += 1;
+ return (__force __wsum)result;
+}
+
+__wsum asm_csum_partial(const void *buff, int len, __wsum sum);
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+ __wsum ref_wsum = ref_csum_partial(buff, len, sum);
+ __sum16 ref_sum = ref_csum_fold(ref_wsum);
+ __wsum cal_wsum = asm_csum_partial(buff, len, sum);
+ __sum16 cal_sum = csum_fold(cal_wsum);
+ if (ref_sum != cal_sum) {
+ int i;
+ printk("csum_partial error."
+ " %#04x(%#08x) != %#04x(%#08x)\n",
+ ref_sum, ref_wsum, cal_sum, cal_wsum);
+ printk("len %#04x, sum %#08x\n", len, sum);
+ printk("buf");
+ for (i = 0; i < len; i++)
+ printk(" %#02x", *((const u8 *)buff + i));
+ printk("\n");
+ }
+ return ref_wsum;
+}
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2008-09-29 15:29 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <Pine.LNX.4.55.0809171104290.17103@cliff.in.clinika.pl>
[not found] ` <20080917.222350.41199051.anemo@mba.ocn.ne.jp>
[not found] ` <Pine.LNX.4.55.0809171501450.17103@cliff.in.clinika.pl>
[not found] ` <20080918.002705.78730226.anemo@mba.ocn.ne.jp>
[not found] ` <Pine.LNX.4.55.0809171917580.17103@cliff.in.clinika.pl>
[not found] ` <20080918220734.GA19222@linux-mips.org>
[not found] ` <Pine.LNX.4.55.0809190112090.22686@cliff.in.clinika.pl>
[not found] ` <20080919112304.GB13440@linux-mips.org>
2008-09-19 11:47 ` [PATCH] MIPS checksum fix Ralf Baechle
2008-09-19 12:07 ` Ralf Baechle
2008-09-19 12:15 ` Maciej W. Rozycki
2008-09-19 14:09 ` Atsushi Nemoto
2008-09-19 15:02 ` Maciej W. Rozycki
2008-09-20 15:09 ` Ralf Baechle
2008-09-23 21:52 ` Bryan Phillippe
2008-09-23 22:06 ` Ralf Baechle
2008-09-29 15:28 ` Atsushi Nemoto
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).