All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] MIPS: lib: Optimize partial checksum ops using prefetching.
@ 2014-01-21 16:18 Steven J. Hill
  2014-01-21 17:37 ` Florian Fainelli
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Steven J. Hill @ 2014-01-21 16:18 UTC (permalink / raw)
  To: linux-mips; +Cc: ralf

From: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>

Use the PREF instruction to prefetch the source and destination buffers
in the copy-and-checksum code, optimizing partial checksum operations.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
Signed-off-by: Steven J. Hill <Steven.Hill@imgtec.com>
---
 arch/mips/lib/csum_partial.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a6adffb..272820e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -417,13 +417,19 @@ FEXPORT(csum_partial_copy_nocheck)
 	 *
 	 * If len < NBYTES use byte operations.
 	 */
+	PREF(	0, 0(src))
+	PREF(	1, 0(dst))
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
 	bnez	t2, .Lcopy_bytes_checklen
+	PREF(	0, 32(src))
+	PREF(	1, 32(dst))
 	 and	t0, src, ADDRMASK
 	andi	odd, dst, 0x1			/* odd buffer? */
 	bnez	t1, .Ldst_unaligned
 	 nop
+	PREF(	0, 2*32(src))
+	PREF(	1, 2*32(dst))
 	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
@@ -434,6 +440,8 @@ FEXPORT(csum_partial_copy_nocheck)
 	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
 	 nop
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
+	PREF(	0, 3*32(src))
+	PREF(	1, 3*32(dst))
 	.align	4
 1:
 EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
@@ -464,6 +472,8 @@ EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
+	PREF(	0, 8*32(src))
+	PREF(	1, 8*32(dst))
 	bgez	len, 1b
 	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
@@ -569,8 +579,10 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
 
 .Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
+	PREF(	0, 3*32(src))
 	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
+	PREF(	1, 3*32(dst))
 1:
 /*
  * Avoid consecutive LD*'s to the same register since some mips
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-01-21 21:03 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2014-01-21 16:18 [PATCH] MIPS: lib: Optimize partial checksum ops using prefetching Steven J. Hill
2014-01-21 17:37 ` Florian Fainelli
2014-01-21 18:25 ` David Daney
2014-01-21 20:16   ` Steven J. Hill
2014-01-21 20:25     ` Florian Fainelli
2014-01-21 20:49 ` Ralf Baechle
2014-01-21 20:58   ` Steven J. Hill
2014-01-21 20:58     ` Steven J. Hill
2014-01-21 21:03     ` David Daney

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.