public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls
@ 2017-09-27 19:25 Vijay Kumar
  2017-09-27 19:25 ` [PATCH v2 1/2] sparc64: Define SPARC default fls " Vijay Kumar
  2017-09-27 19:25 ` [PATCH v2 2/2] sparc64: Use lzcnt instruction for " Vijay Kumar
  0 siblings, 2 replies; 10+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

SPARC provides lzcnt instruction (with VIS3) which can be used to
optimize fls, fls64 and __fls functions. For the systems that support
lzcnt instruction, we now do boot time patching to use sparc
optimized fls, fls64 and __fls functions.

v1->v2: 
 - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile                |    1 +
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/kernel/head_64.S        |    2 +
 arch/sparc/lib/Makefile            |    4 +
 arch/sparc/lib/NG4fls.S            |   31 +++++++++
 arch/sparc/lib/NG4patch.S          |    9 +++
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:25 [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls Vijay Kumar
@ 2017-09-27 19:25 ` Vijay Kumar
  2017-09-27 19:50   ` Sam Ravnborg
  2017-09-27 19:25 ` [PATCH v2 2/2] sparc64: Use lzcnt instruction for " Vijay Kumar
  1 sibling, 1 reply; 10+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))	/* NOTE: expands word twice; 0 for word == 0, else __fls(word) + 1 */
+int fls(unsigned int word);	/* implemented in arch/sparc/lib/fls.S */
+int __fls(unsigned long word);	/* implemented in arch/sparc/lib/fls.S */
+
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include <asm/bitsperlong.h>
+#include <asm/export.h>
+
+	.text
+	.align	32
+
+	.global	fls, __fls
+	.type	fls,	#function
+	.type	__fls,	#function
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:		/* in: %o0 = word; out: %o0 = fls(word); %o1 = running result, %g2 = test copy, %g3 = mask */
+	brz,pn	%o0, 6f		/* word == 0: return 0 */
+	 mov	0, %o1		/* delay slot: result = 0 */
+	sethi	%hi(0xffff0000), %g3	/* mask for bits 31..16 */
+	mov	%o0, %g2	/* %g2 = copy used for the mask tests */
+	andcc	%o0, %g3, %g0
+	be,pt	%icc, 8f	/* bits 31..16 all clear: handle at 8: */
+	 mov	32, %o1		/* delay slot: start the result at 32 */
+	sethi	%hi(0xff000000), %g3	/* mask for bits 31..24 */
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 3f	/* some bit in 31..24 set: no 8-bit shift needed */
+	 sethi	%hi(0xf0000000), %g3	/* delay slot: mask for bits 31..28 */
+	sll	%o0, 8, %o0	/* bits 31..24 clear: shift them out */
+1:
+	add	%o1, -8, %o1	/* account for the 8-bit shift */
+	sra	%o0, 0, %o0	/* sign-extend the low 32 bits */
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3	/* mask for bits 31..28 */
+3:
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 4f	/* some bit in 31..28 set: no 4-bit shift needed */
+	 sethi	%hi(0xc0000000), %g3	/* delay slot: mask for bits 31..30 */
+	sll	%o0, 4, %o0	/* bits 31..28 clear: shift them out */
+	add	%o1, -4, %o1	/* account for the 4-bit shift */
+	sra	%o0, 0, %o0	/* sign-extend the low 32 bits */
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0
+	be,a,pt	%icc, 7f	/* bits 31..30 clear; ",a" annuls the delay slot when not taken */
+	 sll	%o0, 2, %o0	/* delay slot (taken path only): shift 2 bits out */
+5:
+	xnor	%g0, %o0, %o0	/* %o0 = ~%o0 */
+	srl	%o0, 31, %o0	/* 1 if bit 31 of the shifted word was clear, else 0 */
+	sub	%o1, %o0, %o1	/* final one-bit adjustment */
+6:
+	jmp	%o7 + 8		/* return to caller */
+	 sra	%o1, 0, %o0	/* delay slot: return value = result */
+7:
+	add	%o1, -2, %o1	/* account for the 2-bit shift done in the delay slot at 4: */
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0	/* delay slot: sign-extend the low 32 bits */
+8:		/* reached when bits 31..16 of word are clear */
+	sll	%o0, 16, %o0	/* move the low half-word to bits 31..16 */
+	sethi	%hi(0xff000000), %g3	/* mask for bits 31..24 */
+	sra	%o0, 0, %o0	/* sign-extend the low 32 bits */
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 2b	/* some bit in 31..24 set: continue with the 4-bit test */
+	 mov	16, %o1		/* delay slot (runs on both paths): result = 16 */
+	ba,pt	%xcc, 1b	/* otherwise join the 8-bit shift path */
+	 sll	%o0, 8, %o0	/* delay slot: shift 8 more bits out */
+	.size	fls, .-fls
+
+__fls:		/* in: %o0 = word; out: %o0 = bit index of the highest set bit; %g1 = running index, %g2/%g3 = masks */
+#if BITS_PER_LONG == 64
+	mov	-1, %g2
+	sllx	%g2, 32, %g2	/* %g2 = mask for bits 63..32 */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f		/* some bit in 63..32 set: no 32-bit shift needed */
+	 mov	63, %g1		/* delay slot: start the index at 63 */
+	sllx	%o0, 32, %o0	/* bits 63..32 clear: shift them out */
+#endif
+	mov	31, %g1		/* fall-through path: index starts at 31 */
+1:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-16), %g2	/* %g2 = mask for the top 16 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f		/* some top-16 bit set: no 16-bit shift needed */
+	 mov	-1, %g2		/* delay slot: all-ones (reloaded again at 2:) */
+	sllx	%o0, 16, %o0	/* top 16 bits clear: shift them out */
+	add	%g1, -16, %g1	/* account for the 16-bit shift */
+2:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-8), %g2	/* %g2 = mask for the top 8 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f		/* some top-8 bit set: no 8-bit shift needed */
+	 mov	-1, %g2		/* delay slot: all-ones, consumed by the mask at 3: */
+	sllx	%o0, 8, %o0	/* top 8 bits clear: shift them out */
+	add	%g1, -8, %g1	/* account for the 8-bit shift */
+3:		/* %g2 == -1 here, set in the preceding delay slot */
+	sllx	%g2, (BITS_PER_LONG-4), %g2	/* %g2 = mask for the top 4 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f		/* some top-4 bit set: no 4-bit shift needed */
+	 mov	-1, %g2		/* delay slot: all-ones, consumed by the mask at 4: */
+	sllx	%o0, 4, %o0	/* top 4 bits clear: shift them out */
+	add	%g1, -4, %g1	/* account for the 4-bit shift */
+4:		/* %g2 == -1 here, set in the preceding delay slot */
+	sllx	%g2, (BITS_PER_LONG-2), %g2	/* %g2 = mask for the top 2 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f		/* some top-2 bit set: no 2-bit shift needed */
+	 mov	-1, %g3		/* delay slot: all-ones, consumed by the mask at 5: */
+	sllx	%o0, 2, %o0	/* top 2 bits clear: shift them out */
+	add	%g1, -2, %g1	/* account for the 2-bit shift */
+5:
+	mov	0, %g2
+	sllx	%g3, (BITS_PER_LONG-1), %g3	/* %g3 = mask for the top bit */
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2	/* %g2 = 1 if the top bit is clear, else stays 0 */
+	sub	%g1, %g2, %g1	/* final one-bit adjustment */
+	jmp	%o7+8		/* return to caller */
+	 sra	%g1, 0, %o0	/* delay slot: return value = index */
+	.size	__fls, .-__fls
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:25 [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls Vijay Kumar
  2017-09-27 19:25 ` [PATCH v2 1/2] sparc64: Define SPARC default fls " Vijay Kumar
@ 2017-09-27 19:25 ` Vijay Kumar
  2017-09-27 19:56   ` Sam Ravnborg
  1 sibling, 1 reply; 10+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/Makefile         |    1 +
 arch/sparc/kernel/head_64.S |    2 ++
 arch/sparc/lib/Makefile     |    3 +++
 arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
 arch/sparc/lib/NG4patch.S   |    9 +++++++++
 5 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 	 nop
 	call	niagara4_patch_pageops
 	 nop
+	call	niagara4_patch_fls
+	 nop
 
 	ba,a,pt	%xcc, 80f
 	 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..eb239aa
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+	.text
+	.align 32
+
+	.globl NG4fls
+	.globl __NG4fls
+	.type  NG4fls, #function
+	.type  __NG4fls, #function
+
+NG4fls:		/* lzcnt-based replacement for fls(): returns 64 - lzcnt(%o0) */
+	lzcnt   %o0, %o1	/* NOTE(review): counts over the whole register; presumably relies on the caller zero-extending the 32-bit word - confirm */
+	mov     64, %o2
+	retl
+	 sub     %o2, %o1, %o0	/* delay slot: return value = 64 - leading-zero count */
+	.size   NG4fls, .-NG4fls
+
+__NG4fls:		/* lzcnt-based replacement for __fls(): returns 63 - lzcnt(word), or 0 when word == 0 */
+	brz,pn  %o0, 1f		/* word == 0: return with %o0 (still 0) unchanged */
+        mov	%o0, %o1	/* delay slot: copy word so %o0 can receive the result */
+	lzcnt	%o1, %o0	/* %o0 = leading-zero count of word */
+	mov     63, %o2
+	sub     %o2, %o0, %o0	/* %o0 = 63 - leading-zero count */
+1:
+	retl
+	 nop
+	.size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
 	retl
 	 nop
 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+	.globl	niagara4_patch_fls
+	.type	niagara4_patch_fls,#function
+niagara4_patch_fls:	/* called from niagara4_patch in arch/sparc/kernel/head_64.S */
+	NG_DO_PATCH(fls, NG4fls)	/* NOTE(review): NG_DO_PATCH is defined outside this hunk; presumably redirects fls to NG4fls - confirm */
+	NG_DO_PATCH(__fls, __NG4fls)	/* same for __fls -> __NG4fls */
+	retl
+	 nop
+	.size	niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:25 ` [PATCH v2 1/2] sparc64: Define SPARC default fls " Vijay Kumar
@ 2017-09-27 19:50   ` Sam Ravnborg
  2017-09-27 19:59     ` Sam Ravnborg
  0 siblings, 1 reply; 10+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:50 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

Some feedback - see below.
The comment about ENTRY() ENDPROC() is also valid for patch 2/2

	Sam

> 
> diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
> index 2d52240..946c236 100644
> --- a/arch/sparc/include/asm/bitops_64.h
> +++ b/arch/sparc/include/asm/bitops_64.h
> @@ -22,11 +22,12 @@
>  void clear_bit(unsigned long nr, volatile unsigned long *addr);
>  void change_bit(unsigned long nr, volatile unsigned long *addr);
>  
> +#define fls64(word)  (((word)?(__fls(word) + 1):0))
This macro could result in unwanted side effects.
If I use:

	fls64(i++)

for some obscure reason, then i will be incremented twice if i != 0.
Using the asm-generic version would be better.

> +int fls(unsigned int word);
> +int __fls(unsigned long word);
> +
>  #include <asm-generic/bitops/non-atomic.h>
>  
> -#include <asm-generic/bitops/fls.h>
> -#include <asm-generic/bitops/__fls.h>
> -#include <asm-generic/bitops/fls64.h>
>  
>  #ifdef __KERNEL__
>  
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index 07c03e7..eefbb9c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
>  lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
>  lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
>  lib-$(CONFIG_SPARC64) += multi3.o
> +lib-$(CONFIG_SPARC64) += fls.o
>  
>  lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
>  lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
> diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
> new file mode 100644
> index 0000000..a19bff2
> --- /dev/null
> +++ b/arch/sparc/lib/fls.S
> @@ -0,0 +1,126 @@
> +/* fls.S: SPARC default fls and __fls definitions.
> + *
> + * SPARC default fls and __fls definitions, which follows the same
> + * algorithm as in generic fls() and __fls(). These functions will
> + * be boot time patched on T4 and onward.
> + */
> +
> +#include <asm/bitsperlong.h>
> +#include <asm/export.h>
> +
> +	.text
> +	.align	32
> +
> +	.global	fls, __fls
> +	.type	fls,	#function
> +	.type	__fls,	#function
> +
> +	.register	%g2, #scratch
> +	.register	%g3, #scratch
> +
> +EXPORT_SYMBOL(__fls)
> +EXPORT_SYMBOL(fls)
> +
> +fls:
Use ENTRY(), ENDPROC() for assembler functions.
> +	brz,pn	%o0, 6f
> +	 mov	0, %o1
> +	sethi	%hi(0xffff0000), %g3
> +	mov	%o0, %g2
> +	andcc	%o0, %g3, %g0
> +	be,pt	%icc, 8f
> +	 mov	32, %o1
> +	sethi	%hi(0xff000000), %g3
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 3f
> +	 sethi	%hi(0xf0000000), %g3
> +	sll	%o0, 8, %o0
> +1:
> +	add	%o1, -8, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +2:
> +	sethi	%hi(0xf0000000), %g3
> +3:
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 4f
> +	 sethi	%hi(0xc0000000), %g3
> +	sll	%o0, 4, %o0
> +	add	%o1, -4, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +4:
> +	andcc	%g2, %g3, %g0
> +	be,a,pt	%icc, 7f
> +	 sll	%o0, 2, %o0
> +5:
> +	xnor	%g0, %o0, %o0
> +	srl	%o0, 31, %o0
> +	sub	%o1, %o0, %o1
> +6:
> +	jmp	%o7 + 8
> +	 sra	%o1, 0, %o0
> +7:
> +	add	%o1, -2, %o1
> +	ba,pt	%xcc, 5b
> +	 sra	%o0, 0, %o0
> +8:
> +	sll	%o0, 16, %o0
> +	sethi	%hi(0xff000000), %g3
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 2b
> +	 mov	16, %o1
> +	ba,pt	%xcc, 1b
> +	 sll	%o0, 8, %o0
> +	.size	fls, .-fls
> +
> +__fls:
Same here, use ENTRY(), ENDPROC()
> +#if BITS_PER_LONG == 64
> +	mov	-1, %g2
> +	sllx	%g2, 32, %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 1f
> +	 mov	63, %g1
> +	sllx	%o0, 32, %o0
> +#endif

Testing for BITS_PER_LONG seems unnecessary as long as this is sparc64 only.
And sparc32 has no optimized bit operations, not even on LEON,
so this would not make sense in sparc32 land anyway.

> +	mov	31, %g1
> +1:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-16), %g2
spaces around operators please. It is no excuse that the source did not have so.

> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 2f
> +	 mov	-1, %g2
> +	sllx	%o0, 16, %o0
> +	add	%g1, -16, %g1
> +2:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-8), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 3f
> +	 mov	-1, %g2
> +	sllx	%o0, 8, %o0
> +	add	%g1, -8, %g1
> +3:
> +	sllx	%g2, (BITS_PER_LONG-4), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 4f
> +	 mov	-1, %g2
> +	sllx	%o0, 4, %o0
> +	add	%g1, -4, %g1
> +4:
> +	sllx	%g2, (BITS_PER_LONG-2), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 5f
> +	 mov	-1, %g3
> +	sllx	%o0, 2, %o0
> +	add	%g1, -2, %g1
> +5:
> +	mov	0, %g2
> +	sllx	%g3, (BITS_PER_LONG-1), %g3
> +	and	%o0, %g3, %o0
> +	movre	%o0, 1, %g2
> +	sub	%g1, %g2, %g1
> +	jmp	%o7+8
> +	 sra	%g1, 0, %o0
> +	.size	__fls, .-__fls

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:25 ` [PATCH v2 2/2] sparc64: Use lzcnt instruction for " Vijay Kumar
@ 2017-09-27 19:56   ` Sam Ravnborg
  2017-09-27 20:29     ` Vijay Kumar
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:56 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
> For T4 and above, patch fls and __fls functions
> at the boot time to use lzcnt instruction.
> 
> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
> Reviewed-by: Babu Moger <babu.moger@oracle.com>
> ---
>  arch/sparc/Makefile         |    1 +
>  arch/sparc/kernel/head_64.S |    2 ++
>  arch/sparc/lib/Makefile     |    3 +++
>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>  5 files changed, 45 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
> index 8496a07..0763cd8 100644
> --- a/arch/sparc/Makefile
> +++ b/arch/sparc/Makefile
> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
> +KBUILD_AFLAGS += -Wa,-Asparc4
This change is not justified anywhere??

>  
>  ifeq ($(CONFIG_MCOUNT),y)
>    KBUILD_CFLAGS += -pg
> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
> index 78e0211..1165254 100644
> --- a/arch/sparc/kernel/head_64.S
> +++ b/arch/sparc/kernel/head_64.S
> @@ -628,6 +628,8 @@ niagara4_patch:
>  	 nop
>  	call	niagara4_patch_pageops
>  	 nop
> +	call	niagara4_patch_fls
> +	 nop
>  
>  	ba,a,pt	%xcc, 80f
>  	 nop
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index eefbb9c..72d2d8c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>  obj-$(CONFIG_SPARC64) += iomap.o
>  obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>  obj-$(CONFIG_SPARC64) += PeeCeeI.o
> +
> +obj-$(CONFIG_SPARC64) += fls.o
> +obj-$(CONFIG_SPARC64) += NG4fls.o
> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
> new file mode 100644
> index 0000000..eb239aa
> --- /dev/null
> +++ b/arch/sparc/lib/NG4fls.S
> @@ -0,0 +1,30 @@
> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
> + *
> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +	.text
> +	.align 32
> +
> +	.globl NG4fls
> +	.globl __NG4fls
> +	.type  NG4fls, #function
> +	.type  __NG4fls, #function

Use ENTRY(), ENDPROC() as already mentioned.
> +NG4fls:
> +	lzcnt   %o0, %o1
> +	mov     64, %o2
> +	retl
> +	 sub     %o2, %o1, %o0
> +	.size   NG4fls, .-NG4fls
> +
> +__NG4fls:
> +	brz,pn  %o0, 1f
> +        mov	%o0, %o1
Use tabs for indent, not spaces. In this case one tab + one space.

> +	lzcnt	%o1, %o0
> +	mov     63, %o2
> +	sub     %o2, %o0, %o0
> +1:
> +	retl
> +	 nop
> +	.size   __NG4fls, .-__NG4fls
> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
> index 3cc0f8c..1010d53 100644
> --- a/arch/sparc/lib/NG4patch.S
> +++ b/arch/sparc/lib/NG4patch.S
> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>  	retl
>  	 nop
>  	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
> +
> +	.globl	niagara4_patch_fls
> +	.type	niagara4_patch_fls,#function
> +niagara4_patch_fls:
> +	NG_DO_PATCH(fls, NG4fls)
> +	NG_DO_PATCH(__fls, __NG4fls)
> +	retl
> +	 nop
> +	.size	niagara4_patch_fls,.-niagara4_patch_fls

Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

	Sam

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:50   ` Sam Ravnborg
@ 2017-09-27 19:59     ` Sam Ravnborg
  0 siblings, 0 replies; 10+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:59 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

> > +
> > +EXPORT_SYMBOL(__fls)
> > +EXPORT_SYMBOL(fls)
> > +
> > +fls:
> Use ENTRY(), ENDPROC() for assembler functions.
And locate EXPORT_SYMBOL() right after ENDPROC().

	Sam

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56   ` Sam Ravnborg
@ 2017-09-27 20:29     ` Vijay Kumar
  2017-09-27 21:02     ` David Miller
  2017-09-27 21:45     ` Anthony Yznaga
  2 siblings, 0 replies; 10+ messages in thread
From: Vijay Kumar @ 2017-09-27 20:29 UTC (permalink / raw)
  To: Sam Ravnborg; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Sam,

On 9/27/2017 2:56 PM, Sam Ravnborg wrote:
>>   	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> Please path the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

Thanks for your comment. Sure, I will make the changes and address other 
comments as well in my revised version.

- Vijay

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56   ` Sam Ravnborg
  2017-09-27 20:29     ` Vijay Kumar
@ 2017-09-27 21:02     ` David Miller
  2017-09-27 21:45     ` Anthony Yznaga
  2 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2017-09-27 21:02 UTC (permalink / raw)
  To: sam; +Cc: vijay.ac.kumar, linux-kernel, sparclinux, babu.moger, rob.gardner

From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 27 Sep 2017 21:56:37 +0200

> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>>  arch/sparc/Makefile         |    1 +
>>  arch/sparc/kernel/head_64.S |    2 ++
>>  arch/sparc/lib/Makefile     |    3 +++
>>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>>  5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It also will likely break with older tools.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56   ` Sam Ravnborg
  2017-09-27 20:29     ` Vijay Kumar
  2017-09-27 21:02     ` David Miller
@ 2017-09-27 21:45     ` Anthony Yznaga
  2017-09-27 22:10       ` David Miller
  2 siblings, 1 reply; 10+ messages in thread
From: Anthony Yznaga @ 2017-09-27 21:45 UTC (permalink / raw)
  To: Sam Ravnborg
  Cc: Vijay Kumar, davem, linux-kernel, sparclinux, babu.moger,
	rob.gardner


> On Sep 27, 2017, at 12:56 PM, Sam Ravnborg <sam@ravnborg.org> wrote:
> 
> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>> arch/sparc/Makefile         |    1 +
>> arch/sparc/kernel/head_64.S |    2 ++
>> arch/sparc/lib/Makefile     |    3 +++
>> arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>> arch/sparc/lib/NG4patch.S   |    9 +++++++++
>> 5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>> KBUILD_CFLAGS += -Wa,--undeclared-regs
>> KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>> KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It looks like this is to get the assembler to recognize the lzcnt instruction.

Vijay,
Older assemblers may not support this flag so you’ll need to hardcode the lzcnt instructions using .word directives.

Anthony

> 
>> 
>> ifeq ($(CONFIG_MCOUNT),y)
>>   KBUILD_CFLAGS += -pg
>> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
>> index 78e0211..1165254 100644
>> --- a/arch/sparc/kernel/head_64.S
>> +++ b/arch/sparc/kernel/head_64.S
>> @@ -628,6 +628,8 @@ niagara4_patch:
>> 	 nop
>> 	call	niagara4_patch_pageops
>> 	 nop
>> +	call	niagara4_patch_fls
>> +	 nop
>> 
>> 	ba,a,pt	%xcc, 80f
>> 	 nop
>> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
>> index eefbb9c..72d2d8c 100644
>> --- a/arch/sparc/lib/Makefile
>> +++ b/arch/sparc/lib/Makefile
>> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>> obj-$(CONFIG_SPARC64) += iomap.o
>> obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>> obj-$(CONFIG_SPARC64) += PeeCeeI.o
>> +
>> +obj-$(CONFIG_SPARC64) += fls.o
>> +obj-$(CONFIG_SPARC64) += NG4fls.o
>> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
>> new file mode 100644
>> index 0000000..eb239aa
>> --- /dev/null
>> +++ b/arch/sparc/lib/NG4fls.S
>> @@ -0,0 +1,30 @@
>> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
>> + *
>> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
>> + */
>> +
>> +	.text
>> +	.align 32
>> +
>> +	.globl NG4fls
>> +	.globl __NG4fls
>> +	.type  NG4fls, #function
>> +	.type  __NG4fls, #function
> 
> Use ENTRY(), ENDPROC() as already mentioned.
>> +NG4fls:
>> +	lzcnt   %o0, %o1
>> +	mov     64, %o2
>> +	retl
>> +	 sub     %o2, %o1, %o0
>> +	.size   NG4fls, .-NG4fls
>> +
>> +__NG4fls:
>> +	brz,pn  %o0, 1f
>> +        mov	%o0, %o1
> Use tabs for indent, not spaces. In this case one tab + one space.
> 
>> +	lzcnt	%o1, %o0
>> +	mov     63, %o2
>> +	sub     %o2, %o0, %o0
>> +1:
>> +	retl
>> +	 nop
>> +	.size   __NG4fls, .-__NG4fls
>> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
>> index 3cc0f8c..1010d53 100644
>> --- a/arch/sparc/lib/NG4patch.S
>> +++ b/arch/sparc/lib/NG4patch.S
>> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>> 	retl
>> 	 nop
>> 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> 
> Please path the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.
> 
> 	Sam
> --
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 21:45     ` Anthony Yznaga
@ 2017-09-27 22:10       ` David Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David Miller @ 2017-09-27 22:10 UTC (permalink / raw)
  To: anthony.yznaga
  Cc: sam, vijay.ac.kumar, linux-kernel, sparclinux, babu.moger,
	rob.gardner

From: Anthony Yznaga <anthony.yznaga@oracle.com>
Date: Wed, 27 Sep 2017 14:45:34 -0700

> Vijay,
> Older assemblers may not support this flag so you’ll need to
> hardcode the lzcnt instructions using .word directives.

Right, older binutils do not support the T4 instructions.

This is why we hardcode the opcodes for all of the crypto
instructions used under arch/sparc64/crypto/, for example.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-09-27 22:10 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-27 19:25 [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls Vijay Kumar
2017-09-27 19:25 ` [PATCH v2 1/2] sparc64: Define SPARC default fls " Vijay Kumar
2017-09-27 19:50   ` Sam Ravnborg
2017-09-27 19:59     ` Sam Ravnborg
2017-09-27 19:25 ` [PATCH v2 2/2] sparc64: Use lzcnt instruction for " Vijay Kumar
2017-09-27 19:56   ` Sam Ravnborg
2017-09-27 20:29     ` Vijay Kumar
2017-09-27 21:02     ` David Miller
2017-09-27 21:45     ` Anthony Yznaga
2017-09-27 22:10       ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox