All of lore.kernel.org
 help / color / mirror / Atom feed
From: sboyd@codeaurora.org (Stephen Boyd)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2] ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions
Date: Fri,  8 Nov 2013 15:00:32 -0800	[thread overview]
Message-ID: <1383951632-6090-1-git-send-email-sboyd@codeaurora.org> (raw)

If we're running on a v7 ARM CPU, detect if the CPU supports the
sdiv/udiv instructions and replace the signed and unsigned
division library functions with an sdiv/udiv instruction.

Running the perf messaging benchmark in pipe mode

 $ perf bench sched messaging -p

shows a modest improvement on my v7 CPU.

before:
(5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805

after:
(4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538

(5.805 - 5.538) / 5.805 = 4.6%

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---

Changes since v1:
 * Replace signed with unsigned in unsigned divide function
 * drop & in inline assembly
 * Use IS_ENABLED() instead of #ifdef
 * Pass DIV_V7 into lib1funcs.S instead of depending on ZIMAGE or CPU_V7

 arch/arm/kernel/setup.c  | 13 ++++++++++-
 arch/arm/lib/Makefile    |  6 +++++
 arch/arm/lib/div-v7.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/lib/lib1funcs.S | 16 +++++++++++++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/lib/div-v7.c

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 0e1e2b3..f9e577a 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -30,6 +30,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/sort.h>
+#include <linux/static_key.h>
 
 #include <asm/unified.h>
 #include <asm/cp15.h>
@@ -365,9 +366,11 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE;
+
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs, vmsa;
+	unsigned int divide_instrs, vmsa, idiv_mask;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
@@ -381,6 +384,14 @@ static void __init cpuid_init_hwcaps(void)
 		elf_hwcap |= HWCAP_IDIVT;
 	}
 
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
+		idiv_mask = HWCAP_IDIVT;
+	else
+		idiv_mask = HWCAP_IDIVA;
+
+	if (elf_hwcap & idiv_mask)
+		static_key_slow_inc(&cpu_has_idiv);
+
 	/* LPAE implies atomic ldrd/strd instructions */
 	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
 	if (vmsa >= 5)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index bd454b0..38621729 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -15,6 +15,12 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
 		   call_with_stack.o
 
+lib-$(CONFIG_CPU_V7) += div-v7.o
+CFLAGS_div-v7.o := -march=armv7-a
+ifeq ($(CONFIG_CPU_V7),y)
+  AFLAGS_lib1funcs.o := -DDIV_V7
+endif
+
 mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
 
 # the code in uaccess.S is not preemption safe and
diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c
new file mode 100644
index 0000000..e20945a
--- /dev/null
+++ b/arch/arm/lib/div-v7.c
@@ -0,0 +1,58 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/static_key.h>
+
+extern int ___aeabi_idiv(int, int);		/* in lib1funcs.S */
+extern unsigned ___aeabi_uidiv(unsigned, unsigned);	/* in lib1funcs.S */
+
+extern struct static_key cpu_has_idiv;	/* in arch/arm/kernel/setup.c */
+
+/* Signed 32-bit divide: sdiv when the CPU has it, else ___aeabi_idiv. */
+int __aeabi_idiv(int numerator, int denominator)
+{
+	/* sdiv returns 0 for x/0; let the library routine handle that */
+	if (static_key_false(&cpu_has_idiv) && denominator) {
+		int ret;
+
+		asm volatile (".arch_extension idiv\n"
+			      "sdiv %0, %1, %2"
+			      : "=r" (ret)
+			      : "r" (numerator), "r" (denominator));
+		return ret;
+	}
+
+	return ___aeabi_idiv(numerator, denominator);
+}
+
+int __divsi3(int numerator, int denominator)
+	__attribute__((alias("__aeabi_idiv")));
+
+/* Unsigned 32-bit divide: udiv when the CPU has it, else ___aeabi_uidiv. */
+unsigned __aeabi_uidiv(unsigned numerator, unsigned denominator)
+{
+	/* udiv returns 0 for x/0; let the library routine handle that */
+	if (static_key_false(&cpu_has_idiv) && denominator) {
+		unsigned ret;
+
+		asm volatile (".arch_extension idiv\n"
+			      "udiv %0, %1, %2"
+			      : "=r" (ret)
+			      : "r" (numerator), "r" (denominator));
+		return ret;
+	}
+
+	return ___aeabi_uidiv(numerator, denominator);
+}
+
+unsigned __udivsi3(unsigned numerator, unsigned denominator)
+	__attribute__((alias("__aeabi_uidiv")));
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f64..82bbcc7 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -205,8 +205,12 @@ Boston, MA 02111-1307, USA.  */
 .endm
 
 
+#ifdef DIV_V7
+ENTRY(___aeabi_uidiv)
+#else
 ENTRY(__udivsi3)
 ENTRY(__aeabi_uidiv)
+#endif
 UNWIND(.fnstart)
 
 	subs	r2, r1, #1
@@ -232,8 +236,12 @@ UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#ifdef DIV_V7
+ENDPROC(___aeabi_uidiv)
+#else
 ENDPROC(__udivsi3)
 ENDPROC(__aeabi_uidiv)
+#endif
 
 ENTRY(__umodsi3)
 UNWIND(.fnstart)
@@ -253,8 +261,12 @@ UNWIND(.fnstart)
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
 
+#ifdef DIV_V7
+ENTRY(___aeabi_idiv)
+#else
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
+#endif
 UNWIND(.fnstart)
 
 	cmp	r1, #0
@@ -293,8 +305,12 @@ UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#ifdef DIV_V7
+ENDPROC(___aeabi_idiv)
+#else
 ENDPROC(__divsi3)
 ENDPROC(__aeabi_idiv)
+#endif
 
 ENTRY(__modsi3)
 UNWIND(.fnstart)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

WARNING: multiple messages have this Message-ID (diff)
From: Stephen Boyd <sboyd@codeaurora.org>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org,
	"Jean-Christophe PLAGNIOL-VILLARD" <plagnioj@jcrosoft.com>,
	"Christopher Covington" <cov@codeaurora.org>,
	"Russell King - ARM Linux" <linux@arm.linux.org.uk>,
	"Måns Rullgård" <mans@mansr.com>,
	"Rob Herring" <robherring2@gmail.com>
Subject: [PATCH v2] ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions
Date: Fri,  8 Nov 2013 15:00:32 -0800	[thread overview]
Message-ID: <1383951632-6090-1-git-send-email-sboyd@codeaurora.org> (raw)

If we're running on a v7 ARM CPU, detect if the CPU supports the
sdiv/udiv instructions and replace the signed and unsigned
division library functions with an sdiv/udiv instruction.

Running the perf messaging benchmark in pipe mode

 $ perf bench sched messaging -p

shows a modest improvement on my v7 CPU.

before:
(5.060 + 5.960 + 5.971 + 5.643 + 6.029 + 5.665 + 6.050 + 5.870 + 6.117 + 5.683) / 10 = 5.805

after:
(4.884 + 5.549 + 5.749 + 6.001 + 5.460 + 5.103 + 5.956 + 6.112 + 5.468 + 5.093) / 10 = 5.538

(5.805 - 5.538) / 5.805 = 4.6%

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
---

Changes since v1:
 * Replace signed with unsigned in unsigned divide function
 * drop & in inline assembly
 * Use IS_ENABLED() instead of #ifdef
 * Pass DIV_V7 into lib1funcs.S instead of depending on ZIMAGE or CPU_V7

 arch/arm/kernel/setup.c  | 13 ++++++++++-
 arch/arm/lib/Makefile    |  6 +++++
 arch/arm/lib/div-v7.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/lib/lib1funcs.S | 16 +++++++++++++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/lib/div-v7.c

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 0e1e2b3..f9e577a 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -30,6 +30,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/sort.h>
+#include <linux/static_key.h>
 
 #include <asm/unified.h>
 #include <asm/cp15.h>
@@ -365,9 +366,11 @@ void __init early_print(const char *str, ...)
 	printk("%s", buf);
 }
 
+struct static_key cpu_has_idiv = STATIC_KEY_INIT_FALSE;
+
 static void __init cpuid_init_hwcaps(void)
 {
-	unsigned int divide_instrs, vmsa;
+	unsigned int divide_instrs, vmsa, idiv_mask;
 
 	if (cpu_architecture() < CPU_ARCH_ARMv7)
 		return;
@@ -381,6 +384,14 @@ static void __init cpuid_init_hwcaps(void)
 		elf_hwcap |= HWCAP_IDIVT;
 	}
 
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
+		idiv_mask = HWCAP_IDIVT;
+	else
+		idiv_mask = HWCAP_IDIVA;
+
+	if (elf_hwcap & idiv_mask)
+		static_key_slow_inc(&cpu_has_idiv);
+
 	/* LPAE implies atomic ldrd/strd instructions */
 	vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0;
 	if (vmsa >= 5)
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index bd454b0..38621729 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -15,6 +15,12 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
 		   call_with_stack.o
 
+lib-$(CONFIG_CPU_V7) += div-v7.o
+CFLAGS_div-v7.o := -march=armv7-a
+ifeq ($(CONFIG_CPU_V7),y)
+  AFLAGS_lib1funcs.o := -DDIV_V7
+endif
+
 mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
 
 # the code in uaccess.S is not preemption safe and
diff --git a/arch/arm/lib/div-v7.c b/arch/arm/lib/div-v7.c
new file mode 100644
index 0000000..e20945a
--- /dev/null
+++ b/arch/arm/lib/div-v7.c
@@ -0,0 +1,58 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/static_key.h>
+
+extern int ___aeabi_idiv(int, int);		/* in lib1funcs.S */
+extern unsigned ___aeabi_uidiv(unsigned, unsigned);	/* in lib1funcs.S */
+
+extern struct static_key cpu_has_idiv;	/* in arch/arm/kernel/setup.c */
+
+/* Signed 32-bit divide: sdiv when the CPU has it, else ___aeabi_idiv. */
+int __aeabi_idiv(int numerator, int denominator)
+{
+	/* sdiv returns 0 for x/0; let the library routine handle that */
+	if (static_key_false(&cpu_has_idiv) && denominator) {
+		int ret;
+
+		asm volatile (".arch_extension idiv\n"
+			      "sdiv %0, %1, %2"
+			      : "=r" (ret)
+			      : "r" (numerator), "r" (denominator));
+		return ret;
+	}
+
+	return ___aeabi_idiv(numerator, denominator);
+}
+
+int __divsi3(int numerator, int denominator)
+	__attribute__((alias("__aeabi_idiv")));
+
+/* Unsigned 32-bit divide: udiv when the CPU has it, else ___aeabi_uidiv. */
+unsigned __aeabi_uidiv(unsigned numerator, unsigned denominator)
+{
+	/* udiv returns 0 for x/0; let the library routine handle that */
+	if (static_key_false(&cpu_has_idiv) && denominator) {
+		unsigned ret;
+
+		asm volatile (".arch_extension idiv\n"
+			      "udiv %0, %1, %2"
+			      : "=r" (ret)
+			      : "r" (numerator), "r" (denominator));
+		return ret;
+	}
+
+	return ___aeabi_uidiv(numerator, denominator);
+}
+
+unsigned __udivsi3(unsigned numerator, unsigned denominator)
+	__attribute__((alias("__aeabi_uidiv")));
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index c562f64..82bbcc7 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -205,8 +205,12 @@ Boston, MA 02111-1307, USA.  */
 .endm
 
 
+#ifdef DIV_V7
+ENTRY(___aeabi_uidiv)
+#else
 ENTRY(__udivsi3)
 ENTRY(__aeabi_uidiv)
+#endif
 UNWIND(.fnstart)
 
 	subs	r2, r1, #1
@@ -232,8 +236,12 @@ UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#ifdef DIV_V7
+ENDPROC(___aeabi_uidiv)
+#else
 ENDPROC(__udivsi3)
 ENDPROC(__aeabi_uidiv)
+#endif
 
 ENTRY(__umodsi3)
 UNWIND(.fnstart)
@@ -253,8 +261,12 @@ UNWIND(.fnstart)
 UNWIND(.fnend)
 ENDPROC(__umodsi3)
 
+#ifdef DIV_V7
+ENTRY(___aeabi_idiv)
+#else
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
+#endif
 UNWIND(.fnstart)
 
 	cmp	r1, #0
@@ -293,8 +305,12 @@ UNWIND(.fnstart)
 	mov	pc, lr
 
 UNWIND(.fnend)
+#ifdef DIV_V7
+ENDPROC(___aeabi_idiv)
+#else
 ENDPROC(__divsi3)
 ENDPROC(__aeabi_idiv)
+#endif
 
 ENTRY(__modsi3)
 UNWIND(.fnstart)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


             reply	other threads:[~2013-11-08 23:00 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-08 23:00 Stephen Boyd [this message]
2013-11-08 23:00 ` [PATCH v2] ARM: Use udiv/sdiv for __aeabi_{u}idiv library functions Stephen Boyd
2013-11-09  6:46 ` Matt Sealey
2013-11-09  6:46   ` Matt Sealey
2013-11-09 18:20   ` Måns Rullgård
2013-11-09 18:20     ` Måns Rullgård
2013-11-12  1:23   ` Stephen Boyd
2013-11-12  1:23     ` Stephen Boyd
2013-11-10  5:03 ` Nicolas Pitre
2013-11-10  5:03   ` Nicolas Pitre
2013-11-12  2:34   ` Stephen Boyd
2013-11-12  2:34     ` Stephen Boyd
2013-11-12 11:28     ` Måns Rullgård
2013-11-12 11:28       ` Måns Rullgård
2013-11-12 14:01     ` Nicolas Pitre
2013-11-12 14:01       ` Nicolas Pitre
2013-11-12 14:04       ` Russell King - ARM Linux
2013-11-12 14:04         ` Russell King - ARM Linux
2013-11-12 14:16         ` Nicolas Pitre
2013-11-12 14:16           ` Nicolas Pitre
2013-11-12 14:17         ` Ben Dooks
2013-11-12 14:17           ` Ben Dooks
2013-11-12 14:32           ` Nicolas Pitre
2013-11-12 14:32             ` Nicolas Pitre
2013-11-12 14:40             ` Måns Rullgård
2013-11-12 14:40               ` Måns Rullgård
2013-11-12 14:55               ` Nicolas Pitre
2013-11-12 14:55                 ` Nicolas Pitre
2013-11-12 15:20                 ` Nicolas Pitre
2013-11-12 15:20                   ` Nicolas Pitre
2013-11-12 18:03                 ` Måns Rullgård
2013-11-12 18:03                   ` Måns Rullgård
2013-11-12 14:22       ` Måns Rullgård
2013-11-12 14:22         ` Måns Rullgård
2013-11-12 14:36         ` Nicolas Pitre
2013-11-12 14:36           ` Nicolas Pitre
2013-11-11  7:46 ` Uwe Kleine-König
2013-11-11  7:46   ` Uwe Kleine-König
2013-11-12  2:35   ` Stephen Boyd
2013-11-12  2:35     ` Stephen Boyd

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1383951632-6090-1-git-send-email-sboyd@codeaurora.org \
    --to=sboyd@codeaurora.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.