From: Alexey Dobriyan <adobriyan@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com,
hpa@zytor.com, Alexey Dobriyan <adobriyan@gmail.com>
Subject: [PATCH 2/5] -march=native: POPCNT support
Date: Fri, 8 Dec 2017 01:41:51 +0300 [thread overview]
Message-ID: <20171207224154.4687-2-adobriyan@gmail.com> (raw)
In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com>
The mainline kernel can only generate the "popcnt rax, rdi" instruction,
via an alternative masquerading as a function call. This patch allows
generating all POPCNT variations and inlines the hweight*() family of functions.
$ objdump -dr ../obj/vmlinux | grep popcnt
ffffffff81004f6d: f3 48 0f b8 c9 popcnt rcx,rcx
ffffffff81008484: f3 48 0f b8 03 popcnt rax,QWORD PTR [rbx]
ffffffff81073aae: f3 48 0f b8 d8 popcnt rbx,rax
...
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/arch_hweight.h | 32 ++++++++++++++++++++++++++++++--
arch/x86/lib/Makefile | 5 ++++-
include/linux/bitops.h | 2 ++
lib/Makefile | 2 ++
scripts/kconfig/cpuid.c | 6 ++++++
scripts/march-native.sh | 6 +++++-
6 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 34a10b2d5b73..58e4f65d8665 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,6 +2,34 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
+#define __HAVE_ARCH_SW_HWEIGHT
+
+#ifdef CONFIG_MARCH_NATIVE_POPCNT
+static inline unsigned int __arch_hweight64(uint64_t x)
+{
+ uint64_t rv;
+ asm ("popcnt %1, %0" : "=r" (rv) : "rm" (x));
+ return rv;
+}
+
+static inline unsigned int __arch_hweight32(uint32_t x)
+{
+ uint32_t rv;
+ asm ("popcnt %1, %0" : "=r" (rv) : "rm" (x));
+ return rv;
+}
+
+static inline unsigned int __arch_hweight16(uint16_t x)
+{
+ return __arch_hweight32(x);
+}
+
+static inline unsigned int __arch_hweight8(uint8_t x)
+{
+ return __arch_hweight32(x);
+}
+#else
+
#include <asm/cpufeatures.h>
#ifdef CONFIG_64BIT
@@ -18,8 +46,6 @@
#define REG_OUT "a"
#endif
-#define __HAVE_ARCH_SW_HWEIGHT
-
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
@@ -61,3 +87,5 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
#endif /* CONFIG_X86_32 */
#endif
+
+#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..c26ad76e7048 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,7 +27,10 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
-obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+obj-y += msr.o msr-reg.o msr-reg-export.o
+ifneq ($(CONFIG_MARCH_NATIVE),y)
+ obj-y += hweight.o
+endif
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 4cac4e1a72ff..ab58fed4ab90 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -26,10 +26,12 @@
(((~0ULL) - (1ULL << (l)) + 1) & \
(~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+#ifndef CONFIG_MARCH_NATIVE_POPCNT
extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
extern unsigned int __sw_hweight32(unsigned int w);
extern unsigned long __sw_hweight64(__u64 w);
+#endif
/*
* Include this here because some architectures need generic_ffs/fls in
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..3867b73721aa 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -81,7 +81,9 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+ifneq ($(CONFIG_MARCH_NATIVE_POPCNT),y)
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+endif
obj-$(CONFIG_BTREE) += btree.o
obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index f1983027fe2b..e565dd446bdf 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -42,6 +42,8 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
);
}
+static bool popcnt = false;
+
static uint32_t eax0_max;
static void intel(void)
@@ -51,6 +53,9 @@ static void intel(void)
if (eax0_max >= 1) {
cpuid(1, &eax, &ecx, &edx, &ebx);
// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+
+ if (ecx & (1 << 23))
+ popcnt = true;
}
}
@@ -70,6 +75,7 @@ int main(int argc, char *argv[])
intel();
#define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+ _(popcnt);
#undef _
return EXIT_FAILURE;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index 4f0fc82f7722..6641e356b646 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -29,6 +29,10 @@ option() {
echo "#define $1 1" >>"$AUTOCONF2"
}
+if test -x "$CPUID"; then
+ "$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT"
+fi
+
if test ! -f "$AUTOCONF1" -o ! -f "$AUTOCONF2"; then
exit 0
fi
@@ -72,7 +76,7 @@ for i in $COLLECT_GCC_OPTIONS; do
-mmmx) option "CONFIG_MARCH_NATIVE_MMX" ;;
-mmovbe) option "CONFIG_MARCH_NATIVE_MOVBE" ;;
-mpclmul) option "CONFIG_MARCH_NATIVE_PCLMUL" ;;
- -mpopcnt) option "CONFIG_MATCH_NATIVE_POPCNT" ;;
+ -mpopcnt);;
-mprfchw) option "CONFIG_MARCH_NATIVE_PREFETCHW" ;;
-mrdrnd) option "CONFIG_MARCH_NATIVE_RDRND" ;;
-mrdseed) option "CONFIG_MARCH_NATIVE_RDSEED" ;;
--
2.13.6
next prev parent reply other threads:[~2017-12-07 22:42 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 22:41 [PATCH v0 1/5] x86_64: march=native support Alexey Dobriyan
2017-12-07 22:41 ` Alexey Dobriyan [this message]
2017-12-07 23:07 ` [PATCH 2/5] -march=native: POPCNT support H. Peter Anvin
2017-12-08 10:09 ` Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 3/5] -march=native: REP MOVSB support Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 4/5] -march=native: REP STOSB Alexey Dobriyan
2017-12-08 19:08 ` Andi Kleen
2017-12-07 22:41 ` [PATCH 5/5] -march=native: MOVBE support Alexey Dobriyan
2017-12-07 23:32 ` [PATCH v0 1/5] x86_64: march=native support H. Peter Anvin
2017-12-08 9:57 ` Alexey Dobriyan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171207224154.4687-2-adobriyan@gmail.com \
--to=adobriyan@gmail.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.