public inbox for linux-um@lists.infradead.org
 help / color / mirror / Atom feed
* Amended and retested for 32 bit "borrow ops" series
@ 2020-12-11 17:45 anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins anton.ivanov
                   ` (6 more replies)
  0 siblings, 7 replies; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard

Hi All,

Apologies for sending a broken version for 32 bit.

This is an amended version which has been tested for 32 bit with/without
glibc "borrowing" and 64 bit with/without glibc borrowing.

It also includes a proper 32 bit futex implementation.

I have combined all patches and bumped the version to no 4.

The total performance difference as measured with dd on a ubd device
which is mostly cached in-memory is ~ 1.3 times for 64 bit and 2.4 times
for 32 bit.

Other benchmarks (find, boot time, etc) benefit as well, but the benefit
is not so drastic.

All tests performed on a Ryzen 5 1600X host.



_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 20:03   ` Johannes Berg
  2020-12-11 17:45 ` [PATCH v4 2/7] um: enable the use of optimized xor routines in UML anton.ivanov
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

The UML kernel runs as a normal userspace process and can use most
of glibc string.h functionality instead of built-ins. At present,
when using built-ins, it is also limited to their unoptimized
versions, because it does not have the runtime code patching present
on x86 via apply_alternatives()

Allowing the use of glibc for memcpy, memmove, etc provides 1-5%
boost on common file io as measured by cat and dd. The size of
the linux executable is also reduced by ~10KBytes.

It is possible to turn this on/off via the kernel configuration.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/Kconfig              | 10 ++++++
 arch/um/include/asm/string.h | 70 ++++++++++++++++++++++++++++++++++++
 arch/um/os-Linux/user_syms.c | 28 ++++++++++++++-
 arch/x86/um/Makefile         |  6 ++--
 4 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 arch/um/include/asm/string.h

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 4b799fad8b48..ec2802ded9d1 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -189,6 +189,16 @@ config UML_TIME_TRAVEL_SUPPORT
 
 	  It is safe to say Y, but you probably don't need this.
 
+config UML_USE_BUILT_IN_STRINGS
+	bool
+	default N
+	prompt "Use kernel memcpy, memmove, etc"
+	help
+	  UML can use the kernel built in memcpy, memmove, etc functions
+	  or use the glibc equivalents instead.
+	  The glibc equivalents are slightly faster and will result in a
+	  slightly smaller executable when linking UML dynamically.
+
 endmenu
 
 source "arch/um/drivers/Kconfig"
diff --git a/arch/um/include/asm/string.h b/arch/um/include/asm/string.h
new file mode 100644
index 000000000000..ac16890e0867
--- /dev/null
+++ b/arch/um/include/asm/string.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (C) 2020 Cambridge Greys Ltd.
+ * Copyright (C) 2020 Red Hat Inc.
+ */
+
+#ifndef __ASM_UM_STRING_H
+#define __ASM_UM_STRING_H
+
+#ifndef CONFIG_UML_USE_BUILT_IN_STRINGS
+
+#include <stddef.h>
+
+#define __HAVE_ARCH_STRCPY
+extern char *strcpy(char *dest, const char *src);
+#define __HAVE_ARCH_STRNCPY
+extern char *strncpy(char *dest, const char *src, size_t n);
+#define __HAVE_ARCH_STRCAT
+extern char *strcat(char *dest, const char *src);
+#define __HAVE_ARCH_STRNCAT
+extern char *strncat(char *dest, const char *src, size_t n);
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *s1, const char *s2);
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *s1, const char *s2, size_t n);
+#define __HAVE_ARCH_STRCHR
+extern char *strchr(const char *s, int c);
+#define __HAVE_ARCH_STRLEN
+extern size_t strlen(const char *s);
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *dest, const void *src, size_t n);
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *dest, const void *src, size_t n);
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *s, int c, size_t n);
+#define __HAVE_ARCH_STRNLEN
+extern size_t strnlen(const char *s, size_t maxlen);
+#define __HAVE_ARCH_STRSTR
+extern char *strstr(const char *haystack, const char *needle);
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *s, int c, size_t n);
+#define __HAVE_ARCH_STRNCASECMP
+extern int strncasecmp(const char *s1, const char *s2, size_t n);
+#define __HAVE_ARCH_STRCASECMP
+extern int strcasecmp(const char *s1, const char *s2);
+#define __HAVE_ARCH_STRCHRNUL
+extern char *strchrnul(const char *s, int c);
+#define __HAVE_ARCH_STRRCH
+extern char *strchrnul(const char *s, int c);
+#define __HAVE_ARCH_STRSPN
+extern size_t strspn(const char *s, const char *accept);
+#define __HAVE_ARCH_STRCSPN
+extern size_t strcspn(const char *s, const char *reject);
+#define __HAVE_ARCH_STRPBRK
+extern char *strpbrk(const char *s, const char *accept);
+#define __HAVE_ARCH_STRSEP
+extern char *strsep(char **stringp, const char *delim);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *s1, const void *s2, size_t n);
+#define __HAVE_ARCH_BCMP
+extern int bcmp(const void *s1, const void *s2, size_t n);
+
+#else
+
+#include <asm-generic/string.h>
+
+#endif
+
+#endif /* __ASM_UM_STRING_H */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 715594fe5719..040c8303e0b4 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -9,6 +9,7 @@
  * add an EXPORT for the glibc one.
  */
 
+#ifdef CONFIG_UML_USE_BUILT_IN_STRINGS
 #undef strlen
 #undef strstr
 #undef memcpy
@@ -17,6 +18,7 @@
 extern size_t strlen(const char *);
 extern void *memmove(void *, const void *, size_t);
 extern void *memset(void *, int, size_t);
+#endif
 extern int printf(const char *, ...);
 
 /* If it's not defined, the export is included in lib/string.c.*/
@@ -24,14 +26,38 @@ extern int printf(const char *, ...);
 EXPORT_SYMBOL(strstr);
 #endif
 
-#ifndef __x86_64__
+#if !defined(__x86_64__) && defined(CONFIG_UML_USE_BUILT_IN_STRINGS)
 extern void *memcpy(void *, const void *, size_t);
 EXPORT_SYMBOL(memcpy);
 #endif
 
+
+#ifndef CONFIG_UML_USE_BUILT_IN_STRINGS
+/* These all come from glibc */
+EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(printf);
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(memchr);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strncasecmp);
+EXPORT_SYMBOL(strcasecmp);
+EXPORT_SYMBOL(strchrnul);
+EXPORT_SYMBOL(strcspn);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strsep);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(bcmp);
+
+#endif
 
 /* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
  * However, the modules will use the CRC defined *here*, no matter if it is
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 77f70b969d14..453ea23a9770 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -20,14 +20,16 @@ ifeq ($(CONFIG_X86_32),y)
 obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
-subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
+subarch-y = ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
 subarch-y += ../kernel/sys_ia32.o
+subarch-$(CONFIG_UML_USE_BUILT_IN_STRINGS) += ../lib/string_32.o
 
 else
 
 obj-y += syscalls_64.o vdso/
 
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
+subarch-y = ../lib/csum-partial_64.o ../entry/thunk_64.o
+subarch-$(CONFIG_UML_USE_BUILT_IN_STRINGS) += ../lib/memcpy_64.o
 
 endif
 
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 20:07   ` Johannes Berg
  2020-12-11 17:45 ` [PATCH v4 3/7] um: "borrow" atomics from x86 architecture anton.ivanov
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

This patch enable the use of optimized xor routines from the x86
tree as well as supply the necessary macros for them to be used in
UML.

The macros supply several "fake" flags and definitions to allow
using the x86 files "as is".

This patchset implements only the flags needed for the optimized
strings.h, xor.h and checksum.h implementations instead of
trying to copy the entire x86 flags environment.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/cpufeature.h        | 17 +++++++++++++
 arch/um/include/asm/cpufeatures.h       | 15 ++++++++++++
 arch/um/include/asm/fpu/api.h           | 14 +++++++++++
 arch/um/include/asm/processor-generic.h |  3 +++
 arch/um/include/asm/xor-x86.h           |  1 +
 arch/um/include/asm/xor.h               | 17 ++++++++++++-
 arch/um/include/asm/xor_32.h            |  1 +
 arch/um/include/asm/xor_64.h            |  1 +
 arch/um/include/asm/xor_avx.h           |  1 +
 arch/um/include/shared/os.h             |  1 +
 arch/um/kernel/um_arch.c                | 17 +++++++++++--
 arch/um/os-Linux/start_up.c             | 32 +++++++++++++++++++++++++
 12 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 arch/um/include/asm/cpufeature.h
 create mode 100644 arch/um/include/asm/cpufeatures.h
 create mode 100644 arch/um/include/asm/fpu/api.h
 create mode 120000 arch/um/include/asm/xor-x86.h
 create mode 120000 arch/um/include/asm/xor_32.h
 create mode 120000 arch/um/include/asm/xor_64.h
 create mode 120000 arch/um/include/asm/xor_avx.h

diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
new file mode 100644
index 000000000000..19a2394d03a4
--- /dev/null
+++ b/arch/um/include/asm/cpufeature.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CPUFEATURE_H
+#define _ASM_UM_CPUFEATURE_H
+
+#include <asm/asm.h>
+#include <linux/bitops.h>
+#include <asm/processor-generic.h>
+#include <asm/cpufeatures.h>
+
+
+const char *host_cpu_feature_names[] = {"mmx", "xmm", "avx", "osxsave", "rep_good", "erms", "xmm2"};
+#define MAX_UM_CPU_FEATURES ARRAY_SIZE(host_cpu_feature_names)
+
+
+#define boot_cpu_has(bit)	(boot_cpu_data.host_features & bit)
+
+#endif /* _ASM_UM_CPUFEATURE_H */
diff --git a/arch/um/include/asm/cpufeatures.h b/arch/um/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..d02da39b039d
--- /dev/null
+++ b/arch/um/include/asm/cpufeatures.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CPUFEATURES_H
+#define _ASM_UM_CPUFEATURES_H
+
+/* Fake x86 Features of actual interest to UML */
+
+#define X86_FEATURE_MMX (1 << 0)
+#define X86_FEATURE_XMM (1 << 1)
+#define X86_FEATURE_AVX (1 << 2)
+#define X86_FEATURE_OSXSAVE (1 << 3)
+#define X86_FEATURE_REP_GOOD (1 << 4)
+#define X86_FEATURE_ERMS (1 << 5)
+#define X86_FEATURE_XMM2 (1 << 6)
+
+#endif /* _ASM_UM_CPUFEATURES_H */
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
new file mode 100644
index 000000000000..2873a6f0105d
--- /dev/null
+++ b/arch/um/include/asm/fpu/api.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_UM_FPU_API_H
+#define _ASM_UM_FPU_API_H
+
+/* Copyright (c) 2020 Cambridge Greys Ltd
+ * Copyright (c) 2020 Red Hat Inc.
+ * A set of "dummy" defines to allow the direct inclusion
+ * of x86 optimized copy, xor, etc routines into the
+ * UML code tree. */
+
+#define kernel_fpu_begin() (void)0
+#define kernel_fpu_end() (void)0
+
+#endif
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index afd9b267cf81..b8bcddbb1898 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -90,6 +90,9 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry,
 struct cpuinfo_um {
 	unsigned long loops_per_jiffy;
 	int ipi_pipe[2];
+	/* There is only a small set of x86 features we are interested
+	 * in for now */
+	unsigned long host_features;
 };
 
 extern struct cpuinfo_um boot_cpu_data;
diff --git a/arch/um/include/asm/xor-x86.h b/arch/um/include/asm/xor-x86.h
new file mode 120000
index 000000000000..beff7de6890d
--- /dev/null
+++ b/arch/um/include/asm/xor-x86.h
@@ -0,0 +1 @@
+../../../x86/include/asm/xor.h
\ No newline at end of file
diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
index 36b33d62a35d..18bcb5b6189d 100644
--- a/arch/um/include/asm/xor.h
+++ b/arch/um/include/asm/xor.h
@@ -1,7 +1,22 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#include <asm-generic/xor.h>
+#ifndef _ASM_UM_XOR_H
+#define _ASM_UM_XOR_H
+
+#ifdef CONFIG_64BIT
+#undef CONFIG_X86_32
+#else
+#define CONFIG_X86_32 1
+#endif
+
+#include <asm/cpufeature.h>
+#include <asm/xor-x86.h>
 #include <linux/time-internal.h>
 
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#undef XOR_SELECT_TEMPLATE
 /* pick an arbitrary one - measuring isn't possible with inf-cpu */
 #define XOR_SELECT_TEMPLATE(x)	\
 	(time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL)
+#endif
+
+#endif
diff --git a/arch/um/include/asm/xor_32.h b/arch/um/include/asm/xor_32.h
new file mode 120000
index 000000000000..8a0894e996d7
--- /dev/null
+++ b/arch/um/include/asm/xor_32.h
@@ -0,0 +1 @@
+../../../x86/include/asm/xor_32.h
\ No newline at end of file
diff --git a/arch/um/include/asm/xor_64.h b/arch/um/include/asm/xor_64.h
new file mode 120000
index 000000000000..b8d346c516bf
--- /dev/null
+++ b/arch/um/include/asm/xor_64.h
@@ -0,0 +1 @@
+../../../x86/include/asm/xor_64.h
\ No newline at end of file
diff --git a/arch/um/include/asm/xor_avx.h b/arch/um/include/asm/xor_avx.h
new file mode 120000
index 000000000000..370ded122095
--- /dev/null
+++ b/arch/um/include/asm/xor_avx.h
@@ -0,0 +1 @@
+../../../x86/include/asm/xor_avx.h
\ No newline at end of file
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index f467d28fc0b4..c2ff855af603 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -187,6 +187,7 @@ int os_poll(unsigned int n, const int *fds);
 extern void os_early_checks(void);
 extern void os_check_bugs(void);
 extern void check_host_supports_tls(int *supports_tls, int *tls_min);
+extern unsigned long check_host_cpu_features(const char **feature_names, int n);
 
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 76b37297b7d4..b7dfc4fcc130 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -15,6 +15,7 @@
 #include <linux/kmsg_dump.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <as-layout.h>
@@ -48,9 +49,12 @@ static void __init add_arg(char *arg)
  */
 struct cpuinfo_um boot_cpu_data = {
 	.loops_per_jiffy	= 0,
-	.ipi_pipe		= { -1, -1 }
+	.ipi_pipe		= { -1, -1 },
+	.host_features		= 0
 };
 
+EXPORT_SYMBOL(boot_cpu_data);
+
 union thread_union cpu0_irqstack
 	__section(".data..init_irqstack") =
 		{ .thread_info = INIT_THREAD_INFO(init_task) };
@@ -67,9 +71,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, "model name\t: UML\n");
 	seq_printf(m, "mode\t\t: skas\n");
 	seq_printf(m, "host\t\t: %s\n", host_info);
-	seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+	seq_printf(m, "bogomips\t: %lu.%02lu\n",
 		   loops_per_jiffy/(500000/HZ),
 		   (loops_per_jiffy/(5000/HZ)) % 100);
+	seq_printf(m, "flags\t\t:");
+	for (index = 0; index < MAX_UM_CPU_FEATURES; index++) {
+		if (boot_cpu_data.host_features & (1 << index))
+			seq_printf(m, " %s", host_cpu_feature_names[index]);
+	}
+	seq_printf(m, "\n\n");
 
 	return 0;
 }
@@ -275,6 +285,9 @@ int __init linux_main(int argc, char **argv)
 	/* OS sanity checks that need to happen before the kernel runs */
 	os_early_checks();
 
+	boot_cpu_data.host_features =
+		check_host_cpu_features(host_cpu_feature_names, MAX_UM_CPU_FEATURES);
+
 	brk_start = (unsigned long) sbrk(0);
 
 	/*
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f79dc338279e..be884ed86b30 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -321,6 +321,38 @@ static void __init check_coredump_limit(void)
 		os_info("%llu\n", (unsigned long long)lim.rlim_max);
 }
 
+unsigned long  __init check_host_cpu_features(const char **feature_names, int n)
+{
+	FILE *cpuinfo;
+	char *line = NULL;
+	size_t len = 0;
+	int i;
+	bool done_parsing = false;
+	unsigned long result = 0;
+
+	cpuinfo = fopen("/proc/cpuinfo", "r");
+	if (cpuinfo == NULL) {
+		os_info("Failed to get host CPU features\n");
+	} else {
+		while ((getline(&line, &len, cpuinfo)) != -1) {
+			if (strstr(line, "flags")) {
+				for (i = 0; i < n; i++) {
+					if (strstr(line, feature_names[i])) {
+						result |= (1 << i);
+					}
+				}
+				done_parsing = true;
+			}
+			free(line);
+			line = NULL;
+			if (done_parsing)
+				break;
+		}
+		fclose(cpuinfo);
+	}
+	return result;
+}
+
 void __init os_early_checks(void)
 {
 	int pid;
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 3/7] um: "borrow" atomics from x86 architecture
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 2/7] um: enable the use of optimized xor routines in UML anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 20:08   ` Johannes Berg
  2020-12-11 17:45 ` [PATCH v4 4/7] um: add a UML specific futex implementation anton.ivanov
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

This moves UML on 64 bit from generic atomics to their x86
equivalents.

Generic atomics bracket most operations by interrupts off/interrupts
on. This is quite expensive on UML - it translates to changing
the signal mask and rerunning the signal loop every time.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/atomic-x86.h  | 1 +
 arch/um/include/asm/atomic.h      | 7 +++++++
 arch/um/include/asm/atomic64_64.h | 1 +
 3 files changed, 9 insertions(+)
 create mode 120000 arch/um/include/asm/atomic-x86.h
 create mode 100644 arch/um/include/asm/atomic.h
 create mode 120000 arch/um/include/asm/atomic64_64.h

diff --git a/arch/um/include/asm/atomic-x86.h b/arch/um/include/asm/atomic-x86.h
new file mode 120000
index 000000000000..e71a389e112d
--- /dev/null
+++ b/arch/um/include/asm/atomic-x86.h
@@ -0,0 +1 @@
+../../../x86/include/asm/atomic.h
\ No newline at end of file
diff --git a/arch/um/include/asm/atomic.h b/arch/um/include/asm/atomic.h
new file mode 100644
index 000000000000..6ff22c587c10
--- /dev/null
+++ b/arch/um/include/asm/atomic.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ATOMIC_XOR_H
+#define _ASM_ATOMIC_XOR_H
+
+#include <asm/atomic-x86.h>
+
+#endif
diff --git a/arch/um/include/asm/atomic64_64.h b/arch/um/include/asm/atomic64_64.h
new file mode 120000
index 000000000000..f817a1478603
--- /dev/null
+++ b/arch/um/include/asm/atomic64_64.h
@@ -0,0 +1 @@
+../../../x86/include/asm/atomic64_64.h
\ No newline at end of file
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 4/7] um: add a UML specific futex implementation
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
                   ` (2 preceding siblings ...)
  2020-12-11 17:45 ` [PATCH v4 3/7] um: "borrow" atomics from x86 architecture anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 20:10   ` Johannes Berg
  2020-12-11 17:45 ` [PATCH v4 5/7] um: "borrow" cmpxchg from x86 tree in UML anton.ivanov
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

The generic asm futex implementation emulates atomic access to
memory by doing a get_user followed by put_user. These translate
to two mapping operations on UML with paging enabled in the
meantime. This, in turn may end up changing interrupts,
invoking the signal loop, etc.

This replaces the generic implementation by a mapping followed
by an operation on the mapped segment.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/Kbuild    |   1 -
 arch/um/include/asm/futex.h   |  14 ++++
 arch/um/kernel/skas/uaccess.c | 130 ++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 arch/um/include/asm/futex.h

diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 1c63b260ecc4..873e6815bc50 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -8,7 +8,6 @@ generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h
new file mode 100644
index 000000000000..0a01f1f60de0
--- /dev/null
+++ b/arch/um/include/asm/futex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_FUTEX_H
+#define _ASM_GENERIC_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+
+extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
+extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval);
+
+#endif
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 2dec915abe6f..de71048b5371 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -11,6 +11,7 @@
 #include <asm/current.h>
 #include <asm/page.h>
 #include <kern_util.h>
+#include <asm/futex.h>
 #include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
@@ -248,3 +249,132 @@ long __strnlen_user(const void __user *str, long len)
 	return 0;
 }
 EXPORT_SYMBOL(__strnlen_user);
+
+/**
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ *			  argument and comparison of the previous
+ *			  futex value with another constant.
+ *
+ * @encoded_op:	encoded operation to execute
+ * @uaddr:	pointer to user space address
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
+ */
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+{
+	int oldval, ret;
+	struct page *page;
+	unsigned long addr = (unsigned long) uaddr;
+	pte_t *pte;
+
+	ret = -EFAULT;
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+	preempt_disable();
+	pte = maybe_map((unsigned long) uaddr, 1);
+	if (pte == NULL)
+		goto out_inuser;
+
+	page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	addr = page_address(page) + (((unsigned long) addr) & ~PAGE_MASK);
+#else
+	addr = (unsigned long) kmap_atomic(page) +
+		((unsigned long) addr & ~PAGE_MASK);
+#endif
+
+	oldval = *((u32 *)addr);
+
+	ret = 0;
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		*uaddr = oparg;
+		break;
+	case FUTEX_OP_ADD:
+		*uaddr += oparg;
+		break;
+	case FUTEX_OP_OR:
+		*uaddr |= oparg;
+		break;
+	case FUTEX_OP_ANDN:
+		*uaddr &= ~oparg;
+		break;
+	case FUTEX_OP_XOR:
+		*uaddr ^= oparg;
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
+	kunmap_atomic((void *)addr);
+#endif
+
+out_inuser:
+	preempt_enable();
+
+	if (ret == 0)
+		*oval = oldval;
+
+	return ret;
+}
+EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ *				uaddr with newval if the current value is
+ *				oldval.
+ * @uval:	pointer to store content of @uaddr
+ * @uaddr:	pointer to user space address
+ * @oldval:	old value
+ * @newval:	new value to store to @uaddr
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
+ */
+
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	u32 val;
+	struct page *page;
+	pte_t *pte;
+	int ret = -EFAULT;
+
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+
+	preempt_disable();
+	pte = maybe_map((unsigned long) uaddr, 1);
+	if (pte == NULL)
+		goto out_inatomic;
+
+	page = pte_page(*pte);
+	pagefault_disable();
+	uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
+
+	val = *uaddr;
+
+	if (val == oldval)
+		*uaddr = newval;
+
+	*uval = val;
+	pagefault_enable();
+	ret = 0;
+
+out_inatomic:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 5/7] um: "borrow" cmpxchg from x86 tree in UML
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
                   ` (3 preceding siblings ...)
  2020-12-11 17:45 ` [PATCH v4 4/7] um: add a UML specific futex implementation anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 6/7] um: swithch futex ops to cmpxchg anton.ivanov
  2020-12-11 17:45 ` [PATCH v4 7/7] um: borrow bitops from the x86 tree anton.ivanov
  6 siblings, 0 replies; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

UML was falling back to asm-generic cmpxchg which emulates atomic
behaviour by turning irqs on/off. On UML this means turning
on/off signals for each cmpxchg which is expensive.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/cmpxchg-x86.h | 1 +
 arch/um/include/asm/cmpxchg.h     | 7 +++++++
 arch/um/include/asm/cmpxchg_32.h  | 1 +
 arch/um/include/asm/cmpxchg_64.h  | 1 +
 4 files changed, 10 insertions(+)
 create mode 120000 arch/um/include/asm/cmpxchg-x86.h
 create mode 100644 arch/um/include/asm/cmpxchg.h
 create mode 120000 arch/um/include/asm/cmpxchg_32.h
 create mode 120000 arch/um/include/asm/cmpxchg_64.h

diff --git a/arch/um/include/asm/cmpxchg-x86.h b/arch/um/include/asm/cmpxchg-x86.h
new file mode 120000
index 000000000000..a5c607edde88
--- /dev/null
+++ b/arch/um/include/asm/cmpxchg-x86.h
@@ -0,0 +1 @@
+../../../x86/include/asm/cmpxchg.h
\ No newline at end of file
diff --git a/arch/um/include/asm/cmpxchg.h b/arch/um/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..03e7a669eeef
--- /dev/null
+++ b/arch/um/include/asm/cmpxchg.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_CMPXCHG_H
+#define _ASM_UM_CMPXCHG_H
+
+#include <asm/cmpxchg-x86.h>
+
+#endif
diff --git a/arch/um/include/asm/cmpxchg_32.h b/arch/um/include/asm/cmpxchg_32.h
new file mode 120000
index 000000000000..1f360d0a9fa9
--- /dev/null
+++ b/arch/um/include/asm/cmpxchg_32.h
@@ -0,0 +1 @@
+../../../x86/include/asm/cmpxchg_32.h
\ No newline at end of file
diff --git a/arch/um/include/asm/cmpxchg_64.h b/arch/um/include/asm/cmpxchg_64.h
new file mode 120000
index 000000000000..bc09ae22b0cb
--- /dev/null
+++ b/arch/um/include/asm/cmpxchg_64.h
@@ -0,0 +1 @@
+../../../x86/include/asm/cmpxchg_64.h
\ No newline at end of file
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 6/7] um: swithch futex ops to cmpxchg
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
                   ` (4 preceding siblings ...)
  2020-12-11 17:45 ` [PATCH v4 5/7] um: "borrow" cmpxchg from x86 tree in UML anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  2020-12-11 20:12   ` Johannes Berg
  2020-12-11 17:45 ` [PATCH v4 7/7] um: borrow bitops from the x86 tree anton.ivanov
  6 siblings, 1 reply; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

As a result of switching from emulated to true atomic cmpxchg
we can use cmpxchg in the corresponding um futex op.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/kernel/skas/uaccess.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index de71048b5371..858b2e16e364 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -343,7 +343,6 @@ EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
  * -EAGAIN - Atomic operation was unable to complete due to contention
  * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG)
  */
-
 int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 			      u32 oldval, u32 newval)
 {
@@ -366,8 +365,7 @@ int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 
 	val = *uaddr;
 
-	if (val == oldval)
-		*uaddr = newval;
+	ret = cmpxchg(uaddr, oldval, newval);
 
 	*uval = val;
 	pagefault_enable();
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v4 7/7] um: borrow bitops from the x86 tree
  2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
                   ` (5 preceding siblings ...)
  2020-12-11 17:45 ` [PATCH v4 6/7] um: swithch futex ops to cmpxchg anton.ivanov
@ 2020-12-11 17:45 ` anton.ivanov
  6 siblings, 0 replies; 21+ messages in thread
From: anton.ivanov @ 2020-12-11 17:45 UTC (permalink / raw)
  To: linux-um; +Cc: richard, Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Using x86 bitops instead of the asm-generic allows to squeeze
a couple of percents improvement on fs IO in UML. It should
improve other areas as well.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 arch/um/include/asm/bitops-x86.h | 1 +
 arch/um/include/asm/bitops.h     | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 120000 arch/um/include/asm/bitops-x86.h
 create mode 100644 arch/um/include/asm/bitops.h

diff --git a/arch/um/include/asm/bitops-x86.h b/arch/um/include/asm/bitops-x86.h
new file mode 120000
index 000000000000..15a96ff554b2
--- /dev/null
+++ b/arch/um/include/asm/bitops-x86.h
@@ -0,0 +1 @@
+../../../x86/include/asm/bitops.h
\ No newline at end of file
diff --git a/arch/um/include/asm/bitops.h b/arch/um/include/asm/bitops.h
new file mode 100644
index 000000000000..a0cd978c6aaa
--- /dev/null
+++ b/arch/um/include/asm/bitops.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UM_BITOPS_H
+#define _ASM_UM_BITOPS_H
+
+#include <asm/bitops-x86.h>
+
+#endif
-- 
2.20.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins
  2020-12-11 17:45 ` [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins anton.ivanov
@ 2020-12-11 20:03   ` Johannes Berg
  0 siblings, 0 replies; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 20:03 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard

On Fri, 2020-12-11 at 17:45 +0000, anton.ivanov@cambridgegreys.com
wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> The UML kernel runs as a normal userspace process and can use most
> of glibc string.h functionality instead of built-ins. At present,
> when using built-ins, it is also limited to their unoptimized
> versions, because it does not have the runtime code patching present
> on x86 via apply_alternatives()
> 
> Allowing the use of glibc for memcpy, memmove, etc provides 1-5%
> boost on common file io as measured by cat and dd. The size of
> the linux executable is also reduced by ~10KBytes.
> 
> It is possible to turn this on/off via the kernel configuration.
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Reviewed-by: Johannes Berg <johannes@sipsolutions.net>

johannes


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 17:45 ` [PATCH v4 2/7] um: enable the use of optimized xor routines in UML anton.ivanov
@ 2020-12-11 20:07   ` Johannes Berg
  2020-12-11 21:57     ` Anton Ivanov
  0 siblings, 1 reply; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 20:07 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard


> +++ b/arch/um/include/asm/fpu/api.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ASM_UM_FPU_API_H
> +#define _ASM_UM_FPU_API_H
> +
> +/* Copyright (c) 2020 Cambridge Greys Ltd
> + * Copyright (c) 2020 Red Hat Inc.
> + * A set of "dummy" defines to allow the direct inclusion
> + * of x86 optimized copy, xor, etc routines into the
> + * UML code tree. */
> +
> +#define kernel_fpu_begin() (void)0
> +#define kernel_fpu_end() (void)0

I think I would prefer those to be static inlines, but YMMV.

johannes

> diff --git a/arch/um/include/asm/xor-x86.h b/arch/um/include/asm/xor-x86.h
> new file mode 120000
> index 000000000000..beff7de6890d
> --- /dev/null
> +++ b/arch/um/include/asm/xor-x86.h
> @@ -0,0 +1 @@
> +../../../x86/include/asm/xor.h

Do these really need to be symlinks? Last I looked, it seemed that
arch/x86/include/asm/ is actually in the include path?

> --- a/arch/um/include/asm/xor.h
> +++ b/arch/um/include/asm/xor.h
> @@ -1,7 +1,22 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
> -#include <asm-generic/xor.h>
> +#ifndef _ASM_UM_XOR_H
> +#define _ASM_UM_XOR_H
> +
> +#ifdef CONFIG_64BIT
> +#undef CONFIG_X86_32
> +#else
> +#define CONFIG_X86_32 1
> +#endif
> +
> +#include <asm/cpufeature.h>
> +#include <asm/xor-x86.h>

and thus this could just be

#include_next <asm/xor.h>

without the symlink?

> diff --git a/arch/um/include/asm/xor_32.h b/arch/um/include/asm/xor_32.h
> new file mode 120000
> index 000000000000..8a0894e996d7
> --- /dev/null
> +++ b/arch/um/include/asm/xor_32.h


And the others don't exist in um anyway, so probably aren't needed?

But maybe I'm confused about the include path, I didn't double-check
now.

johannes



_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 3/7] um: "borrow" atomics from x86 architecture
  2020-12-11 17:45 ` [PATCH v4 3/7] um: "borrow" atomics from x86 architecture anton.ivanov
@ 2020-12-11 20:08   ` Johannes Berg
  2020-12-11 21:31     ` Anton Ivanov
  0 siblings, 1 reply; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 20:08 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard

On Fri, 2020-12-11 at 17:45 +0000, anton.ivanov@cambridgegreys.com
wrote:
> 
> +++ b/arch/um/include/asm/atomic.h
> @@ -0,0 +1,7 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_ATOMIC_XOR_H

XOR?

And also here - are the symlinks really needed?

Not that I mind them too much, but it's a bit strange ... and would be a
lot harder to port to other arches, if anyone should be inclined, if
there are hard symlinks to x86.

johannes


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 4/7] um: add a UML specific futex implementation
  2020-12-11 17:45 ` [PATCH v4 4/7] um: add a UML specific futex implementation anton.ivanov
@ 2020-12-11 20:10   ` Johannes Berg
  0 siblings, 0 replies; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 20:10 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard

On Fri, 2020-12-11 at 17:45 +0000, anton.ivanov@cambridgegreys.com
wrote:
> 
> +++ b/arch/um/include/asm/futex.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_GENERIC_FUTEX_H
> +#define _ASM_GENERIC_FUTEX_H
> +
> +#include <linux/futex.h>
> +#include <linux/uaccess.h>
> +#include <asm/errno.h>
> +
> +
> +extern int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr);
> +extern int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> +			      u32 oldval, u32 newval);

"extern" for functions in header files is kinda pointless, and usually
not used.

(indentation also seems off a bit)

> +/**
> + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
> + *			  argument and comparison of the previous
> + *			  futex value with another constant.

Is there much value in (copying?) the documentation? It wouldn't get
included anywhere, I'd think.

johannes


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 6/7] um: swithch futex ops to cmpxchg
  2020-12-11 17:45 ` [PATCH v4 6/7] um: swithch futex ops to cmpxchg anton.ivanov
@ 2020-12-11 20:12   ` Johannes Berg
  2020-12-11 21:33     ` Anton Ivanov
  0 siblings, 1 reply; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 20:12 UTC (permalink / raw)
  To: anton.ivanov, linux-um; +Cc: richard

Typo in the subject :)

> As a result of switching from emulated to true atomic cmpxchg
> we can use cmpxchg in the corresponding um futex op.

But this seems odd - maybe just reorder the patches so the futex code
can directly have this squashed in?

johannes



_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 3/7] um: "borrow" atomics from x86 architecture
  2020-12-11 20:08   ` Johannes Berg
@ 2020-12-11 21:31     ` Anton Ivanov
  0 siblings, 0 replies; 21+ messages in thread
From: Anton Ivanov @ 2020-12-11 21:31 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard

On 11/12/2020 20:08, Johannes Berg wrote:
> On Fri, 2020-12-11 at 17:45 +0000, anton.ivanov@cambridgegreys.com
> wrote:
>> +++ b/arch/um/include/asm/atomic.h
>> @@ -0,0 +1,7 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ASM_ATOMIC_XOR_H
> XOR?

Brainfart

Will fix it in v5.

>
> And also here - are the symlinks really needed?
>
> Not that I mind them too much, but it's a bit strange ... and would be a
> lot harder to port to other arches, if anyone should be inclined, if
> there are hard symlinks to x86.
>
> johannes
>
>

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 6/7] um: swithch futex ops to cmpxchg
  2020-12-11 20:12   ` Johannes Berg
@ 2020-12-11 21:33     ` Anton Ivanov
  0 siblings, 0 replies; 21+ messages in thread
From: Anton Ivanov @ 2020-12-11 21:33 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard

On 11/12/2020 20:12, Johannes Berg wrote:
> Typo in the subject :)
>
>> As a result of switching from emulated to true atomic cmpxchg
>> we can use cmpxchg in the corresponding um futex op.
> But this seems odd - maybe just reorder the patches so the futex code
> can directly have this squashed in?

You do not want to do that if you are still using the generic cmpxchg. 
That is quite cumbersome. If we reorder the cmpxchg before the futex, 
then it can be folded in.

I will do it in v5

A.

>
> johannes
>
>
>

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 20:07   ` Johannes Berg
@ 2020-12-11 21:57     ` Anton Ivanov
  2020-12-11 22:00       ` Johannes Berg
  0 siblings, 1 reply; 21+ messages in thread
From: Anton Ivanov @ 2020-12-11 21:57 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard

On 11/12/2020 20:07, Johannes Berg wrote:
>> +++ b/arch/um/include/asm/fpu/api.h
>> @@ -0,0 +1,14 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +#ifndef _ASM_UM_FPU_API_H
>> +#define _ASM_UM_FPU_API_H
>> +
>> +/* Copyright (c) 2020 Cambridge Greys Ltd
>> + * Copyright (c) 2020 Red Hat Inc.
>> + * A set of "dummy" defines to allow the direct inclusion
>> + * of x86 optimized copy, xor, etc routines into the
>> + * UML code tree. */
>> +
>> +#define kernel_fpu_begin() (void)0
>> +#define kernel_fpu_end() (void)0
> I think I would prefer those to be static inlines, but YMMV.
>
> johannes
>
>> diff --git a/arch/um/include/asm/xor-x86.h b/arch/um/include/asm/xor-x86.h
>> new file mode 120000
>> index 000000000000..beff7de6890d
>> --- /dev/null
>> +++ b/arch/um/include/asm/xor-x86.h
>> @@ -0,0 +1 @@
>> +../../../x86/include/asm/xor.h
> Do these really need to be symlinks? Last I looked, it seemed that
> arch/x86/include/asm/ is actually in the include path?

It is included, but it is included quite far down the list.

We pick up a few things out of there, but if we leave them "as is" they 
all default to their least optimized versions. The results clearly 
demonstrate that too - 30% difference on 64 bit and > 100% on 32 bit.

This is because we do not perform alternatives substitution. Our 
"alternatives" processing function in the UML startup is a noop.

My idea was to override that to the extent possible and get whatever 
mileage is possible without that.

I can give it a try to see how it looks if I use the x86 feature table 
and other bits which are picked up from there, but working with that is 
like pulling teeth without anaesthetic.

On the positive side this means that we can copy the alternatives code 
on x86.

I can give it another go. I tried early on and it was a bit painful.

A.

>
>> --- a/arch/um/include/asm/xor.h
>> +++ b/arch/um/include/asm/xor.h
>> @@ -1,7 +1,22 @@
>>   /* SPDX-License-Identifier: GPL-2.0 */
>> -#include <asm-generic/xor.h>
>> +#ifndef _ASM_UM_XOR_H
>> +#define _ASM_UM_XOR_H
>> +
>> +#ifdef CONFIG_64BIT
>> +#undef CONFIG_X86_32
>> +#else
>> +#define CONFIG_X86_32 1
>> +#endif
>> +
>> +#include <asm/cpufeature.h>
>> +#include <asm/xor-x86.h>
> and thus this could just be
>
> #include_next <asm/xor.h>
>
> without the symlink?
>
>> diff --git a/arch/um/include/asm/xor_32.h b/arch/um/include/asm/xor_32.h
>> new file mode 120000
>> index 000000000000..8a0894e996d7
>> --- /dev/null
>> +++ b/arch/um/include/asm/xor_32.h
>
> And the others don't exist in um anyway, so probably aren't needed?
>
> But maybe I'm confused about the include path, I didn't double-check
> now.
>
> johannes
>
>
>
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
>

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 21:57     ` Anton Ivanov
@ 2020-12-11 22:00       ` Johannes Berg
  2020-12-11 22:40         ` Anton Ivanov
  2020-12-14  9:07         ` Anton Ivanov
  0 siblings, 2 replies; 21+ messages in thread
From: Johannes Berg @ 2020-12-11 22:00 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard

On Fri, 2020-12-11 at 21:57 +0000, Anton Ivanov wrote:
> 
> > > --- /dev/null
> > > +++ b/arch/um/include/asm/xor-x86.h
> > > @@ -0,0 +1 @@
> > > +../../../x86/include/asm/xor.h
> > Do these really need to be symlinks? Last I looked, it seemed that
> > arch/x86/include/asm/ is actually in the include path?
> 
> It is included, but it is included quite far down the list.

I see. So you're saying basically we'll get asm-generic/xor.h before the
x86 version, and then we're getting the worst possible implementation,
right?

> We pick up a few things out of there, but if we leave them "as is" they 
> all default to their least optimized versions. The results clearly 
> demonstrate that too - 30% difference on 64 bit and > 100% on 32 bit.

Right.

> This is because we do not perform alternatives substitution. Our 
> "alternatives" processing function in the UML startup is a noop.

Oh, so we *do* get x86, but compatibility with ancient CPUs?

> My idea was to override that to the extent possible and get whatever 
> mileage is possible without that.

Makes sense.

> I can give it a try to see how it looks if I use the x86 feature table 
> and other bits which are picked up from there, but working with that is 
> like pulling teeth without anaesthetic.
> 
> On the positive side this means that we can copy the alternatives code 
> on x86.
> 
> I can give it another go. I tried early on and it was a bit painful.

Yeah, no, not sure ...

Maybe just doing something like

#include "../../../x86/include/asm/xor.h"

would be acceptable? It seems a bit better to me in the sense of being
more obvious than the symlinks... but dunno.

johannes


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 22:00       ` Johannes Berg
@ 2020-12-11 22:40         ` Anton Ivanov
  2020-12-14  9:07         ` Anton Ivanov
  1 sibling, 0 replies; 21+ messages in thread
From: Anton Ivanov @ 2020-12-11 22:40 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard

On 11/12/2020 22:00, Johannes Berg wrote:
> On Fri, 2020-12-11 at 21:57 +0000, Anton Ivanov wrote:
>>>> --- /dev/null
>>>> +++ b/arch/um/include/asm/xor-x86.h
>>>> @@ -0,0 +1 @@
>>>> +../../../x86/include/asm/xor.h
>>> Do these really need to be symlinks? Last I looked, it seemed that
>>> arch/x86/include/asm/ is actually in the include path?
>> It is included, but it is included quite far down the list.
> I see. So you're saying basically we'll get asm-generic/xor.h before the
> x86 version, and then we're getting the worst possible implementation,
> right?

A x86 implementation which is at "worst case scenario defaults" and has not undergone an alternative replacement for the actual CPU features can be as bad as the generic. In fact, in some cases generic may even be better :(

>> We pick up a few things out of there, but if we leave them "as is" they
>> all default to their least optimized versions. The results clearly
>> demonstrate that too - 30% difference on 64 bit and > 100% on 32 bit.
> Right.
>
>> This is because we do not perform alternatives substitution. Our
>> "alternatives" processing function in the UML startup is a noop.
> Oh, so we *do* get x86, but compatibility with ancient CPUs?

Not just ancient CPUs. Ancient BUGGY cpus. It is usually the worst case 
scenario implementation.

>
>> My idea was to override that to the extent possible and get whatever
>> mileage is possible without that.
> Makes sense.
>
>> I can give it a try to see how it looks if I use the x86 feature table
>> and other bits which are picked up from there, but working with that is
>> like pulling teeth without anaesthetic.
>>
>> On the positive side this means that we can copy the alternatives code
>> on x86.
>>
>> I can give it another go. I tried early on and it was a bit painful.
> Yeah, no, not sure ...
>
> Maybe just doing something like
>
> #include "../../../x86/include/asm/xor.h"
>
> would be acceptable? It seems a bit better to me in the sense of being
> more obvious than the symlinks... but dunno.

That (and everything else) relies on the CPU Features available macros. 
I "cheated" on those and created our own using just one ulong - the 5-6 
bits which are relevant to us. That is the original idea behind pulling 
things in and symlinking - to make sure they pick OUR defs, not the 
whole array of features out of the x86 tree.

We also need to noop or redefine a few things like fpu_exit, fpu_enter, etc.

However, based on the discussion we have had so far,  I should revisit 
this and do it ONLY where it is needed, not in all cases.

I will give it another go on Monday.

Looking at the results, it's definitely worth it. For me it is a 
question of how to do it, not "should we do it". 30% difference is in 
the realm of "definitely worth it".

>
> johannes
>
>

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-11 22:00       ` Johannes Berg
  2020-12-11 22:40         ` Anton Ivanov
@ 2020-12-14  9:07         ` Anton Ivanov
  2020-12-14  9:12           ` Johannes Berg
  1 sibling, 1 reply; 21+ messages in thread
From: Anton Ivanov @ 2020-12-14  9:07 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard


On 11/12/2020 22:00, Johannes Berg wrote:
> On Fri, 2020-12-11 at 21:57 +0000, Anton Ivanov wrote:
>>>> --- /dev/null
>>>> +++ b/arch/um/include/asm/xor-x86.h
>>>> @@ -0,0 +1 @@
>>>> +../../../x86/include/asm/xor.h
>>> Do these really need to be symlinks? Last I looked, it seemed that
>>> arch/x86/include/asm/ is actually in the include path?
>> It is included, but it is included quite far down the list.
> I see. So you're saying basically we'll get asm-generic/xor.h before the
> x86 version, and then we're getting the worst possible implementation,
> right?
>
>> We pick up a few things out of there, but if we leave them "as is" they
>> all default to their least optimized versions. The results clearly
>> demonstrate that too - 30% difference on 64 bit and > 100% on 32 bit.
> Right.
>
>> This is because we do not perform alternatives substitution. Our
>> "alternatives" processing function in the UML startup is a noop.
> Oh, so we *do* get x86, but compatibility with ancient CPUs?

I had a look at the alternatives processing in x86 once again. The exact definition of what we get to use is: "ancient, buggy CPUs in SMP mode".

So in addition to using one of the worst case scenario implementations, we also do not do patching of SMP verbiage to UP where appropriate which is done on x86.

I just had a go at trying to reuse the aforementioned alternatives processing "as is" from the x86 tree.

This is pretty much a no-go from the start. We can't use it. It relies on "owning" int handlers and generating int instructions in places. If I understand it correctly, it will interfere with gdb by doing its own INT 3 work. Key parts of it are also "if-defed away" from us at present.

IMHO - it will have to be rewritten mostly from scratch for UML.

I will have a look if we can reuse the cpu feature and bug definitions instead of using our own. This will allow us to reuse the bits which relate to crypto - xor, etc as those are cases/ifdefs instead of alternatives.

A.

>
>> My idea was to override that to the extent possible and get whatever
>> mileage is possible without that.
> Makes sense.
>
>> I can give it a try to see how it looks if I use the x86 feature table
>> and other bits which are picked up from there, but working with that is
>> like pulling teeth without anaesthetic.
>>
>> On the positive side this means that we can copy the alternatives code
>> on x86.
>>
>> I can give it another go. I tried early on and it was a bit painful.
> Yeah, no, not sure ...
>
> Maybe just doing something like
>
> #include "../../../x86/include/asm/xor.h"
>
> would be acceptable? It seems a bit better to me in the sense of being
> more obvious than the symlinks... but dunno.
>
> johannes
>
>
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
>
-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-14  9:07         ` Anton Ivanov
@ 2020-12-14  9:12           ` Johannes Berg
  2020-12-14  9:36             ` Anton Ivanov
  0 siblings, 1 reply; 21+ messages in thread
From: Johannes Berg @ 2020-12-14  9:12 UTC (permalink / raw)
  To: Anton Ivanov, linux-um; +Cc: richard

On Mon, 2020-12-14 at 09:07 +0000, Anton Ivanov wrote:
> 
> I had a look at the alternatives processing in x86 once again. The
> exact definition of what we get to use is: "ancient, buggy CPUs in SMP
> mode".

fun.

> So in addition to using one of the worst case scenario
> implementations, we also do not do patching of SMP verbiage to UP
> where appropriate which is done on x86.

right.

> I just had a go at trying to reuse the aforementioned alternatives
> processing "as is" from the x86 tree.
> 
> This is pretty much a no-go from the start. We can't use it. It relies
> on "owning" int handlers and generating int instructions in places. If
> I understand it correctly, it will interfere with gdb by doing its own
> INT 3 work. Key parts of it are also "if-defed away" from us at
> present.
> 
> IMHO - it will have to be rewritten mostly from scratch for UML.
> 
> I will have a look if we can reuse the cpu feature and bug definitions
> instead of using our own. This will allow us to reuse the bits which
> relate to crypto - xor, etc as those are cases/ifdefs instead of
> alternatives.

I agree.

But I feel like something I said sent you on this path, and that never
was my intent. I don't mind using the x86 header files in a fashion
similar to what you did, I just didn't like the symlinks because it
seems those would be awkward if somebody ever wants to port UML to some
other architecture ...

Maybe just put there header files with

#include "../../x86/include/asm/xor.h"

or something like that just to avoid the symlinks?

johannes


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v4 2/7] um: enable the use of optimized xor routines in UML
  2020-12-14  9:12           ` Johannes Berg
@ 2020-12-14  9:36             ` Anton Ivanov
  0 siblings, 0 replies; 21+ messages in thread
From: Anton Ivanov @ 2020-12-14  9:36 UTC (permalink / raw)
  To: Johannes Berg, linux-um; +Cc: richard


On 14/12/2020 09:12, Johannes Berg wrote:
> On Mon, 2020-12-14 at 09:07 +0000, Anton Ivanov wrote:
>> I had a look at the alternatives processing in x86 once again. The
>> exact definition of what we get to use is: "ancient, buggy CPUs in SMP
>> mode".
> fun.
>
>> So in addition to using one of the worst case scenario
>> implementations, we also do not do patching of SMP verbiage to UP
>> where appropriate which is done on x86.
> right.
>
>> I just had a go at trying to reuse the aforementioned alternatives
>> processing "as is" from the x86 tree.
>>
>> This is pretty much a no-go from the start. We can't use it. It relies
>> on "owning" int handlers and generating int instructions in places. If
>> I understand it correctly, it will interfere with gdb by doing its own
>> INT 3 work. Key parts of it are also "if-defed away" from us at
>> present.
>>
>> IMHO - it will have to be rewritten mostly from scratch for UML.
>>
>> I will have a look if we can reuse the cpu feature and bug definitions
>> instead of using our own. This will allow us to reuse the bits which
>> relate to crypto - xor, etc as those are cases/ifdefs instead of
>> alternatives.
> I agree.
>
> But I feel like something I said sent you on this path, and that never
> was my intent. I don't mind using the x86 header files in a fashion
> similar to what you did, I just didn't like the symlinks because it
> seems those would be awkward if somebody ever wants to port UML to some
> other architecture ...
>
> Maybe just put there header files with
>
> #include "../../x86/include/asm/xor.h"
>
> or something like that just to avoid the symlinks?

I agree with you - that is how we should use them.

We still need to have "features" code and boot time CPU detection.

I am trying to figure out if it is worth it to borrow some of that instead of my original hack which reused the names with a different back-end. That is another can of worms because some of that code is actually generated at compile time by x86 so I need to figure out how to pull that into our build system.

> johannes
>
>
-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2020-12-14  9:36 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-12-11 17:45 Amended and retested for 32 bit "borrow ops" series anton.ivanov
2020-12-11 17:45 ` [PATCH v4 1/7] um: allow the use of glibc functions instead of builtins anton.ivanov
2020-12-11 20:03   ` Johannes Berg
2020-12-11 17:45 ` [PATCH v4 2/7] um: enable the use of optimized xor routines in UML anton.ivanov
2020-12-11 20:07   ` Johannes Berg
2020-12-11 21:57     ` Anton Ivanov
2020-12-11 22:00       ` Johannes Berg
2020-12-11 22:40         ` Anton Ivanov
2020-12-14  9:07         ` Anton Ivanov
2020-12-14  9:12           ` Johannes Berg
2020-12-14  9:36             ` Anton Ivanov
2020-12-11 17:45 ` [PATCH v4 3/7] um: "borrow" atomics from x86 architecture anton.ivanov
2020-12-11 20:08   ` Johannes Berg
2020-12-11 21:31     ` Anton Ivanov
2020-12-11 17:45 ` [PATCH v4 4/7] um: add a UML specific futex implementation anton.ivanov
2020-12-11 20:10   ` Johannes Berg
2020-12-11 17:45 ` [PATCH v4 5/7] um: "borrow" cmpxchg from x86 tree in UML anton.ivanov
2020-12-11 17:45 ` [PATCH v4 6/7] um: swithch futex ops to cmpxchg anton.ivanov
2020-12-11 20:12   ` Johannes Berg
2020-12-11 21:33     ` Anton Ivanov
2020-12-11 17:45 ` [PATCH v4 7/7] um: borrow bitops from the x86 tree anton.ivanov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox