* [PATCH v6] um: Enable preemption in UML
@ 2023-09-22 10:56 anton.ivanov
2023-09-22 10:56 ` Anton Ivanov
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: anton.ivanov @ 2023-09-22 10:56 UTC (permalink / raw)
To: linux-um; +Cc: johannes, richard, Anton Ivanov
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
1. Preemption requires saving/restoring FPU state. This patch
adds support for it using GCC intrinsics as well as appropriate
storage space in the thread structure.
2. irq critical sections need preempt_disable()/preempt_enable().
3. TLB critical sections need preempt_disable()/preempt_enable().
4. UML TLB flush is also invoked during a fork. This happens
with interrupts and preempt disabled which disagrees with the
standard mm locking via rwsem. The mm lock for this code path
had to be replaced with an rcu.
5. The FPU state area is statically allocated depending on
the enabled PREEMPT options. PREEMPT_DYNAMIC and choosing the
preemption model at start time is disabled for the UM arch.
Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
arch/um/Kconfig | 2 +-
arch/um/include/asm/fpu/api.h | 9 ++-
arch/um/include/asm/processor-generic.h | 4 ++
arch/um/kernel/Makefile | 4 ++
arch/um/kernel/fpu.c | 75 +++++++++++++++++++++++++
arch/um/kernel/irq.c | 2 +
arch/um/kernel/tlb.c | 21 ++++++-
7 files changed, 111 insertions(+), 6 deletions(-)
create mode 100644 arch/um/kernel/fpu.c
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index b5e179360534..19176fde82f3 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -11,7 +11,7 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
- select ARCH_NO_PREEMPT
+ select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
index 71bfd9ef3938..9e7680bf48f0 100644
--- a/arch/um/include/asm/fpu/api.h
+++ b/arch/um/include/asm/fpu/api.h
@@ -4,12 +4,15 @@
/* Copyright (c) 2020 Cambridge Greys Ltd
* Copyright (c) 2020 Red Hat Inc.
- * A set of "dummy" defines to allow the direct inclusion
- * of x86 optimized copy, xor, etc routines into the
- * UML code tree. */
+ */
+#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+#else
#define kernel_fpu_begin() (void)0
#define kernel_fpu_end() (void)0
+#endif
static inline bool irq_fpu_usable(void)
{
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 7414154b8e9a..9970e70be1e4 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -44,6 +44,10 @@ struct thread_struct {
} cb;
} u;
} request;
+#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
+/* Intel docs require xsave/xrestore area to be aligned to 64 bytes */
+ u8 fpu[2048] __aligned(64);
+#endif
};
#define INIT_THREAD \
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 811188be954c..c616e884a488 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
+obj-$(CONFIG_PREEMPT) += fpu.o
+obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
USER_OBJS := config.o
+CFLAGS_fpu.o += -mxsave -mxsaveopt
+
include $(srctree)/arch/um/scripts/Makefile.rules
targets := config.c config.tmp capflags.c
diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
new file mode 100644
index 000000000000..4817276b2a26
--- /dev/null
+++ b/arch/um/kernel/fpu.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Cambridge Greys Ltd
+ * Copyright (C) 2023 Red Hat Inc
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <asm/fpu/api.h>
+#include <asm/cpufeature.h>
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ */
+
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+/* UML and driver code it pulls out of the x86 tree knows about 387 features
+ * up to and including AVX512. TILE, etc are not yet supported.
+ */
+
+#define KNOWN_387_FEATURES 0xFF
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+
+ WARN_ON(this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, true);
+
+#ifdef CONFIG_64BIT
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+ __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else {
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else
+ __builtin_ia32_fxsave64(¤t->thread.fpu);
+ }
+#else
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
+ __builtin_ia32_xsaveopt(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else {
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xsave(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else
+ __builtin_ia32_fxsave(¤t->thread.fpu);
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ WARN_ON(!this_cpu_read(in_kernel_fpu));
+
+#ifdef CONFIG_64BIT
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xrstor64(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else
+ __builtin_ia32_fxrstor64(¤t->thread.fpu);
+#else
+ if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
+ __builtin_ia32_xrstor(¤t->thread.fpu, KNOWN_387_FEATURES);
+ else
+ __builtin_ia32_fxrstor(¤t->thread.fpu);
+#endif
+ this_cpu_write(in_kernel_fpu, false);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 635d44606bfe..c02525da45df 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
+ preempt_disable();
_sigio_handler(regs, irqs_suspended);
+ preempt_enable();
}
static struct irq_entry *get_irq_entry_by_fd(int fd)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 7d050ab0f78a..00b1870c2d62 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -322,6 +322,8 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
unsigned long addr = start_addr, next;
int ret = 0, userspace = 1;
+ preempt_disable();
+
hvc = INIT_HVC(mm, force, userspace);
pgd = pgd_offset(mm, addr);
do {
@@ -346,6 +348,7 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
"process: %d\n", task_tgid_vnr(current));
mm_idp->kill = 1;
}
+ preempt_enable();
}
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
@@ -362,6 +365,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
mm = &init_mm;
hvc = INIT_HVC(mm, force, userspace);
+
+ preempt_disable();
+
for (addr = start; addr < end;) {
pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd)) {
@@ -449,6 +455,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
if (err < 0)
panic("flush_tlb_kernel failed, errno = %d\n", err);
+
+ preempt_enable();
+
return updated;
}
@@ -466,6 +475,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
address &= PAGE_MASK;
+ preempt_disable();
+
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto kill;
@@ -520,6 +531,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
*pte = pte_mkuptodate(*pte);
+ preempt_enable();
return;
kill:
@@ -597,8 +609,13 @@ void force_flush_all(void)
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
- mmap_read_lock(mm);
+ /* We use a RCU lock instead of a mm lock, because
+ * this can be invoked out of critical/atomic sections
+ * and that does not agree with the sleepable semantics
+ * of the standard semaphore based mm lock.
+ */
+ rcu_read_lock();
for_each_vma(vmi, vma)
fix_range(mm, vma->vm_start, vma->vm_end, 1);
- mmap_read_unlock(mm);
+ rcu_read_unlock();
}
--
2.30.2
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH v6] um: Enable preemption in UML
2023-09-22 10:56 [PATCH v6] um: Enable preemption in UML anton.ivanov
@ 2023-09-22 10:56 ` Anton Ivanov
2023-09-22 11:22 ` Johannes Berg
2023-09-22 11:59 ` Peter Lafreniere
2 siblings, 0 replies; 6+ messages in thread
From: Anton Ivanov @ 2023-09-22 10:56 UTC (permalink / raw)
To: linux-um; +Cc: johannes, richard
On 22/09/2023 11:56, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>
> 1. Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics as well as appropriate
> storage space in the thread structure.
>
> 2. irq critical sections need preempt_disable()/preempt_enable().
>
> 3. TLB critical sections need preempt_disable()/preempt_enable().
New in this patch - 4 and 5.
>
> 4. UML TLB flush is also invoked during a fork. This happens
> with interrupts and preempt disabled which disagrees with the
> standard mm locking via rwsem. The mm lock for this code path
> had to be replaced with an rcu.
>
> 5. The FPU state area is statically allocated depending on
> the enabled PREEMPT options. PREEMPT_DYNAMIC and chosing the
> preemption model at start time is disabled for the UM arch.
>
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
> arch/um/Kconfig | 2 +-
> arch/um/include/asm/fpu/api.h | 9 ++-
> arch/um/include/asm/processor-generic.h | 4 ++
> arch/um/kernel/Makefile | 4 ++
> arch/um/kernel/fpu.c | 75 +++++++++++++++++++++++++
> arch/um/kernel/irq.c | 2 +
> arch/um/kernel/tlb.c | 21 ++++++-
> 7 files changed, 111 insertions(+), 6 deletions(-)
> create mode 100644 arch/um/kernel/fpu.c
>
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..19176fde82f3 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,7 @@ config UML
> select ARCH_HAS_KCOV
> select ARCH_HAS_STRNCPY_FROM_USER
> select ARCH_HAS_STRNLEN_USER
> - select ARCH_NO_PREEMPT
> + select ARCH_NO_PREEMPT_DYNAMIC
> select HAVE_ARCH_AUDITSYSCALL
> select HAVE_ARCH_KASAN if X86_64
> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..9e7680bf48f0 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -4,12 +4,15 @@
>
> /* Copyright (c) 2020 Cambridge Greys Ltd
> * Copyright (c) 2020 Red Hat Inc.
> - * A set of "dummy" defines to allow the direct inclusion
> - * of x86 optimized copy, xor, etc routines into the
> - * UML code tree. */
> + */
>
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +extern void kernel_fpu_begin(void);
> +extern void kernel_fpu_end(void);
> +#else
> #define kernel_fpu_begin() (void)0
> #define kernel_fpu_end() (void)0
> +#endif
>
> static inline bool irq_fpu_usable(void)
> {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..9970e70be1e4 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,10 @@ struct thread_struct {
> } cb;
> } u;
> } request;
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +/* Intel docs require xsave/xrestore area to be aligned to 64 bytes */
> + u8 fpu[2048] __aligned(64);
> +#endif
> };
>
> #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..c616e884a488 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o
> obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
> +obj-$(CONFIG_PREEMPT) += fpu.o
> +obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
>
> USER_OBJS := config.o
>
> +CFLAGS_fpu.o += -mxsave -mxsaveopt
> +
> include $(srctree)/arch/um/scripts/Makefile.rules
>
> targets := config.c config.tmp capflags.c
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4817276b2a26
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu/api.h>
> +#include <asm/cpufeature.h>
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +/* UML and driver code it pulls out of the x86 tree knows about 387 features
> + * up to and including AVX512. TILE, etc are not yet supported.
> + */
> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +void kernel_fpu_begin(void)
> +{
> + preempt_disable();
> +
> + WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> + this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else {
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else
> + __builtin_ia32_fxsave64(¤t->thread.fpu);
> + }
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else {
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xsave(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else
> + __builtin_ia32_fxsave(¤t->thread.fpu);
> + }
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> + WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor64(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else
> + __builtin_ia32_fxrstor64(¤t->thread.fpu);
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else
> + __builtin_ia32_fxrstor(¤t->thread.fpu);
> +#endif
> + this_cpu_write(in_kernel_fpu, false);
> +
> + preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +
> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 635d44606bfe..c02525da45df 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>
> void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
> {
> + preempt_disable();
> _sigio_handler(regs, irqs_suspended);
> + preempt_enable();
> }
>
> static struct irq_entry *get_irq_entry_by_fd(int fd)
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 7d050ab0f78a..00b1870c2d62 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -322,6 +322,8 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
> unsigned long addr = start_addr, next;
> int ret = 0, userspace = 1;
>
> + preempt_disable();
> +
> hvc = INIT_HVC(mm, force, userspace);
> pgd = pgd_offset(mm, addr);
> do {
> @@ -346,6 +348,7 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
> "process: %d\n", task_tgid_vnr(current));
> mm_idp->kill = 1;
> }
> + preempt_enable();
> }
>
> static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
> @@ -362,6 +365,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>
> mm = &init_mm;
> hvc = INIT_HVC(mm, force, userspace);
> +
> + preempt_disable();
> +
> for (addr = start; addr < end;) {
> pgd = pgd_offset(mm, addr);
> if (!pgd_present(*pgd)) {
> @@ -449,6 +455,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>
> if (err < 0)
> panic("flush_tlb_kernel failed, errno = %d\n", err);
> +
> + preempt_enable();
> +
> return updated;
> }
>
> @@ -466,6 +475,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>
> address &= PAGE_MASK;
>
> + preempt_disable();
> +
> pgd = pgd_offset(mm, address);
> if (!pgd_present(*pgd))
> goto kill;
> @@ -520,6 +531,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>
> *pte = pte_mkuptodate(*pte);
>
> + preempt_enable();
> return;
>
> kill:
> @@ -597,8 +609,13 @@ void force_flush_all(void)
> struct vm_area_struct *vma;
> VMA_ITERATOR(vmi, mm, 0);
>
> - mmap_read_lock(mm);
> + /* We use a RCU lock instead of a mm lock, because
> + * this can be invoked out of critical/atomic sections
> + * and that does not agree with the sleepable semantics
> + * of the standard semaphore based mm lock.
> + */
> + rcu_read_lock();
> for_each_vma(vmi, vma)
> fix_range(mm, vma->vm_start, vma->vm_end, 1);
> - mmap_read_unlock(mm);
> + rcu_read_unlock();
> }
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH v6] um: Enable preemption in UML
2023-09-22 10:56 [PATCH v6] um: Enable preemption in UML anton.ivanov
2023-09-22 10:56 ` Anton Ivanov
@ 2023-09-22 11:22 ` Johannes Berg
2023-09-22 11:55 ` Anton Ivanov
2023-09-22 20:40 ` Richard Weinberger
2023-09-22 11:59 ` Peter Lafreniere
2 siblings, 2 replies; 6+ messages in thread
From: Johannes Berg @ 2023-09-22 11:22 UTC (permalink / raw)
To: anton.ivanov, linux-um; +Cc: richard
>
> 4. UML TLB flush is also invoked during a fork. This happens
> with interrupts and preempt disabled which disagrees with the
> standard mm locking via rwsem. The mm lock for this code path
> had to be replaced with an rcu.
For the record, even if I figured out this gets rid of the complaints,
I'm not entirely happy with this - yeah it's safe now, but it still
feels entirely wrong.
But I guess we can also do this and then remove it entirely like the
patch I just posted. Order probably doesn't matter much.
(Note my patch removes the locking completely since it's now invoked by
the kernel even under write mmap lock.)
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4817276b2a26
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else {
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES);
> + else
> + __builtin_ia32_fxsave64(¤t->thread.fpu);
:)
OK, I'll stop mentioning it ;-)
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +
git am complained here about blank line at EOF
> @@ -597,8 +609,13 @@ void force_flush_all(void)
> struct vm_area_struct *vma;
> VMA_ITERATOR(vmi, mm, 0);
>
> - mmap_read_lock(mm);
> + /* We use a RCU lock instead of a mm lock, because
> + * this can be invoked out of critical/atomic sections
> + * and that does not agree with the sleepable semantics
> + * of the standard semaphore based mm lock.
> + */
> + rcu_read_lock();
Yeah I guess ... Seems like a very mechanical description of what's
going on, rather than a description of why this is correct (which
assumes no preempt and no SMP)?
I'd have preferred that, but with the patch I just posted we'll just
kill this entirely so it doesn't matter in the end.
johannes
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH v6] um: Enable preemption in UML
2023-09-22 11:22 ` Johannes Berg
@ 2023-09-22 11:55 ` Anton Ivanov
2023-09-22 20:40 ` Richard Weinberger
1 sibling, 0 replies; 6+ messages in thread
From: Anton Ivanov @ 2023-09-22 11:55 UTC (permalink / raw)
To: Johannes Berg, linux-um; +Cc: richard
On 22/09/2023 12:22, Johannes Berg wrote:
>>
>> 4. UML TLB flush is also invoked during a fork. This happens
>> with interrupts and preempt disabled which disagrees with the
>> standard mm locking via rwsem. The mm lock for this code path
>> had to be replaced with an rcu.
>
> For the record, even if I figured out this gets rid of the complaints,
> I'm not entirely happy with this - yeah it's safe now, but it still
> feels entirely wrong.
>
> But I guess we can also do this and then remove it entirely like the
> patch I just posted. Order probably doesn't matter much.
Indeed.
We opened the can of worms. It will be a while until they are all in bigger
and more comfortable new quarters.
>
> (Note my patch removes the locking completely since it's now invoked by
> the kernel even under write mmap lock.)
Ack.
>
>> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
>> new file mode 100644
>> index 000000000000..4817276b2a26
>> --- /dev/null
>> +++ b/arch/um/kernel/fpu.c
>
>> +#ifdef CONFIG_64BIT
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
>> + __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES);
>> + else {
>> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
>> + __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES);
>> + else
>> + __builtin_ia32_fxsave64(¤t->thread.fpu);
>
> :)
>
> OK, I'll stop mentioning it ;-)
>
>> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
>> +
>
> git am complained here about blank line at EOF
>
>> @@ -597,8 +609,13 @@ void force_flush_all(void)
>> struct vm_area_struct *vma;
>> VMA_ITERATOR(vmi, mm, 0);
>>
>> - mmap_read_lock(mm);
>> + /* We use a RCU lock instead of a mm lock, because
>> + * this can be invoked out of critical/atomic sections
>> + * and that does not agree with the sleepable semantics
>> + * of the standard semaphore based mm lock.
>> + */
>> + rcu_read_lock();
>
> Yeah I guess ... Seems like a very mechanical description of what's
> going on, rather than a description of why this is correct (which
> assumes no preempt and no SMP)?
>
> I'd have preferred that, but with the patch I just posted we'll just
> kill this entirely so it doesn't matter in the end.
>
> johannes
>
--
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH v6] um: Enable preemption in UML
2023-09-22 11:22 ` Johannes Berg
2023-09-22 11:55 ` Anton Ivanov
@ 2023-09-22 20:40 ` Richard Weinberger
1 sibling, 0 replies; 6+ messages in thread
From: Richard Weinberger @ 2023-09-22 20:40 UTC (permalink / raw)
To: Johannes Berg; +Cc: anton ivanov, linux-um
----- Ursprüngliche Mail -----
> Von: "Johannes Berg" <johannes@sipsolutions.net>
>> - mmap_read_lock(mm);
>> + /* We use a RCU lock instead of a mm lock, because
>> + * this can be invoked out of critical/atomic sections
>> + * and that does not agree with the sleepable semantics
>> + * of the standard semaphore based mm lock.
>> + */
>> + rcu_read_lock();
>
> Yeah I guess ... Seems like a very mechanical description of what's
> going on, rather than a description of why this is correct (which
> assumes no preempt and no SMP)?
Is the VMA list actually RCU aware?
On UML we might get away with that, but I agree with Johannes, this needs a more
detailed explanation.
Thanks,
//richard
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v6] um: Enable preemption in UML
2023-09-22 10:56 [PATCH v6] um: Enable preemption in UML anton.ivanov
2023-09-22 10:56 ` Anton Ivanov
2023-09-22 11:22 ` Johannes Berg
@ 2023-09-22 11:59 ` Peter Lafreniere
2 siblings, 0 replies; 6+ messages in thread
From: Peter Lafreniere @ 2023-09-22 11:59 UTC (permalink / raw)
To: anton.ivanov; +Cc: johannes, linux-um, richard
On Fri, Sept 22, 2023 at 06:56, anton.ivanov@cambridgegreys.com wrote:
>
> From: Anton Ivanov anton.ivanov@cambridgegreys.com
>
>
> 1. Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics as well as appropriate
> storage space in the thread structure.
>
> 2. irq critical sections need preempt_disable()/preempt_enable().
>
> 3. TLB critical sections need preempt_disable()/preempt_enable().
>
> 4. UML TLB flush is also invoked during a fork. This happens
> with interrupts and preempt disabled which disagrees with the
> standard mm locking via rwsem. The mm lock for this code path
> had to be replaced with an rcu.
"Had to be"? Why? It's a very unconventional locking method
and should have some justification.
>
> 5. The FPU state area is statically allocated depending on
> the enabled PREEMPT options. PREEMPT_DYNAMIC and choosing the
> preemption model at start time is disabled for the UM arch.
>
> Signed-off-by: Anton Ivanov anton.ivanov@cambridgegreys.com
>
> ---
> arch/um/Kconfig | 2 +-
> arch/um/include/asm/fpu/api.h | 9 ++-
> arch/um/include/asm/processor-generic.h | 4 ++
> arch/um/kernel/Makefile | 4 ++
> arch/um/kernel/fpu.c | 75 +++++++++++++++++++++++++
> arch/um/kernel/irq.c | 2 +
> arch/um/kernel/tlb.c | 21 ++++++-
> 7 files changed, 111 insertions(+), 6 deletions(-)
> create mode 100644 arch/um/kernel/fpu.c
>
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..19176fde82f3 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,7 @@ config UML
> select ARCH_HAS_KCOV
> select ARCH_HAS_STRNCPY_FROM_USER
> select ARCH_HAS_STRNLEN_USER
> - select ARCH_NO_PREEMPT
> + select ARCH_NO_PREEMPT_DYNAMIC
> select HAVE_ARCH_AUDITSYSCALL
> select HAVE_ARCH_KASAN if X86_64
> select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..9e7680bf48f0 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -4,12 +4,15 @@
>
> /* Copyright (c) 2020 Cambridge Greys Ltd
> * Copyright (c) 2020 Red Hat Inc.
> - * A set of "dummy" defines to allow the direct inclusion
> - * of x86 optimized copy, xor, etc routines into the
> - * UML code tree. */
> + */
>
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +extern void kernel_fpu_begin(void);
> +extern void kernel_fpu_end(void);
> +#else
> #define kernel_fpu_begin() (void)0
> #define kernel_fpu_end() (void)0
> +#endif
>
> static inline bool irq_fpu_usable(void)
> {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..9970e70be1e4 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,10 @@ struct thread_struct {
> } cb;
> } u;
> } request;
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +/* Intel docs require xsave/xrestore area to be aligned to 64 bytes */
> + u8 fpu[2048] __aligned(64);
> +#endif
> };
>
> #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..c616e884a488 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o
> obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
> +obj-$(CONFIG_PREEMPT) += fpu.o
> +obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
>
> USER_OBJS := config.o
>
> +CFLAGS_fpu.o += -mxsave -mxsaveopt
> +
> include $(srctree)/arch/um/scripts/Makefile.rules
>
> targets := config.c config.tmp capflags.c
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4817276b2a26
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
>
> +#include <linux/init.h>
>
> +#include <asm/fpu/api.h>
>
> +#include <asm/cpufeature.h>
>
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +/* UML and driver code it pulls out of the x86 tree knows about 387 features
> + * up to and including AVX512. TILE, etc are not yet supported.
> + */
> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +void kernel_fpu_begin(void)
> +{
> + preempt_disable();
> +
> + WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> + this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt64(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else {
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xsave64(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else
> + __builtin_ia32_fxsave64(¤t->thread.fpu);
>
> + }
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> + __builtin_ia32_xsaveopt(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else {
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xsave(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else
> + __builtin_ia32_fxsave(¤t->thread.fpu);
>
> + }
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
Johannes is right about how flattening the else branch makes
it a bit more readable, imo.
> +void kernel_fpu_end(void)
> +{
> + WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor64(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else
> + __builtin_ia32_fxrstor64(¤t->thread.fpu);
>
> +#else
> + if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> + __builtin_ia32_xrstor(¤t->thread.fpu, KNOWN_387_FEATURES);
>
> + else
> + __builtin_ia32_fxrstor(¤t->thread.fpu);
>
> +#endif
> + this_cpu_write(in_kernel_fpu, false);
> +
> + preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
--->8---
> @@ -597,8 +609,13 @@ void force_flush_all(void)
> struct vm_area_struct *vma;
> VMA_ITERATOR(vmi, mm, 0);
>
> - mmap_read_lock(mm);
> + / We use a RCU lock instead of a mm lock, because
> + * this can be invoked out of critical/atomic sections
> + * and that does not agree with the sleepable semantics
> + * of the standard semaphore based mm lock.
> + */
As Johannes said, this comment doesn't explain why it's needed,
just that an rcu lock is needed.
> + rcu_read_lock();
> for_each_vma(vmi, vma)
> fix_range(mm, vma->vm_start, vma->vm_end, 1);
>
> - mmap_read_unlock(mm);
> + rcu_read_unlock();
> }
> --
> 2.30.2
I didn't audit the assortment of preempt_{enable,disable}() pairs,
but I did test this patch with defconfig + debug.config + CONFIG_PREEMPT=y
for 15 minutes and I didn't see any warnings that used to show up.
I think that we should fix what currently exists, then rebase this
PREEMPT patch on top of the results. Plus that'll give me time to look
into the avx + !xsave edge case :)
Cheers,
Peter
_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-09-22 20:41 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-09-22 10:56 [PATCH v6] um: Enable preemption in UML anton.ivanov
2023-09-22 10:56 ` Anton Ivanov
2023-09-22 11:22 ` Johannes Berg
2023-09-22 11:55 ` Anton Ivanov
2023-09-22 20:40 ` Richard Weinberger
2023-09-22 11:59 ` Peter Lafreniere
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox