* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
@ 2010-01-05 18:24 Leif Lindholm
2010-01-05 19:43 ` Jamie Lokier
0 siblings, 1 reply; 12+ messages in thread
From: Leif Lindholm @ 2010-01-05 18:24 UTC (permalink / raw)
To: linux-arm-kernel
The SWP instruction was deprecated in the ARMv6 architecture, superseded
by the LDREX/STREX family of instructions for
load-linked/store-conditional operations. The ARMv7 multiprocessing
extensions mandate that SWP/SWPB instructions are treated as undefined
from reset, with the ability to enable them through the System Control
Register SW bit.
This patch adds the alternative solution to emulate the SWP and SWPB
instructions using LDREX/STREX sequences, and log statistics to
/proc/cpu/swp_emulation. To correctly deal with copy-on-write, it also
modifies cpu_v7_set_pte_ext to change the mappings to priviliged RO when
user RO.
Signed-off-by: Leif Lindholm <leif.lindholm@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
---
arch/arm/kernel/Makefile | 1
arch/arm/kernel/swp_emulate.c | 297 +++++++++++++++++++++++++++++++++++++++++
arch/arm/mm/Kconfig | 20 +++
arch/arm/mm/proc-v7.S | 6 +
4 files changed, 324 insertions(+), 0 deletions(-)
create mode 100644 arch/arm/kernel/swp_emulate.c
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index dd00f74..d1befbc 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_ARM_THUMBEE) += thumbee.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_ARM_UNWIND) += unwind.o
obj-$(CONFIG_HAVE_TCM) += tcm.o
+obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o
obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o
AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
new file mode 100644
index 0000000..c77040e
--- /dev/null
+++ b/arch/arm/kernel/swp_emulate.c
@@ -0,0 +1,297 @@
+/*
+ * linux/arch/arm/kernel/swp_emulate.c
+ *
+ * Copyright (C) 2009 ARM Limited
+ * __user_* functions adapted from include/asm/uaccess.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ * store-exclusive for processors that have them disabled (or future ones that
+ * might not implement them).
+ *
+ * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ * Where: Rt = destination
+ * Rt2 = source
+ * Rn = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+#define __user_swp_asm(data, addr, res) \
+ __asm__ __volatile__( \
+ " mov r3, %1\n" \
+ "0: ldrex %1, [%2]\n" \
+ "1: strex %0, r3, [%2]\n" \
+ " cmp %0, #0\n" \
+ " movne %0, %3\n" \
+ "2:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .align 2\n" \
+ "3: mov %0, %4\n" \
+ " b 2b\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 3\n" \
+ " .long 0b, 3b\n" \
+ " .long 1b, 3b\n" \
+ " .previous" \
+ : "=&r" (res), "+r" (data) \
+ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
+ : "cc", "r3")
+
+#define __user_swpb_asm(data, addr, res) \
+ __asm__ __volatile__( \
+ " mov r3, %1\n" \
+ "0: ldrexb %1, [%2]\n" \
+ "1: strexb %0, r3, [%2]\n" \
+ " cmp %0, #0\n" \
+ " movne %0, %3\n" \
+ "2:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .align 2\n" \
+ "3: mov %0, %4\n" \
+ " b 2b\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 3\n" \
+ " .long 0b, 3b\n" \
+ " .long 1b, 3b\n" \
+ " .previous" \
+ : "=&r" (res), "+r" (data) \
+ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
+ : "cc", "r3")
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+ (((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET 16
+#define RT_OFFSET 12
+#define RT2_OFFSET 0
+
+static unsigned int swpcounter;
+static unsigned int swpbcounter;
+static unsigned int abtcounter;
+static long previous_pid;
+
+#ifdef CONFIG_PROC_FS
+static int proc_read_status(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ char *p = page;
+ int len;
+
+ p += sprintf(p, "Emulated SWP:\t\t%u\n", swpcounter);
+ p += sprintf(p, "Emulated SWPB:\t\t%u\n", swpbcounter);
+ p += sprintf(p, "Aborted SWP{B}:\t\t%u\n", abtcounter);
+ if (previous_pid != 0)
+ p += sprintf(p, "Last process:\t\t%ld\n", previous_pid);
+
+ len = (p - page) - off;
+ if (len < 0)
+ len = 0;
+
+ *eof = (len <= count) ? 1 : 0;
+ *start = page + off;
+
+ return len;
+}
+#endif
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+ siginfo_t info;
+
+ if (find_vma(current->mm, addr) == NULL)
+ info.si_code = SEGV_MAPERR;
+ else
+ info.si_code = SEGV_ACCERR;
+
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_addr = (void __user *)regs->ARM_pc;
+
+ pr_debug("SWP{B} emulation: access caused memory abort!\n");
+ arm_notify_die("Illegal memory access", regs, &info, 0, 0);
+
+ if (abtcounter == UINT_MAX)
+ printk(KERN_WARNING \
+ "SWP{B} emulation abort counter wrapped!\n");
+ abtcounter++;
+}
+
+static int emulate_swp(struct pt_regs *regs, unsigned int address,
+ unsigned int destreg, unsigned int data)
+{
+ unsigned int res = 0;
+
+ if (address & 0x3) {
+ /* SWP to unaligned address not permitted */
+ pr_debug("SWP instruction on unaligned pointer!\n");
+ return -EFAULT;
+ }
+
+ do {
+ if (res == -EAGAIN)
+ cond_resched();
+
+ smp_mb();
+ __user_swp_asm(data, address, res);
+ } while ((res == -EAGAIN) && !signal_pending(current));
+
+ if (res == 0) {
+ smp_mb();
+ regs->uregs[destreg] = data;
+ if (swpcounter == UINT_MAX)
+ printk(KERN_WARNING \
+ "SWP emulation counter wrapped!\n");
+ swpcounter++;
+ }
+
+ return res;
+}
+
+static int emulate_swpb(struct pt_regs *regs, unsigned int address,
+ unsigned int destreg, unsigned int data)
+{
+ unsigned int res = 0;
+
+ do {
+ if (res == -EAGAIN)
+ cond_resched();
+
+ smp_mb();
+ __user_swpb_asm(data, address, res);
+ } while ((res == -EAGAIN) && !signal_pending(current));
+
+ if (res == 0) {
+ smp_mb();
+ regs->uregs[destreg] = data;
+ if (swpbcounter == UINT_MAX)
+ printk(KERN_WARNING \
+ "SWPB emulation counter wrapped!\n");
+ swpbcounter++;
+ }
+
+ return res;
+}
+
+/*
+ * swp_handler logs the id of calling process, dissects the instruction, sanity
+ * checks the memory location, calls dedicated functions for SWP/SWPB and deals
+ * with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+ unsigned int address, destreg, data;
+ unsigned int res = 0;
+
+ if (current->pid != previous_pid) {
+ pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
+ current->comm, (unsigned long)current->pid);
+ previous_pid = current->pid;
+ }
+
+ address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
+ data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+ destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+ pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+ EXTRACT_REG_NUM(instr, RN_OFFSET), address,
+ destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
+
+ /* Check access in reasonable access range for both SWP and SWPB */
+ if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+ pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+ (void *)address);
+ res = -EFAULT;
+ } else {
+ /*
+ * Bit 22 of the instruction distinguishes between the SWP and
+ * SWPB variants (bit set means SWPB).
+ */
+ if ((instr & (1 << 22)) == 0)
+ res = emulate_swp(regs, address, destreg, data);
+ else
+ res = emulate_swpb(regs, address, destreg, data);
+ }
+
+ if (res == 0) {
+ /*
+ * On successful emulation, revert the adjustment to the PC
+ * made in kernel/traps.c in order to resume execution@the
+ * instruction following the SWP{B}.
+ */
+ regs->ARM_pc += 4;
+ } else if (res == -EFAULT) {
+ /*
+ * Memory errors do not mean emulation failed.
+ * Set up signal info to return SEGV, then return OK
+ */
+ set_segfault(regs, address);
+ }
+
+ return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+ .instr_mask = 0x0fb00ff0,
+ .instr_val = 0x01000090,
+ .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
+ .cpsr_val = USR_MODE,
+ .fn = swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *res;
+
+#ifndef CONFIG_ALIGNMENT_TRAP
+ res = proc_mkdir("cpu", NULL);
+ if (!res)
+ return -ENOMEM;
+
+ res = create_proc_entry("swp_emulation", S_IRUGO, res);
+#else
+ res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL);
+#endif
+
+ if (!res)
+ return -ENOMEM;
+
+ res->read_proc = proc_read_status;
+#endif /* CONFIG_PROC_FS */
+
+ printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
+ register_undef_hook(&swp_hook);
+
+ return 0;
+}
+
+late_initcall(swp_emulation_init);
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index dd4698c..c45b0d3 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -630,6 +630,26 @@ config ARM_THUMBEE
Say Y here if you have a CPU with the ThumbEE extension and code to
make use of it. Say N for code that can run on CPUs without ThumbEE.
+config SWP_EMULATE
+ bool "Emulate SWP/SWPB instructions"
+ depends on CPU_V7
+ default y if SMP
+ help
+ ARMv6 architecture deprecates use of the SWP/SWPB instructions.
+ ARMv7 multiprocessing extensions introduce the ability to disable
+ these instructions, triggering an undefined instruction exception
+ when executed. Say Y here to enable software emulation of these
+ instructions for userspace (not kernel) using LDREX/STREX.
+ Also creates /proc/cpu/swp_emulation for statistics.
+
+ NOTE: when accessing uncached shared regions, LDREX/STREX rely
+ on an external transaction monitoring block called a global
+ monitor to maintain update atomicity. If your system does not
+ implement a global monitor, this option can cause programs that
+ are map uncached memory to deadlock.
+
+ If unsure, say Y.
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on ARCH_SUPPORTS_BIG_ENDIAN
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3a28521..cb85aeb 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -147,8 +147,10 @@ ENTRY(cpu_v7_set_pte_ext)
tst r1, #L_PTE_USER
orrne r3, r3, #PTE_EXT_AP1
+#ifndef CONFIG_SWP_EMULATE
tstne r3, #PTE_EXT_APX
bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+#endif
tst r1, #L_PTE_EXEC
orreq r3, r3, #PTE_EXT_XN
@@ -275,6 +277,10 @@ __v7_setup:
#ifdef CONFIG_CPU_ENDIAN_BE8
orr r6, r6, #1 << 25 @ big-endian page tables
#endif
+#ifdef CONFIG_SWP_EMULATE
+ orr r5, r5, #(1 << 10) @ set SW bit in "clear"
+ bic r6, r6, #(1 << 10) @ clear it in "mmuset"
+#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-05 18:24 [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3) Leif Lindholm
@ 2010-01-05 19:43 ` Jamie Lokier
2010-01-06 16:23 ` Catalin Marinas
2010-01-06 19:19 ` Leif Lindholm
0 siblings, 2 replies; 12+ messages in thread
From: Jamie Lokier @ 2010-01-05 19:43 UTC (permalink / raw)
To: linux-arm-kernel
Leif Lindholm wrote:
> +/*
> + * Error-checking SWP macros implemented using ldrex{b}/strex{b}
> + */
> +#define __user_swp_asm(data, addr, res) \
> + __asm__ __volatile__( \
> + " mov r3, %1\n" \
> + "0: ldrex %1, [%2]\n" \
> + "1: strex %0, r3, [%2]\n" \
> + " cmp %0, #0\n" \
> + " movne %0, %3\n" \
> + "2:\n" \
> + " .section .fixup,\"ax\"\n" \
> + " .align 2\n" \
> + "3: mov %0, %4\n" \
> + " b 2b\n" \
> + " .previous\n" \
> + " .section __ex_table,\"a\"\n" \
> + " .align 3\n" \
> + " .long 0b, 3b\n" \
> + " .long 1b, 3b\n" \
> + " .previous" \
> + : "=&r" (res), "+r" (data) \
> + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
> + : "cc", "r3")
> +
> +#define __user_swpb_asm(data, addr, res) \
> + __asm__ __volatile__( \
> + " mov r3, %1\n" \
> + "0: ldrexb %1, [%2]\n" \
> + "1: strexb %0, r3, [%2]\n" \
> + " cmp %0, #0\n" \
> + " movne %0, %3\n" \
> + "2:\n" \
> + " .section .fixup,\"ax\"\n" \
> + " .align 2\n" \
> + "3: mov %0, %4\n" \
> + " b 2b\n" \
> + " .previous\n" \
> + " .section __ex_table,\"a\"\n" \
> + " .align 3\n" \
> + " .long 0b, 3b\n" \
> + " .long 1b, 3b\n" \
> + " .previous" \
> + : "=&r" (res), "+r" (data) \
> + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \
> + : "cc", "r3")
They are almost identical. The duplication could be removed by
folding it into a single macro with another argument, like this:
#define __user_swp_asm(data, addr, res, B) \
" mov r3, %1\n" \
"0: ldrex"B" %1, [%2]\n" \
"1: strex"B" %0, r3, [%2]\n" \
Then calling it like this:
__user_swp_asm(data, address, res, "");
__user_swp_asm(data, address, res, "b");
> + if (abtcounter == UINT_MAX)
> + printk(KERN_WARNING \
> + "SWP{B} emulation abort counter wrapped!\n");
> + abtcounter++;
It's not atomic therefore not precise anyway. Why not just use u64,
and skip the test and printk? The code will be shorter and
ironically, faster with u64 because of omitting the test.
> +static int emulate_swp(struct pt_regs *regs, unsigned int address,
> + unsigned int destreg, unsigned int data)
> +static int emulate_swpb(struct pt_regs *regs, unsigned int address,
> + unsigned int destreg, unsigned int data)
Two almost identical functions. I wonder if it would be better to
merge them and take a flag. It would also reduce the compiled code size.
> + do {
> + if (res == -EAGAIN)
> + cond_resched();
> +
> + smp_mb();
> + __user_swp_asm(data, address, res);
> + } while ((res == -EAGAIN) && !signal_pending(current));
Why is the smp_mb() needed? I don't doubt there's a reason, but I
don't see what it is.
The loop looks ok, but it could be simpler in the common path:
while (1) {
smp_mb();
__user_swp_asm(data, address, res);
if (likely(res != -EAGAIN) || signal_pending(current))
break;
cond_resched();
}
> + if (res == 0) {
> + smp_mb();
Why the smp_mb?
> + regs->uregs[destreg] = data;
> + if (swpcounter == UINT_MAX)
> + printk(KERN_WARNING \
> + "SWP emulation counter wrapped!\n");
> + swpcounter++;
As with the other counter, it's probably faster and certainly shorter
code to use a u64 here and omit the test and print.
> +#ifndef CONFIG_ALIGNMENT_TRAP
> + res = proc_mkdir("cpu", NULL);
> + if (!res)
> + return -ENOMEM;
> +
> + res = create_proc_entry("swp_emulation", S_IRUGO, res);
> +#else
> + res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL);
> +#endif
? Is that to work with different kernel versions?
> diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> index 3a28521..cb85aeb 100644
> --- a/arch/arm/mm/proc-v7.S
> +++ b/arch/arm/mm/proc-v7.S
> @@ -147,8 +147,10 @@ ENTRY(cpu_v7_set_pte_ext)
>
> tst r1, #L_PTE_USER
> orrne r3, r3, #PTE_EXT_AP1
> +#ifndef CONFIG_SWP_EMULATE
> tstne r3, #PTE_EXT_APX
> bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
> +#endif
Is this the part which changes kernel memory access to fault when
writing to user-read-only pages? (I don't know anything about the
details of this, btw).
Is there any reason why this wasn't always like that?
-- Jamie
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-05 19:43 ` Jamie Lokier
@ 2010-01-06 16:23 ` Catalin Marinas
2010-01-06 16:32 ` Russell King - ARM Linux
2010-01-06 18:17 ` Jamie Lokier
2010-01-06 19:19 ` Leif Lindholm
1 sibling, 2 replies; 12+ messages in thread
From: Catalin Marinas @ 2010-01-06 16:23 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, 2010-01-05 at 19:43 +0000, Jamie Lokier wrote:
> Leif Lindholm wrote:
> > diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> > index 3a28521..cb85aeb 100644
> > --- a/arch/arm/mm/proc-v7.S
> > +++ b/arch/arm/mm/proc-v7.S
> > @@ -147,8 +147,10 @@ ENTRY(cpu_v7_set_pte_ext)
> >
> > tst r1, #L_PTE_USER
> > orrne r3, r3, #PTE_EXT_AP1
> > +#ifndef CONFIG_SWP_EMULATE
> > tstne r3, #PTE_EXT_APX
> > bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
> > +#endif
>
> Is this the part which changes kernel memory access to fault when
> writing to user-read-only pages? (I don't know anything about the
> details of this, btw).
>
> Is there any reason why this wasn't always like that?
On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
easily differentiate between the vectors page and a normal kernel page
unless we use another L_PTE_ bit. We need the vectors page to be
writable if there is no TLS register in hardware (I guess we could use
domain switching to override this though). But on ARMv7 we always have a
TLS register, so no need to write to the vectors page.
--
Catalin
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 16:23 ` Catalin Marinas
@ 2010-01-06 16:32 ` Russell King - ARM Linux
2010-01-06 16:58 ` Catalin Marinas
2010-01-06 18:17 ` Jamie Lokier
1 sibling, 1 reply; 12+ messages in thread
From: Russell King - ARM Linux @ 2010-01-06 16:32 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Jan 06, 2010 at 04:23:47PM +0000, Catalin Marinas wrote:
> On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
> easily differentiate between the vectors page and a normal kernel page
> unless we use another L_PTE_ bit. We need the vectors page to be
> writable if there is no TLS register in hardware (I guess we could use
> domain switching to override this though). But on ARMv7 we always have a
> TLS register, so no need to write to the vectors page.
That's not the only reason you'd want to write to the vectors page.
You'll want to write to it to install or change FIQ handlers.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 16:32 ` Russell King - ARM Linux
@ 2010-01-06 16:58 ` Catalin Marinas
0 siblings, 0 replies; 12+ messages in thread
From: Catalin Marinas @ 2010-01-06 16:58 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 2010-01-06 at 16:32 +0000, Russell King - ARM Linux wrote:
> On Wed, Jan 06, 2010 at 04:23:47PM +0000, Catalin Marinas wrote:
> > On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
> > easily differentiate between the vectors page and a normal kernel page
> > unless we use another L_PTE_ bit. We need the vectors page to be
> > writable if there is no TLS register in hardware (I guess we could use
> > domain switching to override this though). But on ARMv7 we always have a
> > TLS register, so no need to write to the vectors page.
>
> That's not the only reason you'd want to write to the vectors page.
> You'll want to write to it to install or change FIQ handlers.
I suspect that's not on a critical path, so the page tables could be
modified temporarily (or use set_fs but I have a patch which tries to
get rid of using the domains).
--
Catalin
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 16:23 ` Catalin Marinas
2010-01-06 16:32 ` Russell King - ARM Linux
@ 2010-01-06 18:17 ` Jamie Lokier
2010-01-07 9:59 ` Catalin Marinas
1 sibling, 1 reply; 12+ messages in thread
From: Jamie Lokier @ 2010-01-06 18:17 UTC (permalink / raw)
To: linux-arm-kernel
Catalin Marinas wrote:
> > Is there any reason why this wasn't always like that?
>
> On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
> easily differentiate between the vectors page and a normal kernel page
> unless we use another L_PTE_ bit. We need the vectors page to be
> writable if there is no TLS register in hardware (I guess we could use
> domain switching to override this though). But on ARMv7 we always have a
> TLS register, so no need to write to the vectors page.
Could you map the TLS page writable (kernel access only) at another
address at the same time, carefully choosing an aliasing address so
that no cache flush is needed after writing?
-- Jamie
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 18:17 ` Jamie Lokier
@ 2010-01-07 9:59 ` Catalin Marinas
2010-01-08 14:19 ` Jamie Lokier
0 siblings, 1 reply; 12+ messages in thread
From: Catalin Marinas @ 2010-01-07 9:59 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 2010-01-06 at 18:17 +0000, Jamie Lokier wrote:
> Catalin Marinas wrote:
> > > Is there any reason why this wasn't always like that?
> >
> > On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
> > easily differentiate between the vectors page and a normal kernel page
> > unless we use another L_PTE_ bit. We need the vectors page to be
> > writable if there is no TLS register in hardware (I guess we could use
> > domain switching to override this though). But on ARMv7 we always have a
> > TLS register, so no need to write to the vectors page.
>
> Could you map the TLS page writable (kernel access only) at another
> address at the same time, carefully choosing an aliasing address so
> that no cache flush is needed after writing?
It could be but I'm not sure it's worth. Leif's patch is intended for
ARMv7 where we always have a hardware TLS register. The FIQ handler
installation could probably be done by first calling set_fs(KERNEL_DS).
--
Catalin
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-07 9:59 ` Catalin Marinas
@ 2010-01-08 14:19 ` Jamie Lokier
0 siblings, 0 replies; 12+ messages in thread
From: Jamie Lokier @ 2010-01-08 14:19 UTC (permalink / raw)
To: linux-arm-kernel
Catalin Marinas wrote:
> On Wed, 2010-01-06 at 18:17 +0000, Jamie Lokier wrote:
> > Catalin Marinas wrote:
> > > > Is there any reason why this wasn't always like that?
> > >
> > > On ARMv6 onwards (where this user RO, kernel RO is supported) we cannot
> > > easily differentiate between the vectors page and a normal kernel page
> > > unless we use another L_PTE_ bit. We need the vectors page to be
> > > writable if there is no TLS register in hardware (I guess we could use
> > > domain switching to override this though). But on ARMv7 we always have a
> > > TLS register, so no need to write to the vectors page.
> >
> > Could you map the TLS page writable (kernel access only) at another
> > address at the same time, carefully choosing an aliasing address so
> > that no cache flush is needed after writing?
>
> It could be but I'm not sure it's worth. Leif's patch is intended for
> ARMv7 where we always have a hardware TLS register. The FIQ handler
> installation could probably be done by first calling set_fs(KERNEL_DS).
You're right.
I was confused about the meaning of (user RO == kernel RW) pages,
thinking that like on a 386, it makes copy_to_user expensive.
But I've just remembered that ARM uses ldrt and strt instructions for
user memory access... doh!
So user RO == kernel RW hardly matters.
-- Jamie
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-05 19:43 ` Jamie Lokier
2010-01-06 16:23 ` Catalin Marinas
@ 2010-01-06 19:19 ` Leif Lindholm
2010-01-06 19:36 ` Russell King - ARM Linux
1 sibling, 1 reply; 12+ messages in thread
From: Leif Lindholm @ 2010-01-06 19:19 UTC (permalink / raw)
To: linux-arm-kernel
> From: Jamie Lokier [mailto:jamie at shareable.org]
> Sent: 05 January 2010 19:43
> They are almost identical. The duplication could be removed by
> folding it into a single macro with another argument, like this:
>
> #define __user_swp_asm(data, addr, res, B) \
> " mov r3, %1\n" \
> "0: ldrex"B" %1, [%2]\n" \
> "1: strex"B" %0, r3, [%2]\n" \
>
> Then calling it like this:
>
> __user_swp_asm(data, address, res, "");
> __user_swp_asm(data, address, res, "b");
Neat.
But how about, for clarity, keeping the calling syntax in the calling
functions and add macros for the variants?:
#define __user_swp_asm_generic(data, addr, res, B) \
...
#define __user_swp_asm(data, addr, res) \
__user_swp_asm_generic(data, addr, res, "")
#define __user_swpb_asm(data, addr, res) \
__user_swp_asm_generic(data, addr, res, "b")
> > + if (abtcounter == UINT_MAX)
> > + printk(KERN_WARNING \
> > + "SWP{B} emulation abort counter wrapped!\n");
> > + abtcounter++;
>
> It's not atomic therefore not precise anyway. Why not just use u64,
> and skip the test and printk? The code will be shorter and
> ironically, faster with u64 because of omitting the test.
Fair enough. Will do.
> > +static int emulate_swp(struct pt_regs *regs, unsigned int address,
> > + unsigned int destreg, unsigned int data)
>
> > +static int emulate_swpb(struct pt_regs *regs, unsigned int address,
> > + unsigned int destreg, unsigned int data)
>
> Two almost identical functions. I wonder if it would be better to
> merge them and take a flag. It would also reduce the compiled code
> size.
I'm hesitant to add more than 4 arguments (adds stack overhead).
Also, at least cs2009q3 gcc (4.3.3) seems to inline both of these, so
not sure a codesize improvement would occur in practise.
An alternative would of course be to merge them into swp_handler(), but
that would make that function a bit messy.
> Why is the smp_mb() needed? I don't doubt there's a reason, but I
> don't see what it is.
A DMB is required between acquiring a lock and accessing the protected
resource, as well as between modifying a protected resource and
releasing its lock. Because there is no way to tell whether the SWP
performed a lock or unlock operation, inserting the barriers on
either side seemed the safest way to ensure that code written for ARMv5
or earlier would work as expected.
I guess a case could be made that this is an application problem and
should be resolved at that end.
> The loop looks ok, but it could be simpler in the common path:
>
> while (1) {
> smp_mb();
> __user_swp_asm(data, address, res);
> if (likely(res != -EAGAIN) || signal_pending(current))
> break;
> cond_resched();
> }
Good point, will do that in the next version.
> > +#ifndef CONFIG_ALIGNMENT_TRAP
> > + res = proc_mkdir("cpu", NULL);
> ? Is that to work with different kernel versions?
It's to ensure it would work (without console warnings) even if someone
decides to disable ALIGNMENT_TRAP. An alternative would be to strip the
creation of /proc/cpu out from mm/alignment.c and put it somewhere else
(or move the stats file somewhere else - but it seemed logical to group
with /proc/alignment).
/
Leif
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 19:19 ` Leif Lindholm
@ 2010-01-06 19:36 ` Russell King - ARM Linux
2010-01-14 13:08 ` Leif Lindholm
0 siblings, 1 reply; 12+ messages in thread
From: Russell King - ARM Linux @ 2010-01-06 19:36 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Jan 06, 2010 at 07:19:36PM -0000, Leif Lindholm wrote:
> Good point, will do that in the next version.
>
> > > +#ifndef CONFIG_ALIGNMENT_TRAP
> > > + res = proc_mkdir("cpu", NULL);
>
> > ? Is that to work with different kernel versions?
>
> It's to ensure it would work (without console warnings) even if someone
> decides to disable ALIGNMENT_TRAP. An alternative would be to strip the
> creation of /proc/cpu out from mm/alignment.c and put it somewhere else
> (or move the stats file somewhere else - but it seemed logical to group
> with /proc/alignment).
As I said when you previously posted this patch, that's definitely on
the cards. In fact, I'm about to commit the following patch for doing
exactly this.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 233a222..faeee3e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,6 +50,9 @@ config HAVE_TCM
bool
select GENERIC_ALLOCATOR
+config HAVE_PROC_CPU
+ bool
+
config NO_IOPORT
bool
@@ -1226,6 +1229,7 @@ config ALIGNMENT_TRAP
bool
depends on CPU_CP15_MMU
default y if !ARCH_EBSA110
+ select HAVE_PROC_CPU if PROC_FS
help
ARM processors cannot fetch/store information which is not
naturally aligned on the bus, i.e., a 4 byte fetch must start at an
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c6c57b6..5357e48 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <asm/unified.h>
#include <asm/cpu.h>
@@ -782,9 +783,21 @@ static int __init topology_init(void)
return 0;
}
-
subsys_initcall(topology_init);
+#ifdef CONFIG_HAVE_PROC_CPU
+static int __init proc_cpu_init(void)
+{
+ struct proc_dir_entry *res;
+
+ res = proc_mkdir("cpu", NULL);
+ if (!res)
+ return -ENOMEM;
+ return 0;
+}
+fs_initcall(proc_cpu_init);
+#endif
+
static const char *hwcap_str[] = {
"swp",
"half",
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b270d62..0c5eb69 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -898,11 +898,7 @@ static int __init alignment_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *res;
- res = proc_mkdir("cpu", NULL);
- if (!res)
- return -ENOMEM;
-
- res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res);
+ res = create_proc_entry("cpu/alignment", S_IWUSR | S_IRUGO, NULL);
if (!res)
return -ENOMEM;
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
2010-01-06 19:36 ` Russell King - ARM Linux
@ 2010-01-14 13:08 ` Leif Lindholm
0 siblings, 0 replies; 12+ messages in thread
From: Leif Lindholm @ 2010-01-14 13:08 UTC (permalink / raw)
To: linux-arm-kernel
> From: Russell King - ARM Linux [mailto:linux at arm.linux.org.uk]
> Sent: 06 January 2010 19:37
>
> > It's to ensure it would work (without console warnings) even if
> > someone decides to disable ALIGNMENT_TRAP. An alternative would be to
> > strip the creation of /proc/cpu out from mm/alignment.c and put it
> > somewhere else (or move the stats file somewhere else - but it seemed
> > logical to group with /proc/alignment).
>
> As I said when you previously posted this patch, that's definitely on
> the cards. In fact, I'm about to commit the following patch for doing
> exactly this.
Excellent, thanks. I'm just about to submit the next version, which relies
on that patch, and no longer checks ALIGNMENT_TRAP for anything.
Other changes:
- Code restructured a bit, from Jamie's comments. It did end up quite a bit
cleaner because of this.
- Comments added for memory barriers, as pointed out by Jamie.
- Added description in Documentation/arm/swp_emulation.
/
Leif
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3)
@ 2010-01-06 21:53 Jamie Lokier
0 siblings, 0 replies; 12+ messages in thread
From: Jamie Lokier @ 2010-01-06 21:53 UTC (permalink / raw)
To: linux-arm-kernel
Leif Lindholm wrote:
> > From: Jamie Lokier [mailto:jamie at shareable.org]
> > Then calling it like this:
> >
> > __user_swp_asm(data, address, res, "");
> > __user_swp_asm(data, address, res, "b");
>
> Neat.
> But how about, for clarity, keeping the calling syntax in the calling
> functions and add macros for the variants?:
>
> #define __user_swp_asm_generic(data, addr, res, B) \
> ...
> #define __user_swp_asm(data, addr, res) \
> __user_swp_asm_generic(data, addr, res, "")
> #define __user_swpb_asm(data, addr, res) \
> __user_swp_asm_generic(data, addr, res, "b")
I'd just call the generic one __user_swpX_data and call it - I don't
think the additional tiny macros add any clarity, particularly with
being used in one place just a few lines down. But it's totally
subjective and up to you.
> > > +static int emulate_swp(struct pt_regs *regs, unsigned int address,
> > > + unsigned int destreg, unsigned int data)
> >
> > > +static int emulate_swpb(struct pt_regs *regs, unsigned int address,
> > > + unsigned int destreg, unsigned int data)
> >
> > Two almost identical functions. I wonder if it would be better to
> > merge them and take a flag. It would also reduce the compiled code
> > size.
>
> I'm hesitant to add more than 4 arguments (adds stack overhead).
> Also, at least cs2009q3 gcc (4.3.3) seems to inline both of these, so
> not sure a codesize improvement would occur in practise.
If they are inlined, there is no stack overhead. Mainly I thought
it would make the source smaller/tidier tidier :-)
> > Why is the smp_mb() needed? I don't doubt there's a reason, but I
> > don't see what it is.
>
> A DMB is required between acquiring a lock and accessing the protected
> resource, as well as between modifying a protected resource and
> releasing its lock. Because there is no way to tell whether the SWP
> performed a lock or unlock operation, inserting the barriers on
> either side seemed the safest way to ensure that code written for ARMv5
> or earlier would work as expected.
>
> I guess a case could be made that this is an application problem and
> should be resolved at that end.
That's a really good reason, and thanks for thinking of it: To make
ARMv5 application code that doesn't know about DMB work properly. Any
code which is DMB-aware is likely to use LDREX/STREX itself, so this
is great match. :-)
However, please follow this advice from Documentation/SubmitChecklist:
>> 24: All memory barriers {e.g., barrier(), rmb(), wmb()} need a
>> comment in the source code that explains the logic of what
>> they are doing and why.
I think a comment is even more important in this case, because the
barriers are an ABI design decision you've made; nobody could deduce
why they are there from SMP correctness alone.
Unfortunately there are other places in threaded code that need DMB
(for example any good implementation of pthread_once), so ARMv5
threaded code can still fail in subtle, unpredictable ways :-( Is it
possible to turn off weak memory ordering, after a SWP instruction is
detected? Though even that would not ensure correctness of
pthread_once equivalent if it comes before any mutex locks in a program :(
I wonder, too, about what ARM ARM says about LDREX/STREX only working
on memory with the "Shared TLB attribute". Will single-threaded code
using SWP on mapped shared memory get its expected atomic behaviour at
all with your emulation?
> Good point, will do that in the next version.
>
> > > +#ifndef CONFIG_ALIGNMENT_TRAP
> > > + res = proc_mkdir("cpu", NULL);
>
> > ? Is that to work with different kernel versions?
>
> It's to ensure it would work (without console warnings) even if someone
> decides to disable ALIGNMENT_TRAP. An alternative would be to strip the
> creation of /proc/cpu out from mm/alignment.c and put it somewhere else
> (or move the stats file somewhere else - but it seemed logical to group
> with /proc/alignment).
Seems to me both should be sysfs CPU attributes anyway, but I don't
know much about that so don't take my word for it. The ifdef is kind
of ugly but maybe unavoidable.
-- Jamie
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2010-01-14 13:08 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-05 18:24 [PATCH] ARM: Add SWP/SWPB emulation for ARMv7 processors (v3) Leif Lindholm
2010-01-05 19:43 ` Jamie Lokier
2010-01-06 16:23 ` Catalin Marinas
2010-01-06 16:32 ` Russell King - ARM Linux
2010-01-06 16:58 ` Catalin Marinas
2010-01-06 18:17 ` Jamie Lokier
2010-01-07 9:59 ` Catalin Marinas
2010-01-08 14:19 ` Jamie Lokier
2010-01-06 19:19 ` Leif Lindholm
2010-01-06 19:36 ` Russell King - ARM Linux
2010-01-14 13:08 ` Leif Lindholm
-- strict thread matches above, loose matches on Subject: below --
2010-01-06 21:53 Jamie Lokier
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).