linux-mips.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] MIPS vdso and signal delivery optimization (v2)
@ 2010-02-19  0:13 David Daney
  2010-02-19  0:13 ` [PATCH 1/3] MIPS: Add SYSCALL to uasm David Daney
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: David Daney @ 2010-02-19  0:13 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: David Daney

This patch set creates a vdso and moves the signal
trampolines to it from their previous home on the stack.

In the original patch set:
http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=49EE3B0F.3040506%40caviumnetworks.com

I stated:

Tested with a 64-bit kernel on a Cavium Octeon cn3860 where I have the
following results from lmbench2:

Before:
n64 - Signal handler overhead: 14.517 microseconds
n32 - Signal handler overhead: 14.497 microseconds
o32 - Signal handler overhead: 16.637 microseconds

After:

n64 - Signal handler overhead: 7.935 microseconds
n32 - Signal handler overhead: 7.334 microseconds
o32 - Signal handler overhead: 8.628 microseconds

All that is still true.

Improvements from the first version:

* Compiles and runs in 32-bit kernels (on qemu at least).

* Updated for linux-queue based on 2.6.33-rc8

David Daney (3):
  MIPS: Add SYSCALL to uasm.
  MIPS: Preliminary vdso.
  MIPS: Move signal trampolines off of the stack.

 arch/mips/include/asm/abi.h         |    6 +-
 arch/mips/include/asm/elf.h         |    4 +
 arch/mips/include/asm/mmu.h         |    5 +-
 arch/mips/include/asm/mmu_context.h |    2 +-
 arch/mips/include/asm/processor.h   |   11 +++-
 arch/mips/include/asm/uasm.h        |    1 +
 arch/mips/include/asm/vdso.h        |   29 +++++++++
 arch/mips/kernel/Makefile           |    2 +-
 arch/mips/kernel/signal-common.h    |    5 --
 arch/mips/kernel/signal.c           |   86 ++++++---------------------
 arch/mips/kernel/signal32.c         |   55 ++++-------------
 arch/mips/kernel/signal_n32.c       |   26 ++------
 arch/mips/kernel/syscall.c          |    6 ++-
 arch/mips/kernel/vdso.c             |  112 +++++++++++++++++++++++++++++++++++
 arch/mips/mm/uasm.c                 |   19 +++++-
 15 files changed, 226 insertions(+), 143 deletions(-)
 create mode 100644 arch/mips/include/asm/vdso.h
 create mode 100644 arch/mips/kernel/vdso.c

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/3] MIPS: Add SYSCALL to uasm.
  2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
@ 2010-02-19  0:13 ` David Daney
  2010-03-16 19:55   ` Ralf Baechle
  2010-02-19  0:13 ` [PATCH 2/3] MIPS: Preliminary vdso David Daney
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: David Daney @ 2010-02-19  0:13 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: David Daney

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 arch/mips/include/asm/uasm.h |    1 +
 arch/mips/mm/uasm.c          |   19 +++++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index b99bd07..32fe2ec 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -102,6 +102,7 @@ Ip_0(_tlbwr);
 Ip_u3u1u2(_xor);
 Ip_u2u1u3(_xori);
 Ip_u2u1msbu3(_dins);
+Ip_u1(_syscall);
 
 /* Handle labels. */
 struct uasm_label {
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 1581e98..d22d7bc 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -31,7 +31,8 @@ enum fields {
 	BIMM = 0x040,
 	JIMM = 0x080,
 	FUNC = 0x100,
-	SET = 0x200
+	SET = 0x200,
+	SCIMM = 0x400
 };
 
 #define OP_MASK		0x3f
@@ -52,6 +53,8 @@ enum fields {
 #define FUNC_SH		0
 #define SET_MASK	0x7
 #define SET_SH		0
+#define SCIMM_MASK	0xfffff
+#define SCIMM_SH	6
 
 enum opcode {
 	insn_invalid,
@@ -64,7 +67,7 @@ enum opcode {
 	insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd,
 	insn_sd, insn_sll, insn_sra, insn_srl, insn_rotr, insn_subu, insn_sw,
 	insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori,
-	insn_dins
+	insn_dins, insn_syscall
 };
 
 struct insn {
@@ -136,6 +139,7 @@ static struct insn insn_table[] __cpuinitdata = {
 	{ insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD },
 	{ insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
 	{ insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
+	{ insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM},
 	{ insn_invalid, 0, 0 }
 };
 
@@ -208,6 +212,14 @@ static inline __cpuinit u32 build_jimm(u32 arg)
 	return (arg >> 2) & JIMM_MASK;
 }
 
+static inline __cpuinit u32 build_scimm(u32 arg)
+{
+	if (arg & ~SCIMM_MASK)
+		printk(KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & SCIMM_MASK) << SCIMM_SH;
+}
+
 static inline __cpuinit u32 build_func(u32 arg)
 {
 	if (arg & ~FUNC_MASK)
@@ -266,6 +278,8 @@ static void __cpuinit build_insn(u32 **buf, enum opcode opc, ...)
 		op |= build_func(va_arg(ap, u32));
 	if (ip->fields & SET)
 		op |= build_set(va_arg(ap, u32));
+	if (ip->fields & SCIMM)
+		op |= build_scimm(va_arg(ap, u32));
 	va_end(ap);
 
 	**buf = op;
@@ -391,6 +405,7 @@ I_0(_tlbwr)
 I_u3u1u2(_xor)
 I_u2u1u3(_xori)
 I_u2u1msbu3(_dins);
+I_u1(_syscall);
 
 /* Handle labels. */
 void __cpuinit uasm_build_label(struct uasm_label **lab, u32 *addr, int lid)
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] MIPS: Preliminary vdso.
  2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
  2010-02-19  0:13 ` [PATCH 1/3] MIPS: Add SYSCALL to uasm David Daney
@ 2010-02-19  0:13 ` David Daney
  2010-02-23 20:40   ` Manuel Lauss
  2010-03-16 19:55   ` Ralf Baechle
  2010-02-19  0:13 ` [PATCH 3/3] MIPS: Move signal trampolines off of the stack David Daney
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 14+ messages in thread
From: David Daney @ 2010-02-19  0:13 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: David Daney

This is a preliminary patch to add a vdso to all user processes.
Still missing are ELF headers and .eh_frame information.  But it is
enough to allow us to move signal trampolines off of the stack.  Note
that emulation of branch delay slots in the FPU emulator still
requires the stack.

We allocate a single page (the vdso) and write all possible signal
trampolines into it.  The stack is moved down by one page and the vdso
is mapped into this space.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 arch/mips/include/asm/elf.h         |    4 +
 arch/mips/include/asm/mmu.h         |    5 +-
 arch/mips/include/asm/mmu_context.h |    2 +-
 arch/mips/include/asm/processor.h   |   11 +++-
 arch/mips/include/asm/vdso.h        |   29 +++++++++
 arch/mips/kernel/Makefile           |    2 +-
 arch/mips/kernel/syscall.c          |    6 ++-
 arch/mips/kernel/vdso.c             |  112 +++++++++++++++++++++++++++++++++++
 8 files changed, 165 insertions(+), 6 deletions(-)
 create mode 100644 arch/mips/include/asm/vdso.h
 create mode 100644 arch/mips/kernel/vdso.c

diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index e53d7be..1c3dbf0 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -367,4 +367,8 @@ extern const char *__elf_platform;
 #define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 #endif
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp);
 #endif /* _ASM_ELF_H */
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 4063edd..c436138 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -1,6 +1,9 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
-typedef unsigned long mm_context_t[NR_CPUS];
+typedef struct {
+	unsigned long asid[NR_CPUS];
+	void *vdso;
+} mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index ada4975..73a640b 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -109,7 +109,7 @@ extern unsigned long smtc_asid_mask;
 
 #endif
 
-#define cpu_context(cpu, mm)	((mm)->context[cpu])
+#define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
 #define cpu_asid(cpu, mm)	(cpu_context((cpu), (mm)) & ASID_MASK)
 #define asid_cache(cpu)		(cpu_data[cpu].asid_cache)
 
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 087a888..ab38791 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -33,13 +33,19 @@ extern void (*cpu_wait)(void);
 
 extern unsigned int vced_count, vcei_count;
 
+/*
+ * A special page (the vdso) is mapped into all processes at the very
+ * top of the virtual memory space.
+ */
+#define SPECIAL_PAGES_SIZE PAGE_SIZE
+
 #ifdef CONFIG_32BIT
 /*
  * User space process size: 2GB. This is hardcoded into a few places,
  * so don't change it unless you know what you are doing.
  */
 #define TASK_SIZE	0x7fff8000UL
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	((TASK_SIZE & PAGE_MASK) - SPECIAL_PAGES_SIZE)
 
 /*
  * This decides where the kernel will search for a free chunk of vm
@@ -59,7 +65,8 @@ extern unsigned int vced_count, vcei_count;
 #define TASK_SIZE32	0x7fff8000UL
 #define TASK_SIZE	0x10000000000UL
 #define STACK_TOP	\
-      (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE)
+	(((test_thread_flag(TIF_32BIT_ADDR) ?				\
+	   TASK_SIZE32 : TASK_SIZE) & PAGE_MASK) - SPECIAL_PAGES_SIZE)
 
 /*
  * This decides where the kernel will search for a free chunk of vm
diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h
new file mode 100644
index 0000000..cca56aa
--- /dev/null
+++ b/arch/mips/include/asm/vdso.h
@@ -0,0 +1,29 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2009 Cavium Networks
+ */
+
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#include <linux/types.h>
+
+
+#ifdef CONFIG_32BIT
+struct mips_vdso {
+	u32 signal_trampoline[2];
+	u32 rt_signal_trampoline[2];
+};
+#else  /* !CONFIG_32BIT */
+struct mips_vdso {
+	u32 o32_signal_trampoline[2];
+	u32 o32_rt_signal_trampoline[2];
+	u32 rt_signal_trampoline[2];
+	u32 n32_rt_signal_trampoline[2];
+};
+#endif /* CONFIG_32BIT */
+
+#endif /* __ASM_VDSO_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 924192b..642ae95 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y		:= head.o init_task.o vmlinux.lds
 
 obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 		   ptrace.o reset.o setup.o signal.o syscall.o \
-		   time.o topology.o traps.o unaligned.o watch.o
+		   time.o topology.o traps.o unaligned.o watch.o vdso.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index e6cb831..d15eb20 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -79,7 +79,11 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	int do_color_align;
 	unsigned long task_size;
 
-	task_size = STACK_TOP;
+#ifdef CONFIG_32BIT
+	task_size = TASK_SIZE;
+#else /* Must be CONFIG_64BIT*/
+	task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE;
+#endif
 
 	if (len > task_size)
 		return -ENOMEM;
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
new file mode 100644
index 0000000..b773c11
--- /dev/null
+++ b/arch/mips/kernel/vdso.c
@@ -0,0 +1,112 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2009, 2010 Cavium Networks, Inc.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/binfmts.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/unistd.h>
+
+#include <asm/vdso.h>
+#include <asm/uasm.h>
+
+/*
+ * Including <asm/unistd.h> would give us the 64-bit syscall numbers ...
+ */
+#define __NR_O32_sigreturn		4119
+#define __NR_O32_rt_sigreturn		4193
+#define __NR_N32_rt_sigreturn		6211
+
+static struct page *vdso_page;
+
+static void __init install_trampoline(u32 *tramp, unsigned int sigreturn)
+{
+	uasm_i_addiu(&tramp, 2, 0, sigreturn);	/* li v0, sigreturn */
+	uasm_i_syscall(&tramp, 0);
+}
+
+static int __init init_vdso(void)
+{
+	struct mips_vdso *vdso;
+
+	vdso_page = alloc_page(GFP_KERNEL);
+	if (!vdso_page)
+		panic("Cannot allocate vdso");
+
+	vdso = vmap(&vdso_page, 1, 0, PAGE_KERNEL);
+	if (!vdso)
+		panic("Cannot map vdso");
+	clear_page(vdso);
+
+	install_trampoline(vdso->rt_signal_trampoline, __NR_rt_sigreturn);
+#ifdef CONFIG_32BIT
+	install_trampoline(vdso->signal_trampoline, __NR_sigreturn);
+#else
+	install_trampoline(vdso->n32_rt_signal_trampoline,
+			   __NR_N32_rt_sigreturn);
+	install_trampoline(vdso->o32_signal_trampoline, __NR_O32_sigreturn);
+	install_trampoline(vdso->o32_rt_signal_trampoline,
+			   __NR_O32_rt_sigreturn);
+#endif
+
+	vunmap(vdso);
+
+	pr_notice("init_vdso successfull\n");
+
+	return 0;
+}
+device_initcall(init_vdso);
+
+static unsigned long vdso_addr(unsigned long start)
+{
+	return STACK_TOP;
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	int ret;
+	unsigned long addr;
+	struct mm_struct *mm = current->mm;
+
+	down_write(&mm->mmap_sem);
+
+	addr = vdso_addr(mm->start_stack);
+
+	addr = get_unmapped_area(NULL, addr, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				      VM_ALWAYSDUMP,
+				      &vdso_page);
+
+	if (ret)
+		goto up_fail;
+
+	mm->context.vdso = (void *)addr;
+
+up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+		return "[vdso]";
+	return NULL;
+}
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] MIPS: Move signal trampolines off of the stack.
  2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
  2010-02-19  0:13 ` [PATCH 1/3] MIPS: Add SYSCALL to uasm David Daney
  2010-02-19  0:13 ` [PATCH 2/3] MIPS: Preliminary vdso David Daney
@ 2010-02-19  0:13 ` David Daney
  2010-03-16 19:56   ` Ralf Baechle
  2010-02-19  2:02 ` [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
  2010-02-19 13:57 ` Ralf Baechle
  4 siblings, 1 reply; 14+ messages in thread
From: David Daney @ 2010-02-19  0:13 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: David Daney

This is a follow-on to the vdso patch.

Since all processes now have signal trampolines permanently mapped, we
can use those instead of putting the trampoline on the stack and
invalidating the corresponding icache across all CPUs.  We also get
rid of a bunch of ICACHE_REFILLS_WORKAROUND_WAR code.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 arch/mips/include/asm/abi.h      |    6 ++-
 arch/mips/kernel/signal-common.h |    5 --
 arch/mips/kernel/signal.c        |   86 ++++++++-----------------------------
 arch/mips/kernel/signal32.c      |   55 ++++++------------------
 arch/mips/kernel/signal_n32.c    |   26 +++---------
 5 files changed, 43 insertions(+), 135 deletions(-)

diff --git a/arch/mips/include/asm/abi.h b/arch/mips/include/asm/abi.h
index 1dd74fb..9252d9b 100644
--- a/arch/mips/include/asm/abi.h
+++ b/arch/mips/include/asm/abi.h
@@ -13,12 +13,14 @@
 #include <asm/siginfo.h>
 
 struct mips_abi {
-	int (* const setup_frame)(struct k_sigaction * ka,
+	int (* const setup_frame)(void *sig_return, struct k_sigaction *ka,
 	                          struct pt_regs *regs, int signr,
 	                          sigset_t *set);
-	int (* const setup_rt_frame)(struct k_sigaction * ka,
+	const unsigned long	signal_return_offset;
+	int (* const setup_rt_frame)(void *sig_return, struct k_sigaction *ka,
 	                       struct pt_regs *regs, int signr,
 	                       sigset_t *set, siginfo_t *info);
+	const unsigned long	rt_signal_return_offset;
 	const unsigned long	restart;
 };
 
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
index 6c8e8c4..10263b4 100644
--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -26,11 +26,6 @@
  */
 extern void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 				 size_t frame_size);
-/*
- * install trampoline code to get back from the sig handler
- */
-extern int install_sigtramp(unsigned int __user *tramp, unsigned int syscall);
-
 /* Check and clear pending FPU exceptions in saved CSR */
 extern int fpcsr_pending(unsigned int __user *fpcsr);
 
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index d0c68b5..2099d5a 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -32,6 +32,7 @@
 #include <asm/ucontext.h>
 #include <asm/cpu-features.h>
 #include <asm/war.h>
+#include <asm/vdso.h>
 
 #include "signal-common.h"
 
@@ -44,47 +45,20 @@ extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
 extern asmlinkage int fpu_emulator_save_context(struct sigcontext __user *sc);
 extern asmlinkage int fpu_emulator_restore_context(struct sigcontext __user *sc);
 
-/*
- * Horribly complicated - with the bloody RM9000 workarounds enabled
- * the signal trampolines is moving to the end of the structure so we can
- * increase the alignment without breaking software compatibility.
- */
-#if ICACHE_REFILLS_WORKAROUND_WAR == 0
-
 struct sigframe {
 	u32 sf_ass[4];		/* argument save space for o32 */
-	u32 sf_code[2];		/* signal trampoline */
+	u32 sf_pad[2];		/* Was: signal trampoline */
 	struct sigcontext sf_sc;
 	sigset_t sf_mask;
 };
 
 struct rt_sigframe {
 	u32 rs_ass[4];		/* argument save space for o32 */
-	u32 rs_code[2];		/* signal trampoline */
+	u32 rs_pad[2];		/* Was: signal trampoline */
 	struct siginfo rs_info;
 	struct ucontext rs_uc;
 };
 
-#else
-
-struct sigframe {
-	u32 sf_ass[4];			/* argument save space for o32 */
-	u32 sf_pad[2];
-	struct sigcontext sf_sc;	/* hw context */
-	sigset_t sf_mask;
-	u32 sf_code[8] ____cacheline_aligned;	/* signal trampoline */
-};
-
-struct rt_sigframe {
-	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_pad[2];
-	struct siginfo rs_info;
-	struct ucontext rs_uc;
-	u32 rs_code[8] ____cacheline_aligned;	/* signal trampoline */
-};
-
-#endif
-
 /*
  * Helper routines
  */
@@ -266,32 +240,6 @@ void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 	return (void __user *)((sp - frame_size) & (ICACHE_REFILLS_WORKAROUND_WAR ? ~(cpu_icache_line_size()-1) : ALMASK));
 }
 
-int install_sigtramp(unsigned int __user *tramp, unsigned int syscall)
-{
-	int err;
-
-	/*
-	 * Set up the return code ...
-	 *
-	 *         li      v0, __NR__foo_sigreturn
-	 *         syscall
-	 */
-
-	err = __put_user(0x24020000 + syscall, tramp + 0);
-	err |= __put_user(0x0000000c         , tramp + 1);
-	if (ICACHE_REFILLS_WORKAROUND_WAR) {
-		err |= __put_user(0, tramp + 2);
-		err |= __put_user(0, tramp + 3);
-		err |= __put_user(0, tramp + 4);
-		err |= __put_user(0, tramp + 5);
-		err |= __put_user(0, tramp + 6);
-		err |= __put_user(0, tramp + 7);
-	}
-	flush_cache_sigtramp((unsigned long) tramp);
-
-	return err;
-}
-
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -484,8 +432,8 @@ badframe:
 }
 
 #ifdef CONFIG_TRAD_SIGNALS
-static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs,
-	int signr, sigset_t *set)
+static int setup_frame(void *sig_return, struct k_sigaction *ka,
+		       struct pt_regs *regs, int signr, sigset_t *set)
 {
 	struct sigframe __user *frame;
 	int err = 0;
@@ -494,8 +442,6 @@ static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto give_sigsegv;
 
-	err |= install_sigtramp(frame->sf_code, __NR_sigreturn);
-
 	err |= setup_sigcontext(regs, &frame->sf_sc);
 	err |= __copy_to_user(&frame->sf_mask, set, sizeof(*set));
 	if (err)
@@ -515,7 +461,7 @@ static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs,
 	regs->regs[ 5] = 0;
 	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
 	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) frame->sf_code;
+	regs->regs[31] = (unsigned long) sig_return;
 	regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler;
 
 	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
@@ -529,8 +475,9 @@ give_sigsegv:
 }
 #endif
 
-static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs,
-	int signr, sigset_t *set, siginfo_t *info)
+static int setup_rt_frame(void *sig_return, struct k_sigaction *ka,
+			  struct pt_regs *regs,	int signr, sigset_t *set,
+			  siginfo_t *info)
 {
 	struct rt_sigframe __user *frame;
 	int err = 0;
@@ -539,8 +486,6 @@ static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto give_sigsegv;
 
-	err |= install_sigtramp(frame->rs_code, __NR_rt_sigreturn);
-
 	/* Create siginfo.  */
 	err |= copy_siginfo_to_user(&frame->rs_info, info);
 
@@ -573,7 +518,7 @@ static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs,
 	regs->regs[ 5] = (unsigned long) &frame->rs_info;
 	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
 	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) frame->rs_code;
+	regs->regs[31] = (unsigned long) sig_return;
 	regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler;
 
 	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
@@ -590,8 +535,11 @@ give_sigsegv:
 struct mips_abi mips_abi = {
 #ifdef CONFIG_TRAD_SIGNALS
 	.setup_frame	= setup_frame,
+	.signal_return_offset = offsetof(struct mips_vdso, signal_trampoline),
 #endif
 	.setup_rt_frame	= setup_rt_frame,
+	.rt_signal_return_offset =
+		offsetof(struct mips_vdso, rt_signal_trampoline),
 	.restart	= __NR_restart_syscall
 };
 
@@ -599,6 +547,8 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
 	struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs)
 {
 	int ret;
+	struct mips_abi *abi = current->thread.abi;
+	void *vdso = current->mm->context.vdso;
 
 	switch(regs->regs[0]) {
 	case ERESTART_RESTARTBLOCK:
@@ -619,9 +569,11 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
 	regs->regs[0] = 0;		/* Don't deal with this again.  */
 
 	if (sig_uses_siginfo(ka))
-		ret = current->thread.abi->setup_rt_frame(ka, regs, sig, oldset, info);
+		ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset,
+					  ka, regs, sig, oldset, info);
 	else
-		ret = current->thread.abi->setup_frame(ka, regs, sig, oldset);
+		ret = abi->setup_frame(vdso + abi->signal_return_offset,
+				       ka, regs, sig, oldset);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 03abaf0..a0ed0e0 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 #include <asm/fpu.h>
 #include <asm/war.h>
+#include <asm/vdso.h>
 
 #include "signal-common.h"
 
@@ -47,8 +48,6 @@ extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 __user
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
-#define __NR_O32_sigreturn		4119
-#define __NR_O32_rt_sigreturn		4193
 #define __NR_O32_restart_syscall        4253
 
 /* 32-bit compatibility types */
@@ -77,47 +76,20 @@ struct ucontext32 {
 	compat_sigset_t     uc_sigmask;   /* mask last for extensibility */
 };
 
-/*
- * Horribly complicated - with the bloody RM9000 workarounds enabled
- * the signal trampolines is moving to the end of the structure so we can
- * increase the alignment without breaking software compatibility.
- */
-#if ICACHE_REFILLS_WORKAROUND_WAR == 0
-
 struct sigframe32 {
 	u32 sf_ass[4];		/* argument save space for o32 */
-	u32 sf_code[2];		/* signal trampoline */
+	u32 sf_pad[2];		/* Was: signal trampoline */
 	struct sigcontext32 sf_sc;
 	compat_sigset_t sf_mask;
 };
 
 struct rt_sigframe32 {
 	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_code[2];			/* signal trampoline */
+	u32 rs_pad[2];			/* Was: signal trampoline */
 	compat_siginfo_t rs_info;
 	struct ucontext32 rs_uc;
 };
 
-#else  /* ICACHE_REFILLS_WORKAROUND_WAR */
-
-struct sigframe32 {
-	u32 sf_ass[4];			/* argument save space for o32 */
-	u32 sf_pad[2];
-	struct sigcontext32 sf_sc;	/* hw context */
-	compat_sigset_t sf_mask;
-	u32 sf_code[8] ____cacheline_aligned;	/* signal trampoline */
-};
-
-struct rt_sigframe32 {
-	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_pad[2];
-	compat_siginfo_t rs_info;
-	struct ucontext32 rs_uc;
-	u32 rs_code[8] __attribute__((aligned(32)));	/* signal trampoline */
-};
-
-#endif	/* !ICACHE_REFILLS_WORKAROUND_WAR */
-
 /*
  * sigcontext handlers
  */
@@ -598,8 +570,8 @@ badframe:
 	force_sig(SIGSEGV, current);
 }
 
-static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
-	int signr, sigset_t *set)
+static int setup_frame_32(void *sig_return, struct k_sigaction *ka,
+			  struct pt_regs *regs, int signr, sigset_t *set)
 {
 	struct sigframe32 __user *frame;
 	int err = 0;
@@ -608,8 +580,6 @@ static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto give_sigsegv;
 
-	err |= install_sigtramp(frame->sf_code, __NR_O32_sigreturn);
-
 	err |= setup_sigcontext32(regs, &frame->sf_sc);
 	err |= __copy_conv_sigset_to_user(&frame->sf_mask, set);
 
@@ -630,7 +600,7 @@ static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
 	regs->regs[ 5] = 0;
 	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
 	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) frame->sf_code;
+	regs->regs[31] = (unsigned long) sig_return;
 	regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler;
 
 	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
@@ -644,8 +614,9 @@ give_sigsegv:
 	return -EFAULT;
 }
 
-static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
-	int signr, sigset_t *set, siginfo_t *info)
+static int setup_rt_frame_32(void *sig_return, struct k_sigaction *ka,
+			     struct pt_regs *regs, int signr, sigset_t *set,
+			     siginfo_t *info)
 {
 	struct rt_sigframe32 __user *frame;
 	int err = 0;
@@ -655,8 +626,6 @@ static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto give_sigsegv;
 
-	err |= install_sigtramp(frame->rs_code, __NR_O32_rt_sigreturn);
-
 	/* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */
 	err |= copy_siginfo_to_user32(&frame->rs_info, info);
 
@@ -690,7 +659,7 @@ static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
 	regs->regs[ 5] = (unsigned long) &frame->rs_info;
 	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
 	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) frame->rs_code;
+	regs->regs[31] = (unsigned long) sig_return;
 	regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler;
 
 	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
@@ -709,7 +678,11 @@ give_sigsegv:
  */
 struct mips_abi mips_abi_32 = {
 	.setup_frame	= setup_frame_32,
+	.signal_return_offset =
+		offsetof(struct mips_vdso, o32_signal_trampoline),
 	.setup_rt_frame	= setup_rt_frame_32,
+	.rt_signal_return_offset =
+		offsetof(struct mips_vdso, o32_rt_signal_trampoline),
 	.restart	= __NR_O32_restart_syscall
 };
 
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index bb277e8..2c5df81 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -39,13 +39,13 @@
 #include <asm/fpu.h>
 #include <asm/cpu-features.h>
 #include <asm/war.h>
+#include <asm/vdso.h>
 
 #include "signal-common.h"
 
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
-#define __NR_N32_rt_sigreturn		6211
 #define __NR_N32_restart_syscall	6214
 
 extern int setup_sigcontext(struct pt_regs *, struct sigcontext __user *);
@@ -67,27 +67,13 @@ struct ucontextn32 {
 	compat_sigset_t     uc_sigmask;   /* mask last for extensibility */
 };
 
-#if ICACHE_REFILLS_WORKAROUND_WAR == 0
-
-struct rt_sigframe_n32 {
-	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_code[2];			/* signal trampoline */
-	struct compat_siginfo rs_info;
-	struct ucontextn32 rs_uc;
-};
-
-#else  /* ICACHE_REFILLS_WORKAROUND_WAR */
-
 struct rt_sigframe_n32 {
 	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_pad[2];
+	u32 rs_pad[2];			/* Was: signal trampoline */
 	struct compat_siginfo rs_info;
 	struct ucontextn32 rs_uc;
-	u32 rs_code[8] ____cacheline_aligned;		/* signal trampoline */
 };
 
-#endif	/* !ICACHE_REFILLS_WORKAROUND_WAR */
-
 extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
 
 asmlinkage int sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
@@ -173,7 +159,7 @@ badframe:
 	force_sig(SIGSEGV, current);
 }
 
-static int setup_rt_frame_n32(struct k_sigaction * ka,
+static int setup_rt_frame_n32(void *sig_return, struct k_sigaction *ka,
 	struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info)
 {
 	struct rt_sigframe_n32 __user *frame;
@@ -184,8 +170,6 @@ static int setup_rt_frame_n32(struct k_sigaction * ka,
 	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
 		goto give_sigsegv;
 
-	install_sigtramp(frame->rs_code, __NR_N32_rt_sigreturn);
-
 	/* Create siginfo.  */
 	err |= copy_siginfo_to_user32(&frame->rs_info, info);
 
@@ -219,7 +203,7 @@ static int setup_rt_frame_n32(struct k_sigaction * ka,
 	regs->regs[ 5] = (unsigned long) &frame->rs_info;
 	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
 	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) frame->rs_code;
+	regs->regs[31] = (unsigned long) sig_return;
 	regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler;
 
 	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
@@ -235,5 +219,7 @@ give_sigsegv:
 
 struct mips_abi mips_abi_n32 = {
 	.setup_rt_frame	= setup_rt_frame_n32,
+	.rt_signal_return_offset =
+		offsetof(struct mips_vdso, n32_rt_signal_trampoline),
 	.restart	= __NR_N32_restart_syscall
 };
-- 
1.6.6

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] MIPS vdso and signal delivery optimization (v2)
  2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
                   ` (2 preceding siblings ...)
  2010-02-19  0:13 ` [PATCH 3/3] MIPS: Move signal trampolines off of the stack David Daney
@ 2010-02-19  2:02 ` David Daney
  2010-02-19 22:08   ` David Daney
  2010-02-19 13:57 ` Ralf Baechle
  4 siblings, 1 reply; 14+ messages in thread
From: David Daney @ 2010-02-19  2:02 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips, ralf

Well this patch set does cause gdb to no longer be able to generate 
stack traces from signal handlers, but that just means gdb needs to be 
fixed.  We will work on that next.

libgcc can unwind through signal handlers both with and without the patch.

David Daney


On 02/18/2010 04:13 PM, David Daney wrote:
> This patch set creates a vdso and moves the signal
> trampolines to it from their previous home on the stack.
>
> In the original patch set:
> http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=49EE3B0F.3040506%40caviumnetworks.com
>
> I stated:
>
> Tested with a 64-bit kernel on a Cavium Octeon cn3860 where I have the
> following results from lmbench2:
>
> Before:
> n64 - Signal handler overhead: 14.517 microseconds
> n32 - Signal handler overhead: 14.497 microseconds
> o32 - Signal handler overhead: 16.637 microseconds
>
> After:
>
> n64 - Signal handler overhead: 7.935 microseconds
> n32 - Signal handler overhead: 7.334 microseconds
> o32 - Signal handler overhead: 8.628 microsecond
>
> All that is still true.
>
> Improvements from the first version:
>
> * Compiles and runs in 32-bit kernels (on qemu at least).
>
> * Updated for linux-queue based 2.6.33-rc8
>
> David Daney (3):
>    MIPS: Add SYSCALL to uasm.
>    MIPS: Preliminary vdso.
>    MIPS: Move signal trampolines off of the stack.
>
>   arch/mips/include/asm/abi.h         |    6 +-
>   arch/mips/include/asm/elf.h         |    4 +
>   arch/mips/include/asm/mmu.h         |    5 +-
>   arch/mips/include/asm/mmu_context.h |    2 +-
>   arch/mips/include/asm/processor.h   |   11 +++-
>   arch/mips/include/asm/uasm.h        |    1 +
>   arch/mips/include/asm/vdso.h        |   29 +++++++++
>   arch/mips/kernel/Makefile           |    2 +-
>   arch/mips/kernel/signal-common.h    |    5 --
>   arch/mips/kernel/signal.c           |   86 ++++++---------------------
>   arch/mips/kernel/signal32.c         |   55 ++++-------------
>   arch/mips/kernel/signal_n32.c       |   26 ++------
>   arch/mips/kernel/syscall.c          |    6 ++-
>   arch/mips/kernel/vdso.c             |  112 +++++++++++++++++++++++++++++++++++
>   arch/mips/mm/uasm.c                 |   19 +++++-
>   15 files changed, 226 insertions(+), 143 deletions(-)
>   create mode 100644 arch/mips/include/asm/vdso.h
>   create mode 100644 arch/mips/kernel/vdso.c
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] MIPS vdso and signal delivery optimization (v2)
  2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
                   ` (3 preceding siblings ...)
  2010-02-19  2:02 ` [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
@ 2010-02-19 13:57 ` Ralf Baechle
  2010-02-19 14:38   ` Ralf Baechle
  4 siblings, 1 reply; 14+ messages in thread
From: Ralf Baechle @ 2010-02-19 13:57 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips

On Thu, Feb 18, 2010 at 04:13:02PM -0800, David Daney wrote:

> Before:
> n64 - Signal handler overhead: 14.517 microseconds
> n32 - Signal handler overhead: 14.497 microseconds
> o32 - Signal handler overhead: 16.637 microseconds
> 
> After:
> 
> n64 - Signal handler overhead: 7.935 microseconds
> n32 - Signal handler overhead: 7.334 microseconds
> o32 - Signal handler overhead: 8.628 microsecond

On a 180MHz 2 CPU single-node IP27:

Before:
Signal handler installation: 3.524 microseconds
Signal handler overhead: 37.009 microseconds
Protection fault: 4.264 microseconds

After:
Signal handler installation: 3.536 microseconds
Signal handler overhead: 14.331 microseconds
Protection fault: 3.600 microseconds

Everything measured with very ancient O32 lmbench 2-alpha11 binaries.
IP27 has processors in separate packages so the cache-to-cache overhead
and thus the speedup is much higher than you have observed.

In 2.4 we used to have drastically better signal latency numbers btw.  I
wonder where all that performance went down the drain.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] MIPS vdso and signal delivery optimization (v2)
  2010-02-19 13:57 ` Ralf Baechle
@ 2010-02-19 14:38   ` Ralf Baechle
  0 siblings, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2010-02-19 14:38 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips

On Fri, Feb 19, 2010 at 02:57:27PM +0100, Ralf Baechle wrote:

> > Before:
> > n64 - Signal handler overhead: 14.517 microseconds
> > n32 - Signal handler overhead: 14.497 microseconds
> > o32 - Signal handler overhead: 16.637 microseconds
> > 
> > After:
> > 
> > n64 - Signal handler overhead: 7.935 microseconds
> > n32 - Signal handler overhead: 7.334 microseconds
> > o32 - Signal handler overhead: 8.628 microsecond
> 
> On a 180MHz 2 CPU single-node IP27:
> 
> Before:
> Signal handler installation: 3.524 microseconds
> Signal handler overhead: 37.009 microseconds
> Protection fault: 4.264 microseconds
> 
> After:
> Signal handler installation: 3.536 microseconds
> Signal handler overhead: 14.331 microseconds
> Protection fault: 3.600 microseconds
> 
> Everything measured with very ancient O32 lmbench 2-alpha11 binaries.
> IP27 has processors in separate packages so the cache-to-cache overhead
> and thus the speedup is much higher than you have observed.

I dug up an old 2.6.12-rc1 kernel binary and reran lmbench on the same
system:

Signal handler installation: 4.207 microseconds
Signal handler overhead: 29.618 microseconds
Protection fault: 2.105 microseconds

And on 2.4.25:

Signal handler installation: 3.674 microseconds
Signal handler overhead: 8.855 microseconds
Protection fault: 3.159 microseconds

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/3] MIPS vdso and signal delivery optimization (v2)
  2010-02-19  2:02 ` [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
@ 2010-02-19 22:08   ` David Daney
  0 siblings, 0 replies; 14+ messages in thread
From: David Daney @ 2010-02-19 22:08 UTC (permalink / raw)
  To: ralf; +Cc: linux-mips

[-- Attachment #1: Type: text/plain, Size: 2351 bytes --]

On 02/18/2010 06:02 PM, David Daney wrote:
> Well this patch set does cause gdb to no longer be able to generate
> stack traces from signal handlers, but that just means gdb needs to be
> fixed. We will work on that next.

Attached is the corresponding gdb patch.  I will push it into the 
upstream gdb.

David Daney


>
> libgcc can unwind through signal handlers both with and without the patch.
>
> David Daney
>
>
> On 02/18/2010 04:13 PM, David Daney wrote:
>> This patch set creates a vdso and moves the signal
>> trampolines to it from their previous home on the stack.
>>
>> In the original patch set:
>> http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=49EE3B0F.3040506%40caviumnetworks.com
>>
>>
>> I stated:
>>
>> Tested with a 64-bit kernel on a Cavium Octeon cn3860 where I have the
>> following results from lmbench2:
>>
>> Before:
>> n64 - Signal handler overhead: 14.517 microseconds
>> n32 - Signal handler overhead: 14.497 microseconds
>> o32 - Signal handler overhead: 16.637 microseconds
>>
>> After:
>>
>> n64 - Signal handler overhead: 7.935 microseconds
>> n32 - Signal handler overhead: 7.334 microseconds
>> o32 - Signal handler overhead: 8.628 microsecond
>>
>> All that is still true.
>>
>> Improvements from the first version:
>>
>> * Compiles and runs in 32-bit kernels (on qemu at least).
>>
>> * Updated for linux-queue based 2.6.33-rc8
>>
>> David Daney (3):
>> MIPS: Add SYSCALL to uasm.
>> MIPS: Preliminary vdso.
>> MIPS: Move signal trampolines off of the stack.
>>
>> arch/mips/include/asm/abi.h | 6 +-
>> arch/mips/include/asm/elf.h | 4 +
>> arch/mips/include/asm/mmu.h | 5 +-
>> arch/mips/include/asm/mmu_context.h | 2 +-
>> arch/mips/include/asm/processor.h | 11 +++-
>> arch/mips/include/asm/uasm.h | 1 +
>> arch/mips/include/asm/vdso.h | 29 +++++++++
>> arch/mips/kernel/Makefile | 2 +-
>> arch/mips/kernel/signal-common.h | 5 --
>> arch/mips/kernel/signal.c | 86 ++++++---------------------
>> arch/mips/kernel/signal32.c | 55 ++++-------------
>> arch/mips/kernel/signal_n32.c | 26 ++------
>> arch/mips/kernel/syscall.c | 6 ++-
>> arch/mips/kernel/vdso.c | 112 +++++++++++++++++++++++++++++++++++
>> arch/mips/mm/uasm.c | 19 +++++-
>> 15 files changed, 226 insertions(+), 143 deletions(-)
>> create mode 100644 arch/mips/include/asm/vdso.h
>> create mode 100644 arch/mips/kernel/vdso.c
>>
>>
>
>


[-- Attachment #2: gdb.patch --]
[-- Type: text/plain, Size: 1691 bytes --]

Index: gdb/mips-linux-tdep.c
===================================================================
RCS file: /cvs/src/src/gdb/mips-linux-tdep.c,v
retrieving revision 1.81
diff -u -p -r1.81 mips-linux-tdep.c
--- gdb/mips-linux-tdep.c	1 Jan 2010 07:31:37 -0000	1.81
+++ gdb/mips-linux-tdep.c	19 Feb 2010 21:58:32 -0000
@@ -797,7 +797,7 @@ static const struct tramp_frame mips_lin
 
    struct sigframe {
      u32 sf_ass[4];            [argument save space for o32]
-     u32 sf_code[2];           [signal trampoline]
+     u32 sf_code[2];           [signal trampoline or fill]
      struct sigcontext sf_sc;
      sigset_t sf_mask;
    };
@@ -827,7 +827,7 @@ static const struct tramp_frame mips_lin
 
    struct rt_sigframe {
      u32 rs_ass[4];            [argument save space for o32]
-     u32 rs_code[2]            [signal trampoline]
+     u32 rs_code[2]            [signal trampoline or fill]
      struct siginfo rs_info;
      struct ucontext rs_uc;
    };
@@ -871,7 +871,7 @@ mips_linux_o32_sigframe_init (const stru
 {
   struct gdbarch *gdbarch = get_frame_arch (this_frame);
   int ireg, reg_position;
-  CORE_ADDR sigcontext_base = func - SIGFRAME_CODE_OFFSET;
+  CORE_ADDR sigcontext_base = get_frame_sp (this_frame);
   const struct mips_regnum *regs = mips_regnum (gdbarch);
   CORE_ADDR regs_base;
 
@@ -1038,7 +1038,7 @@ mips_linux_n32n64_sigframe_init (const s
 {
   struct gdbarch *gdbarch = get_frame_arch (this_frame);
   int ireg, reg_position;
-  CORE_ADDR sigcontext_base = func - SIGFRAME_CODE_OFFSET;
+  CORE_ADDR sigcontext_base =  get_frame_sp (this_frame);
   const struct mips_regnum *regs = mips_regnum (gdbarch);
 
   if (self == &mips_linux_n32_rt_sigframe)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] MIPS: Preliminary vdso.
  2010-02-19  0:13 ` [PATCH 2/3] MIPS: Preliminary vdso David Daney
@ 2010-02-23 20:40   ` Manuel Lauss
  2010-02-23 21:27     ` David Daney
  2010-02-23 21:35     ` Ralf Baechle
  2010-03-16 19:55   ` Ralf Baechle
  1 sibling, 2 replies; 14+ messages in thread
From: Manuel Lauss @ 2010-02-23 20:40 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips, ralf

Hi David,

On Fri, Feb 19, 2010 at 1:13 AM, David Daney <ddaney@caviumnetworks.com> wrote:
> This is a preliminary patch to add a vdso to all user processes.
> Still missing are ELF headers and .eh_frame information.  But it is
> enough to allow us to move signal trampolines off of the stack.  Note
> that emulation of branch delay slots in the FPU emulator still
> requires the stack.
>
> We allocate a single page (the vdso) and write all possible signal
> trampolines into it.  The stack is moved down by one page and the vdso
> is mapped into this space.

Is there anything special required (i.e. special glibc, ..) to make use of these
fine patches?

Thanks,
     Manuel Lauss

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] MIPS: Preliminary vdso.
  2010-02-23 20:40   ` Manuel Lauss
@ 2010-02-23 21:27     ` David Daney
  2010-02-23 21:35     ` Ralf Baechle
  1 sibling, 0 replies; 14+ messages in thread
From: David Daney @ 2010-02-23 21:27 UTC (permalink / raw)
  To: Manuel Lauss; +Cc: linux-mips, ralf

On 02/23/2010 12:40 PM, Manuel Lauss wrote:
> Hi David,
>
> On Fri, Feb 19, 2010 at 1:13 AM, David Daney<ddaney@caviumnetworks.com>  wrote:
>> This is a preliminary patch to add a vdso to all user processes.
>> Still missing are ELF headers and .eh_frame information.  But it is
>> enough to allow us to move signal trampolines off of the stack.  Note
>> that emulation of branch delay slots in the FPU emulator still
>> requires the stack.
>>
>> We allocate a single page (the vdso) and write all possible signal
>> trampolines into it.  The stack is moved down by one page and the vdso
>> is mapped into this space.
>
> Is there anything special required (i.e. special glibc, ..) to make use of these
> fine patches?
>

No.  Quite the opposite really, they are designed for the most part to 
be transparent to userspace.

There are a couple of changes that shouldn't break anything serious:

1) The process' VMA will have a [vdso] region at the highest possible 
address (above the stack).  Most code will not care about this.  However 
if you mprotect(PROT_WRITE) the region and then clobber it or munmap it, 
you will likely lose the ability to return from signal handlers.  It is 
copy-on-write, so this will not affect other processes.

2) The libgcc built by some older versions of GCC will not be able to throw 
exceptions across a signal frame.  This is mostly a problem if you are 
using libgcj (the GCC java runtime).  Note however that the faulty 
versions of libgcc would also fail on kernels that need 
ICACHE_REFILLS_WORKAROUND_WAR (SGI O2).  Most code doesn't try to throw 
exceptions across signal frames, so it would be unaffected.  Also note 
that really old versions of libgcc don't support this trans-signal-frame 
throwing at all.

3) GDB will not show a valid backtrace from a signal handler.  I have 
submitted a gdb patch, but it has not been accepted yet.

David Daney

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] MIPS: Preliminary vdso.
  2010-02-23 20:40   ` Manuel Lauss
  2010-02-23 21:27     ` David Daney
@ 2010-02-23 21:35     ` Ralf Baechle
  1 sibling, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2010-02-23 21:35 UTC (permalink / raw)
  To: Manuel Lauss; +Cc: David Daney, linux-mips

On Tue, Feb 23, 2010 at 09:40:38PM +0100, Manuel Lauss wrote:

> > This is a preliminary patch to add a vdso to all user processes.
> > Still missing are ELF headers and .eh_frame information.  But it is
> > enough to allow us to move signal trampolines off of the stack.  Note
> > that emulation of branch delay slots in the FPU emulator still
> > requires the stack.
> >
> > We allocate a single page (the vdso) and write all possible signal
> > trampolines into it.  The stack is moved down by one page and the vdso
> > is mapped into this space.
> 
> Is there anything special required (i.e. special glibc, ..) to make use of these
> fine patches?

No - the way the signal handlers return is almost entirely hidden from
userland.  Only a few applications that have the need to unwind beyond
the signal stackframe may run into problems and have to be updated and
GDB is one of those that we identified.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] MIPS: Add SYSCALL to uasm.
  2010-02-19  0:13 ` [PATCH 1/3] MIPS: Add SYSCALL to uasm David Daney
@ 2010-03-16 19:55   ` Ralf Baechle
  0 siblings, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2010-03-16 19:55 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips

On Thu, Feb 18, 2010 at 04:13:03PM -0800, David Daney wrote:

Thanks, applied.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] MIPS: Preliminary vdso.
  2010-02-19  0:13 ` [PATCH 2/3] MIPS: Preliminary vdso David Daney
  2010-02-23 20:40   ` Manuel Lauss
@ 2010-03-16 19:55   ` Ralf Baechle
  1 sibling, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2010-03-16 19:55 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips

On Thu, Feb 18, 2010 at 04:13:04PM -0800, David Daney wrote:

> This is a preliminary patch to add a vdso to all user processes.
> Still missing are ELF headers and .eh_frame information.  But it is
> enough to allow us to move signal trampolines off of the stack.  Note
> that emulation of branch delay slots in the FPU emulator still
> requires the stack.
> 
> We allocate a single page (the vdso) and write all possible signal
> trampolines into it.  The stack is moved down by one page and the vdso
> is mapped into this space.

Thanks, applied.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] MIPS: Move signal trampolines off of the stack.
  2010-02-19  0:13 ` [PATCH 3/3] MIPS: Move signal trampolines off of the stack David Daney
@ 2010-03-16 19:56   ` Ralf Baechle
  0 siblings, 0 replies; 14+ messages in thread
From: Ralf Baechle @ 2010-03-16 19:56 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips

On Thu, Feb 18, 2010 at 04:13:05PM -0800, David Daney wrote:

> This is a follow on to the vdso patch.
> 
> Since all processes now have signal trampolines permanently mapped, we
> can use those instead of putting the trampoline on the stack and
> invalidating the corresponding icache across all CPUs.  We also get
> rid of a bunch of ICACHE_REFILLS_WORKAROUND_WAR code.

Thanks, applied.

  Ralf

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-03-16 19:56 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-19  0:13 [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
2010-02-19  0:13 ` [PATCH 1/3] MIPS: Add SYSCALL to uasm David Daney
2010-03-16 19:55   ` Ralf Baechle
2010-02-19  0:13 ` [PATCH 2/3] MIPS: Preliminary vdso David Daney
2010-02-23 20:40   ` Manuel Lauss
2010-02-23 21:27     ` David Daney
2010-02-23 21:35     ` Ralf Baechle
2010-03-16 19:55   ` Ralf Baechle
2010-02-19  0:13 ` [PATCH 3/3] MIPS: Move signal trampolines off of the stack David Daney
2010-03-16 19:56   ` Ralf Baechle
2010-02-19  2:02 ` [PATCH 0/3] MIPS vdso and signal delivery optimization (v2) David Daney
2010-02-19 22:08   ` David Daney
2010-02-19 13:57 ` Ralf Baechle
2010-02-19 14:38   ` Ralf Baechle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).