public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
@ 2006-05-16  6:03 Rusty Russell
  2006-05-16  6:47 ` Ingo Molnar
  0 siblings, 1 reply; 43+ messages in thread
From: Rusty Russell @ 2006-05-16  6:03 UTC (permalink / raw)
  To: lkml - Kernel Mailing List
  Cc: Linus Torvalds, virtualization, Gerd Hoffmann, Zachary Amsden

AFAICT we'll pay one extra TLB entry for this patch.  Zach had a patch
which left the vsyscall page at the top of memory (minus hole for
hypervisor) and patched the ELF header at boot.

Thoughts welcome,
Rusty.

Name: Move vsyscall page out of fixmap, above stack
Author: Gerd Hoffmann <kraxel@suse.de>

Hypervisors want to use memory at the top of the address space
(eg. 64MB for Xen, or 168MB for Xen w/ PAE).  Creating this hole means
moving the vsyscall page away from 0xffffe000.

If we create this hole statically with a config option, we give up,
say, 256MB of lowmem for the case where a hypervisor-capable kernel is
actually running on native hardware.

If we create this hole dynamically and leave the vsyscall page at the
top of kernel memory, we would have to patch up the vsyscall elf
header at boot time to reflect where we put it.

Instead, this patch moves the vsyscall page into the user address
region, just below PAGE_OFFSET: it's still at a fixed address, but
it's not where the hypervisor wants to be, so resizing the hole is
trivial.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/kernel/asm-offsets.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/asm-offsets.c
--- linux-2.6.17-rc4/arch/i386/kernel/asm-offsets.c	2005-07-15 04:38:36.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/asm-offsets.c	2006-05-16 14:24:00.000000000 +1000
@@ -13,6 +13,7 @@
 #include <asm/fixmap.h>
 #include <asm/processor.h>
 #include <asm/thread_info.h>
+#include <asm/elf.h>
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -68,5 +69,5 @@ void foo(void)
 		 sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+	DEFINE(VSYSCALL_BASE, VSYSCALL_BASE);
 }
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/kernel/sysenter.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/sysenter.c
--- linux-2.6.17-rc4/arch/i386/kernel/sysenter.c	2006-03-23 12:42:01.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/sysenter.c	2006-05-16 14:27:05.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -45,23 +46,88 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
-
-	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		memcpy(page,
+		memcpy(syscall_page,
 		       &vsyscall_int80_start,
 		       &vsyscall_int80_end - &vsyscall_int80_start);
 		return 0;
 	}
 
-	memcpy(page,
+	memcpy(syscall_page,
 	       &vsyscall_sysenter_start,
 	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
 	return 0;
 }
+
+static struct page*
+syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
+{
+	struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+	get_page(p);
+	return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+	.close = syscall_vma_close,
+	.nopage = syscall_nopage,
+};
+
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma)
+		return -ENOMEM;
+
+	memset(vma, 0, sizeof(struct vm_area_struct));
+	/* Could randomize here */
+	vma->vm_start = VSYSCALL_BASE;
+	vma->vm_end = VSYSCALL_BASE + PAGE_SIZE;
+	/* MAYWRITE to allow gdb to COW and set breakpoints */
+	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_ops = &syscall_vm_ops;
+	vma->vm_mm = mm;
+
+	down_write(&mm->mmap_sem);
+	if ((ret = insert_vm_struct(mm, vma))) {
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, vma);
+		return ret;
+	}
+	mm->total_vm++;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+	return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+	return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+	return 0;
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/mm/pgtable.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/mm/pgtable.c
--- linux-2.6.17-rc4/arch/i386/mm/pgtable.c	2006-05-16 10:50:48.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/mm/pgtable.c	2006-05-16 14:24:47.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -138,6 +139,10 @@ void set_pmd_pfn(unsigned long vaddr, un
 	__flush_tlb_one(vaddr);
 }
 
+static int nr_fixmaps = 0;
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
 	unsigned long address = __fix_to_virt(idx);
@@ -147,6 +152,13 @@ void __set_fixmap (enum fixed_addresses 
 		return;
 	}
 	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	nr_fixmaps++;
+}
+
+void set_fixaddr_top(unsigned long top)
+{
+	BUG_ON(nr_fixmaps > 0);
+	__FIXADDR_TOP = top - PAGE_SIZE;
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/a.out.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/a.out.h
--- linux-2.6.17-rc4/include/asm-i386/a.out.h	2004-02-04 14:43:43.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/a.out.h	2006-05-16 14:24:47.000000000 +1000
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	(TASK_SIZE - 3*PAGE_SIZE)
 
 #endif
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/elf.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/elf.h
--- linux-2.6.17-rc4/include/asm-i386/elf.h	2006-03-23 12:44:01.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/elf.h	2006-05-16 14:24:47.000000000 +1000
@@ -129,11 +129,16 @@ extern int dump_task_extended_fpu (struc
 #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
 
-#define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
+#define VSYSCALL_BASE	(PAGE_OFFSET - 2*PAGE_SIZE)
 #define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
 #define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
 extern void __kernel_vsyscall;
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+                                       int executable_stack);
+
 #define ARCH_DLINFO						\
 do {								\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/fixmap.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/fixmap.h
--- linux-2.6.17-rc4/include/asm-i386/fixmap.h	2006-03-23 12:43:10.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/fixmap.h	2006-05-16 14:24:47.000000000 +1000
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+extern unsigned long __FIXADDR_TOP;
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -52,7 +52,6 @@
  */
 enum fixed_addresses {
 	FIX_HOLE,
-	FIX_VSYSCALL,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -95,6 +94,8 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
 					unsigned long phys, pgprot_t flags);
 
+extern void set_fixaddr_top(unsigned long top);
+
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
 /*
@@ -116,14 +117,6 @@ extern void __set_fixmap (enum fixed_add
 #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
 
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
-
-
 extern void __this_fixmap_does_not_exist(void);
 
 /*
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/page.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/page.h
--- linux-2.6.17-rc4/include/asm-i386/page.h	2006-05-16 10:51:38.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/page.h	2006-05-16 14:24:47.000000000 +1000
@@ -121,7 +121,7 @@ extern int page_is_ram(unsigned long pag
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM			(__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
@@ -137,6 +137,8 @@ extern int page_is_ram(unsigned long pag
 	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
 		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>

-- 
 ccontrol: http://ccontrol.ozlabs.org


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-16  6:03 [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch Rusty Russell
@ 2006-05-16  6:47 ` Ingo Molnar
  2006-05-16  8:16   ` Zachary Amsden
  2006-05-17  7:49   ` Rusty Russell
  0 siblings, 2 replies; 43+ messages in thread
From: Ingo Molnar @ 2006-05-16  6:47 UTC (permalink / raw)
  To: Rusty Russell
  Cc: lkml - Kernel Mailing List, Linus Torvalds, virtualization,
	Gerd Hoffmann, Zachary Amsden


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> AFAICT we'll pay one extra TLB entry for this patch.  Zach had a patch 
> which left the vsyscall page at the top of memory (minus hole for 
> hypervisor) and patched the ELF header at boot.

i'd suggest the solution from exec-shield (which has been there for a 
long time), which also randomizes the vsyscall vma. Exploits are already 
starting to use the vsyscall page (with predictable addresses) to 
circumvent randomization, it provides 'interesting' instructions to act 
as a syscall-functionality building block. Moving that address to 
another predictable place solves the virtualization problem, but doesn't 
solve the address-space randomization problem.

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-16  6:47 ` Ingo Molnar
@ 2006-05-16  8:16   ` Zachary Amsden
  2006-05-16  8:40     ` Chris Wright
  2006-05-17  7:49   ` Rusty Russell
  1 sibling, 1 reply; 43+ messages in thread
From: Zachary Amsden @ 2006-05-16  8:16 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rusty Russell, lkml - Kernel Mailing List, Linus Torvalds,
	virtualization, Gerd Hoffmann

Ingo Molnar wrote:
> * Rusty Russell <rusty@rustcorp.com.au> wrote:
>
>   
>> AFAICT we'll pay one extra TLB entry for this patch.  Zach had a patch 
>> which left the vsyscall page at the top of memory (minus hole for 
>> hypervisor) and patched the ELF header at boot.
>>     
>
> i'd suggest the solution from exec-shield (which has been there for a 
> long time), which also randomizes the vsyscall vma. Exploits are already 
> starting to use the vsyscall page (with predictable addresses) to 
> circumvent randomization, it provides 'interesting' instructions to act 
> as a syscall-functionality building block. Moving that address to 
> another predictable place solves the virtualization problem, but doesn't 
> solve the address-space randomization problem.
>   

Let's dive into it.  How do you get the randomization without 
sacrificing syscall performance?  Do you randomize on boot, dynamically, 
or on a per-process level?  Because I can see some issues with 
per-process randomization that will certainly cost some amount of cycles 
on the system call path.  Marginal perhaps, but that is exactly where 
you don't want to shed cycles unnecessarily, and the complexity of the 
whole thing will go up quite a bit I think.

Zach

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-16  8:16   ` Zachary Amsden
@ 2006-05-16  8:40     ` Chris Wright
  2006-05-16  8:59       ` Zachary Amsden
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wright @ 2006-05-16  8:40 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Ingo Molnar, Rusty Russell, lkml - Kernel Mailing List,
	Linus Torvalds, virtualization, Gerd Hoffmann

* Zachary Amsden (zach@vmware.com) wrote:
> Let's dive into it.  How do you get the randomization without 
> sacrificing syscall performance?  Do you randomize on boot, dynamically, 
> or on a per-process level?

The latter, on exec.

> Because I can see some issues with 
> per-process randomization that will certainly cost some amount of cycles 
> on the system call path.  Marginal perhaps, but that is exactly where 
> you don't want to shed cycles unnecessarily, and the complexity of the 
> whole thing will go up quite a bit I think.

The crux is here:

+       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
...

-       pushl $SYSENTER_RETURN
-
+       /*
+        * Push current_thread_info()->sysenter_return to the stack.
+        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+        * pushed above; +8 corresponds to copy_thread's esp0 setting.
+        */
+       pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)

...

and in binfmt_elf during exec thread_info->sysenter_return is set up
based on the randomized mapping it does for the vdso

+               ti->sysenter_return = &SYSENTER_RETURN_OFFSET + addr;


I think it's not so bad, but I can't say I've benchmarked the cost.

thanks,
-chris

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-16  8:40     ` Chris Wright
@ 2006-05-16  8:59       ` Zachary Amsden
  0 siblings, 0 replies; 43+ messages in thread
From: Zachary Amsden @ 2006-05-16  8:59 UTC (permalink / raw)
  To: Chris Wright
  Cc: Ingo Molnar, Rusty Russell, lkml - Kernel Mailing List,
	Linus Torvalds, virtualization, Gerd Hoffmann

Chris Wright wrote:
> * Zachary Amsden (zach@vmware.com) wrote:
>   
>> Let's dive into it.  How do you get the randomization without 
>> sacrificing syscall performance?  Do you randomize on boot, dynamically, 
>> or on a per-process level?
>>     
>
> The latter, on exec.
>
>   
>> Because I can see some issues with 
>> per-process randomization that will certainly cost some amount of cycles 
>> on the system call path.  Marginal perhaps, but that is exactly where 
>> you don't want to shed cycles unnecessarily, and the complexity of the 
>> whole thing will go up quite a bit I think.
>>     
>
> The crux is here:
>
> +       OFFSET(TI_sysenter_return, thread_info, sysenter_return);
> ...
>
> -       pushl $SYSENTER_RETURN
> -
> +       /*
> +        * Push current_thread_info()->sysenter_return to the stack.
> +        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
> +        * pushed above; +8 corresponds to copy_thread's esp0 setting.
> +        */
> +       pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
>
> ...
>
> and in binfmt_elf during exec thread_info->sysenter_return is setup
> based on the randomized mapping it does for vdso
>
> +               ti->sysenter_return = &SYSENTER_RETURN_OFFSET + addr;
>
>
> I think it's not so bad, but I can't say I've benchmarked the cost.
>   

Now that I see it, it doesn't look bad at all.  I had imagined a host of 
holy horrors unfolding from it, but clearly that is not the case.  I 
think there is still the sysexit path that needs some change, but in 
total, there should be almost zero cycle impact.  I had expected that 
getting the thread info for the return address would be awkward, but you've 
already switched the stack at this point, so it is really almost free.

Zach


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-16  6:47 ` Ingo Molnar
  2006-05-16  8:16   ` Zachary Amsden
@ 2006-05-17  7:49   ` Rusty Russell
  2006-05-18  7:54     ` Ingo Molnar
  2006-05-20  0:43     ` Andrew Morton
  1 sibling, 2 replies; 43+ messages in thread
From: Rusty Russell @ 2006-05-17  7:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: lkml - Kernel Mailing List, Linus Torvalds, virtualization,
	Gerd Hoffmann, Zachary Amsden

On Tue, 2006-05-16 at 08:47 +0200, Ingo Molnar wrote:
> * Rusty Russell <rusty@rustcorp.com.au> wrote:
> 
> > AFAICT we'll pay one extra TLB entry for this patch.  Zach had a patch 
> > which left the vsyscall page at the top of memory (minus hole for 
> > hypervisor) and patched the ELF header at boot.
> 
> i'd suggest the solution from exec-shield (which has been there for a 
> long time), which also randomizes the vsyscall vma.

Hi Ingo!

	Thanks, I looked at the exec-shield patch.  It has some rough edges (at
least the 2.6.16 version I found).

	Gerd's patch is basically a minimal subset of exec-shield: we can go
further towards exec-shield by using get_unmapped_area for the vsyscall
page rather than nailing it above the stack, but it takes us from a
280-line patch to a 480-line patch.

See below...
Rusty.

Name: Move vsyscall page out of fixmap into normal vma as per mmap
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Status: Boots under qemu

Rather than move the stack down, use get_unmapped_area for the
vsyscall page, as the exec-shield patch does.  This means we need a
pointer in the thread_info, too.

This steals half of the code from Gerd Hoffmann's patch, which moves the
vsyscall page out of the fixmap, and most of the rest from Ingo Molnar's
exec-shield patch for 2.6.16.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/asm-offsets.c .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/asm-offsets.c
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/asm-offsets.c	2005-07-15 04:38:36.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/asm-offsets.c	2006-05-17 17:10:49.000000000 +1000
@@ -53,6 +53,7 @@ void foo(void)
 	OFFSET(TI_preempt_count, thread_info, preempt_count);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
 	OFFSET(TI_restart_block, thread_info, restart_block);
+	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
 	BLANK();
 
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
@@ -68,5 +69,4 @@ void foo(void)
 		 sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
 }
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/entry.S .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/entry.S
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/entry.S	2006-05-16 10:50:48.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/entry.S	2006-05-17 17:10:49.000000000 +1000
@@ -184,8 +184,12 @@ sysenter_past_esp:
 	pushl %ebp
 	pushfl
 	pushl $(__USER_CS)
-	pushl $SYSENTER_RETURN
-
+	/*
+	 * Push current_thread_info()->sysenter_return to the stack.
+	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
+	 */
+	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
 /*
  * Load the potential sixth argument from user stack.
  * Careful about security.
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/signal.c .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/signal.c
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/signal.c	2006-05-16 10:50:48.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/signal.c	2006-05-17 17:10:49.000000000 +1000
@@ -351,7 +351,7 @@ static int setup_frame(int sig, struct k
 			goto give_sigsegv;
 	}
 
-	restorer = &__kernel_sigreturn;
+	restorer = current->mm->context.vdso + (long)&__kernel_sigreturn;
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
@@ -447,7 +447,7 @@ static int setup_rt_frame(int sig, struc
 		goto give_sigsegv;
 
 	/* Set up to return from userspace.  */
-	restorer = &__kernel_rt_sigreturn;
+	restorer = current->mm->context.vdso + (long)&__kernel_rt_sigreturn;
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 	err |= __put_user(restorer, &frame->pretcode);
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/sysenter.c .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/sysenter.c
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/sysenter.c	2006-03-23 12:42:01.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/sysenter.c	2006-05-17 17:10:49.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -45,23 +46,111 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
-
-	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		memcpy(page,
+		memcpy(syscall_page,
 		       &vsyscall_int80_start,
 		       &vsyscall_int80_end - &vsyscall_int80_start);
 		return 0;
 	}
 
-	memcpy(page,
+	memcpy(syscall_page,
 	       &vsyscall_sysenter_start,
 	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
 	return 0;
 }
+
+static struct page*
+syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
+{
+	struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+	get_page(p);
+	return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+	.close = syscall_vma_close,
+	.nopage = syscall_nopage,
+};
+
+/* Defined in vsyscall-sysenter.S */
+extern char SYSENTER_RETURN_OFFSET[];
+
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		ret = -ENOMEM;
+		goto up_fail;
+	}
+
+	memset(vma, 0, sizeof(struct vm_area_struct));
+	vma->vm_start = addr;
+	vma->vm_end = addr + PAGE_SIZE;
+	/* MAYWRITE to allow gdb to COW and set breakpoints */
+	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_ops = &syscall_vm_ops;
+	vma->vm_mm = mm;
+
+	if ((ret = insert_vm_struct(mm, vma)))
+		goto free_vma;
+	current->mm->context.vdso = (void *)addr;
+	current_thread_info()->sysenter_return = SYSENTER_RETURN_OFFSET + addr;
+	mm->total_vm++;
+	up_write(&mm->mmap_sem);
+	return 0;
+
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+		return "[vdso]";
+	return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+	return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+	return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+	return 0;
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/vsyscall-sysenter.S .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/vsyscall-sysenter.S
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/vsyscall-sysenter.S	2006-05-16 10:50:48.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/vsyscall-sysenter.S	2006-05-17 17:10:49.000000000 +1000
@@ -42,11 +42,11 @@ __kernel_vsyscall:
 	/* 7: align return point with nop's to make disassembly easier */
 	.space 7,0x90
 
-	/* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+	/* 14: System call restart point is here! (SYSENTER_RETURN_OFFSET-2) */
 	jmp .Lenter_kernel
 	/* 16: System call normal return point is here! */
-	.globl SYSENTER_RETURN	/* Symbol used by entry.S.  */
-SYSENTER_RETURN:
+	.globl SYSENTER_RETURN_OFFSET	/* Symbol used by sysenter.c  */
+SYSENTER_RETURN_OFFSET:
 	pop %ebp
 .Lpop_ebp:
 	pop %edx
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/vsyscall.lds.S .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/vsyscall.lds.S
--- .23560-linux-2.6.17-rc4-git3/arch/i386/kernel/vsyscall.lds.S	2006-03-23 12:42:42.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/kernel/vsyscall.lds.S	2006-05-17 17:10:49.000000000 +1000
@@ -7,7 +7,7 @@
 
 SECTIONS
 {
-  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+  . = SIZEOF_HEADERS;
 
   .hash           : { *(.hash) }		:text
   .dynsym         : { *(.dynsym) }
@@ -20,7 +20,7 @@ SECTIONS
      For the layouts to match, we need to skip more than enough
      space for the dynamic symbol table et al.  If this amount
      is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = VSYSCALL_BASE + 0x400;
+  . = 0x400;
 
   .text           : { *(.text) }		:text =0x90909090
   .note		  : { *(.note.*) }		:text :note
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/arch/i386/mm/pgtable.c .23560-linux-2.6.17-rc4-git3.updated/arch/i386/mm/pgtable.c
--- .23560-linux-2.6.17-rc4-git3/arch/i386/mm/pgtable.c	2006-05-16 10:50:48.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/arch/i386/mm/pgtable.c	2006-05-17 17:10:49.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -138,6 +139,10 @@ void set_pmd_pfn(unsigned long vaddr, un
 	__flush_tlb_one(vaddr);
 }
 
+static int nr_fixmaps = 0;
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
 	unsigned long address = __fix_to_virt(idx);
@@ -147,6 +152,13 @@ void __set_fixmap (enum fixed_addresses 
 		return;
 	}
 	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	nr_fixmaps++;
+}
+
+void set_fixaddr_top(unsigned long top)
+{
+	BUG_ON(nr_fixmaps > 0);
+	__FIXADDR_TOP = top - PAGE_SIZE;
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/fs/proc/task_mmu.c .23560-linux-2.6.17-rc4-git3.updated/fs/proc/task_mmu.c
--- .23560-linux-2.6.17-rc4-git3/fs/proc/task_mmu.c	2006-03-23 12:44:56.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/fs/proc/task_mmu.c	2006-05-17 17:10:49.000000000 +1000
@@ -153,22 +153,23 @@ static int show_map_internal(struct seq_
 		pad_len_spaces(m, len);
 		seq_path(m, file->f_vfsmnt, file->f_dentry, "\n");
 	} else {
-		if (mm) {
-			if (vma->vm_start <= mm->start_brk &&
+		const char *name = arch_vma_name(vma);
+		if (!name) {
+			if (mm) {
+				if (vma->vm_start <= mm->start_brk &&
 						vma->vm_end >= mm->brk) {
-				pad_len_spaces(m, len);
-				seq_puts(m, "[heap]");
-			} else {
-				if (vma->vm_start <= mm->start_stack &&
-					vma->vm_end >= mm->start_stack) {
-
-					pad_len_spaces(m, len);
-					seq_puts(m, "[stack]");
+					name = "[heap]";
+				} else if (vma->vm_start <= mm->start_stack &&
+					   vma->vm_end >= mm->start_stack) {
+					name = "[stack]";
 				}
+			} else {
+				name = "[vdso]";
 			}
-		} else {
+		}
+		if (name) {
 			pad_len_spaces(m, len);
-			seq_puts(m, "[vdso]");
+			seq_puts(m, name);
 		}
 	}
 	seq_putc(m, '\n');
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/asm-i386/elf.h .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/elf.h
--- .23560-linux-2.6.17-rc4-git3/include/asm-i386/elf.h	2006-03-23 12:44:01.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/elf.h	2006-05-17 17:10:49.000000000 +1000
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/system.h>		/* for savesegment */
 #include <asm/auxvec.h>
+#include <asm/desc.h>
 
 #include <linux/utsname.h>
 
@@ -129,11 +130,20 @@ extern int dump_task_extended_fpu (struc
 #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
 
-#define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
+#define VSYSCALL_BASE	((unsigned long)current->mm->context.vdso)
 #define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
-#define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
+#define VSYSCALL_OFFSET	((unsigned long) &__kernel_vsyscall)
+#define VSYSCALL_ENTRY	(VSYSCALL_BASE + VSYSCALL_OFFSET)
+/* kernel-internal fixmap address: */
+#define __VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
+#define __VSYSCALL_EHDR	((const struct elfhdr *) __VSYSCALL_BASE)
 extern void __kernel_vsyscall;
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+                                       int executable_stack);
+
 #define ARCH_DLINFO						\
 do {								\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
@@ -148,15 +158,15 @@ do {								\
  * Dumping its extra ELF program headers includes all the other information
  * a debugger needs to easily find how the vsyscall DSO was being used.
  */
-#define ELF_CORE_EXTRA_PHDRS		(VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_EXTRA_PHDRS		(__VSYSCALL_EHDR->e_phnum)
 #define ELF_CORE_WRITE_EXTRA_PHDRS					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VSYSCALL_BASE		      \
-					   + VSYSCALL_EHDR->e_phoff);	      \
+		(const struct elf_phdr *) (__VSYSCALL_BASE		      \
+					   + __VSYSCALL_EHDR->e_phoff);	      \
 	int i;								      \
 	Elf32_Off ofs = 0;						      \
-	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
+	for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) {		      \
 		struct elf_phdr phdr = vsyscall_phdrs[i];		      \
 		if (phdr.p_type == PT_LOAD) {				      \
 			BUG_ON(ofs != 0);				      \
@@ -174,10 +184,10 @@ do {									      \
 #define ELF_CORE_WRITE_EXTRA_DATA					      \
 do {									      \
 	const struct elf_phdr *const vsyscall_phdrs =			      \
-		(const struct elf_phdr *) (VSYSCALL_BASE		      \
-					   + VSYSCALL_EHDR->e_phoff);	      \
+		(const struct elf_phdr *) (__VSYSCALL_BASE		      \
+					   + __VSYSCALL_EHDR->e_phoff);	      \
 	int i;								      \
-	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
+	for (i = 0; i < __VSYSCALL_EHDR->e_phnum; ++i) {		      \
 		if (vsyscall_phdrs[i].p_type == PT_LOAD)		      \
 			DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,	      \
 				   PAGE_ALIGN(vsyscall_phdrs[i].p_memsz));    \
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/asm-i386/fixmap.h .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/fixmap.h
--- .23560-linux-2.6.17-rc4-git3/include/asm-i386/fixmap.h	2006-03-23 12:43:10.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/fixmap.h	2006-05-17 17:10:49.000000000 +1000
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+extern unsigned long __FIXADDR_TOP;
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -95,6 +95,8 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
 					unsigned long phys, pgprot_t flags);
 
+extern void set_fixaddr_top(unsigned long top);
+
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
 /*
@@ -116,14 +118,6 @@ extern void __set_fixmap (enum fixed_add
 #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
 
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
-
-
 extern void __this_fixmap_does_not_exist(void);
 
 /*
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/asm-i386/mmu.h .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/mmu.h
--- .23560-linux-2.6.17-rc4-git3/include/asm-i386/mmu.h	2004-02-04 14:43:57.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/mmu.h	2006-05-17 17:10:49.000000000 +1000
@@ -12,6 +12,7 @@ typedef struct { 
 	int size;
 	struct semaphore sem;
 	void *ldt;
+	void *vdso;
 } mm_context_t;
 
 #endif
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/asm-i386/page.h .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/page.h
--- .23560-linux-2.6.17-rc4-git3/include/asm-i386/page.h	2006-05-16 10:51:38.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/page.h	2006-05-17 17:10:49.000000000 +1000
@@ -107,6 +107,9 @@ extern int sysctl_legacy_va_layout;
 
 extern int page_is_ram(unsigned long pagenr);
 
+#define __HAVE_ARCH_VMA_NAME 1
+struct vm_area_struct;
+const char *arch_vma_name(struct vm_area_struct *vma);
 #endif /* __ASSEMBLY__ */
 
 #ifdef __ASSEMBLY__
@@ -121,7 +124,7 @@ extern int page_is_ram(unsigned long pag
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM			(__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
@@ -137,6 +140,7 @@ extern int page_is_ram(unsigned long pag
 	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
 		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
+#define __HAVE_ARCH_GATE_AREA 1
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/asm-i386/thread_info.h .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/thread_info.h
--- .23560-linux-2.6.17-rc4-git3/include/asm-i386/thread_info.h	2006-03-23 12:44:59.000000000 +1100
+++ .23560-linux-2.6.17-rc4-git3.updated/include/asm-i386/thread_info.h	2006-05-17 17:10:49.000000000 +1000
@@ -38,6 +38,7 @@ struct thread_info {
 					 	   0-0xBFFFFFFF for user-thead
 						   0-0xFFFFFFFF for kernel-thread
 						*/
+	void			*sysenter_return;
 	struct restart_block    restart_block;
 
 	unsigned long           previous_esp;   /* ESP of the previous stack in case
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23560-linux-2.6.17-rc4-git3/include/linux/mm.h .23560-linux-2.6.17-rc4-git3.updated/include/linux/mm.h
--- .23560-linux-2.6.17-rc4-git3/include/linux/mm.h	2006-05-16 10:51:43.000000000 +1000
+++ .23560-linux-2.6.17-rc4-git3.updated/include/linux/mm.h	2006-05-17 17:10:49.000000000 +1000
@@ -1042,6 +1042,13 @@ int in_gate_area_no_task(unsigned long a
 #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+#ifndef __HAVE_ARCH_VMA_NAME
+static inline const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif	/* __HAVE_ARCH_VMA_NAME */
+
 /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
 #define OOM_DISABLE -17
 

-- 
 ccontrol: http://ccontrol.ozlabs.org


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-17  7:49   ` Rusty Russell
@ 2006-05-18  7:54     ` Ingo Molnar
  2006-05-18  8:29       ` Gerd Hoffmann
  2006-05-20  0:43     ` Andrew Morton
  1 sibling, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-18  7:54 UTC (permalink / raw)
  To: Rusty Russell
  Cc: lkml - Kernel Mailing List, Linus Torvalds, virtualization,
	Gerd Hoffmann, Zachary Amsden


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> 	Thanks, I looked at the exec-shield patch.  It has some rough 
> edges (at least the 2.6.16 version I found).

the most recent one is always in the Fedora rawhide kernel RPM/SRPM. 
(that means it closely tracks upstream.)

> 	Gerd's is basically a minimal subset of the exec-shield: we 
> can go further towards exec-shield by using get_unmapped_area for the 
> vsyscall page rather than nailing it above the stack, but it takes us 
> from a 280-line patch to a 480-line patch.

certainly looks good to me! What are the changes you did to the 
exec-shield implementation of vdso randomization? The patch seems 
largely identical to the one in exec-shield.

(and it would be nice to do this on x86_64 too - exploits already exist 
using the fixmapped VDSO there as a trampoline.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-18  7:54     ` Ingo Molnar
@ 2006-05-18  8:29       ` Gerd Hoffmann
  0 siblings, 0 replies; 43+ messages in thread
From: Gerd Hoffmann @ 2006-05-18  8:29 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rusty Russell, lkml - Kernel Mailing List, Linus Torvalds,
	virtualization, Zachary Amsden

> 
> (and it would be nice to do this on x86_64 too - exploits already exist 
> using the fixmapped VDSO there as a trampoline.)

At least for the ia32 emulation that should be easy as the idea to
implement the vsyscall page as vma was shamelessly stolen from andy's
arch/x86_64/ia32/syscall32.c ;)

cheers,

  Gerd

-- 
Gerd Hoffmann <kraxel@suse.de>
Erst mal heiraten, ein, zwei Kinder, und wenn alles läuft
geh' ich nach drei Jahren mit der Familie an die Börse.
http://www.suse.de/~kraxel/julika-dora.jpeg

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-17  7:49   ` Rusty Russell
  2006-05-18  7:54     ` Ingo Molnar
@ 2006-05-20  0:43     ` Andrew Morton
  2006-05-20  1:03       ` Ingo Molnar
                         ` (2 more replies)
  1 sibling, 3 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-20  0:43 UTC (permalink / raw)
  To: Rusty Russell; +Cc: mingo, linux-kernel, torvalds, virtualization, kraxel, zach

Rusty Russell <rusty@rustcorp.com.au> wrote:
>
> Name: Move vsyscall page out of fixmap into normal vma as per mmap

This causes mysterious hangs when starting init.

Distro is RH FC1, running SysVinit-2.85-5.

dmesg, sysrq-T and .config are at
http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
out.

This is the second time recently when a patch has caused this machine to
oddly hang in init.  It's possible that there's a bug of some form in that
version of init that we'll need to know about and take care of in some
fashion.


(I verified the hang with just -linus+this, so it's not related to any
other -mm things).


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  0:43     ` Andrew Morton
@ 2006-05-20  1:03       ` Ingo Molnar
  2006-05-20  1:11         ` Andrew Morton
  2006-05-20  1:24       ` Arjan van de Ven
  2006-05-22 16:29       ` Jakub Jelinek
  2 siblings, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-20  1:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Rusty Russell, linux-kernel, torvalds, virtualization, kraxel,
	zach


* Andrew Morton <akpm@osdl.org> wrote:

> Rusty Russell <rusty@rustcorp.com.au> wrote:
> >
> > Name: Move vsyscall page out of fixmap into normal vma as per mmap
> 
> This causes mysterious hangs when starting init.
> 
> Distro is RH FC1, running SysVinit-2.85-5.
> 
> dmesg, sysrq-T and .config are at
> http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
> out.
> 
> This is the second time recently when a patch has caused this machine 
> to oddly hang in init.  It's possible that there's a bug of some form 
> in that version of init that we'll need to know about and take care of 
> in some fashion.

FC1 is like really ancient. I think there was a glibc bug that caused 
vsyscall related init hangs like that. To nevertheless let people run 
their old stuff there's a vdso=0 boot option in exec-shield.

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  1:03       ` Ingo Molnar
@ 2006-05-20  1:11         ` Andrew Morton
  2006-05-20  1:15           ` Linus Torvalds
                             ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-20  1:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: rusty, linux-kernel, torvalds, virtualization, kraxel, zach

Ingo Molnar <mingo@elte.hu> wrote:
>
> 
> * Andrew Morton <akpm@osdl.org> wrote:
> 
> > Rusty Russell <rusty@rustcorp.com.au> wrote:
> > >
> > > Name: Move vsyscall page out of fixmap into normal vma as per mmap
> > 
> > This causes mysterious hangs when starting init.
> > 
> > Distro is RH FC1, running SysVinit-2.85-5.
> > 
> > dmesg, sysrq-T and .config are at
> > http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
> > out.
> > 
> > This is the second time recently when a patch has caused this machine 
> > to oddly hang in init.  It's possible that there's a bug of some form 
> > in that version of init that we'll need to know about and take care of 
> > in some fashion.
> 
> FC1 is like really ancient. I think there was a glibc bug that caused 
> vsyscall related init hangs like that. To nevertheless let people run 
> their old stuff there's a vdso=0 boot option in exec-shield.
> 

Well that patch took a machine from working to non-working.  Pretty serious
stuff.  We should get to the bottom of the problem so we can assess the
risk and impact, no?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  1:11         ` Andrew Morton
@ 2006-05-20  1:15           ` Linus Torvalds
  2006-05-20  8:53             ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
  2006-05-20  1:16           ` [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch Zachary Amsden
  2006-05-20  1:49           ` Andi Kleen
  2 siblings, 1 reply; 43+ messages in thread
From: Linus Torvalds @ 2006-05-20  1:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, rusty, linux-kernel, virtualization, kraxel, zach



On Fri, 19 May 2006, Andrew Morton wrote:
> > 
> > FC1 is like really ancient. I think there was a glibc bug that caused 
> > vsyscall related init hangs like that. To nevertheless let people run 
> > their old stuff there's a vdso=0 boot option in exec-shield.
>
> 
> Well that patch took a machine from working to non-working.  Pretty serious
> stuff.  We should get to the bottom of the problem so we can assess the
> risk and impact, no?

Yes. And it would be good to have a way to turn it off - either globally 
or by some per-process setup (eg off by default, but turn on when doing 
some magic).

The per-process one would be the harder one, because it would require the 
fixmap entry, but not globally. So I suspect the only practical thing 
would be to have it be a kernel boot-time option.

		Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  1:11         ` Andrew Morton
  2006-05-20  1:15           ` Linus Torvalds
@ 2006-05-20  1:16           ` Zachary Amsden
  2006-05-20  1:49           ` Andi Kleen
  2 siblings, 0 replies; 43+ messages in thread
From: Zachary Amsden @ 2006-05-20  1:16 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, rusty, linux-kernel, torvalds, virtualization,
	kraxel

[-- Attachment #1: Type: text/plain, Size: 1301 bytes --]

Andrew Morton wrote:
> Ingo Molnar <mingo@elte.hu> wrote:
>   
>> * Andrew Morton <akpm@osdl.org> wrote:
>>
>>     
>>> Rusty Russell <rusty@rustcorp.com.au> wrote:
>>>       
>>>> Name: Move vsyscall page out of fixmap into normal vma as per mmap
>>>>         
>>> This causes mysterious hangs when starting init.
>>>
>>> Distro is RH FC1, running SysVinit-2.85-5.
>>>
>>> dmesg, sysrq-T and .config are at
>>> http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
>>> out.
>>>
>>> This is the second time recently when a patch has caused this machine 
>>> to oddly hang in init.  It's possible that there's a bug of some form 
>>> in that version of init that we'll need to know about and take care of 
>>> in some fashion.
>>>       
>> FC1 is like really ancient. I think there was a glibc bug that caused 
>> vsyscall related init hangs like that. To nevertheless let people run 
>> their old stuff there's a vdso=0 boot option in exec-shield.
>>
>>     
>
> Well that patch took a machine from working to non-working.  Pretty serious
> stuff.  We should get to the bottom of the problem so we can assess the
> risk and impact, no?

An easy test for culpability of kernel vs. init would be to back out all 
patches and recompile the kernel with vsyscall moved down by 4 megs.



[-- Attachment #2: bogo-fixmap --]
[-- Type: text/plain, Size: 512 bytes --]


Index: linux-2.6.17-rc/include/asm-i386/fixmap.h
===================================================================
--- linux-2.6.17-rc.orig/include/asm-i386/fixmap.h	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc/include/asm-i386/fixmap.h	2006-05-19 18:16:00.000000000 -0700
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+#define __FIXADDR_TOP	0xffbff000
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  0:43     ` Andrew Morton
  2006-05-20  1:03       ` Ingo Molnar
@ 2006-05-20  1:24       ` Arjan van de Ven
  2006-05-22 16:29       ` Jakub Jelinek
  2 siblings, 0 replies; 43+ messages in thread
From: Arjan van de Ven @ 2006-05-20  1:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Rusty Russell, mingo, linux-kernel, torvalds, virtualization,
	kraxel, zach

On Fri, 2006-05-19 at 17:43 -0700, Andrew Morton wrote:
> Rusty Russell <rusty@rustcorp.com.au> wrote:
> >
> > Name: Move vsyscall page out of fixmap into normal vma as per mmap
> 
> This causes mysterious hangs when starting init.
> 
> Distro is RH FC1, running SysVinit-2.85-5.
> 
> dmesg, sysrq-T and .config are at
> http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
> out.
> 
> This is the second time recently when a patch has caused this machine to
> oddly hang in init.  It's possible that there's a bug of some form in that
> version of init that we'll need to know about and take care of in some
> fashion.
> 

hmm curious; FC1 already had the Exec Shield patchkit... otoh no
vsyscall table I suppose


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  1:11         ` Andrew Morton
  2006-05-20  1:15           ` Linus Torvalds
  2006-05-20  1:16           ` [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch Zachary Amsden
@ 2006-05-20  1:49           ` Andi Kleen
  2 siblings, 0 replies; 43+ messages in thread
From: Andi Kleen @ 2006-05-20  1:49 UTC (permalink / raw)
  To: virtualization; +Cc: Andrew Morton, Ingo Molnar, torvalds, linux-kernel

On Saturday 20 May 2006 03:11, Andrew Morton wrote:
> Ingo Molnar <mingo@elte.hu> wrote:
> >
> > 
> > * Andrew Morton <akpm@osdl.org> wrote:
> > 
> > > Rusty Russell <rusty@rustcorp.com.au> wrote:
> > > >
> > > > Name: Move vsyscall page out of fixmap into normal vma as per mmap
> > > 
> > > This causes mysterious hangs when starting init.
> > > 
> > > Distro is RH FC1, running SysVinit-2.85-5.
> > > 
> > > dmesg, sysrq-T and .config are at
> > > http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
> > > out.
> > > 
> > > This is the second time recently when a patch has caused this machine 
> > > to oddly hang in init.  It's possible that there's a bug of some form 
> > > in that version of init that we'll need to know about and take care of 
> > > in some fashion.
> > 
> > FC1 is like really ancient. I think there was a glibc bug that caused 
> > vsyscall related init hangs like that. To nevertheless let people run 
> > their old stuff there's a vdso=0 boot option in exec-shield.
> > 
> 
> Well that patch took a machine from working to non-working.  Pretty serious
> stuff.  We should get to the bottom of the problem so we can assess the
> risk and impact, no?

Just changing the address of the vsyscall page shouldn't break anything. The 
x86-64 32bit emulation has it at a different address than native i386 and 
afaik nothing broke because of that.

-Andi

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  1:15           ` Linus Torvalds
@ 2006-05-20  8:53             ` Ingo Molnar
  2006-05-20  9:26               ` Andrew Morton
  0 siblings, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-20  8:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, rusty, linux-kernel, virtualization, kraxel, zach


* Linus Torvalds <torvalds@osdl.org> wrote:

> > Well that patch took a machine from working to non-working.  Pretty serious
> > stuff.  We should get to the bottom of the problem so we can assess the
> > risk and impact, no?
> 
> Yes. And it would be good to have a way to turn it off - either 
> globally or by some per-process setup (eg off by default, but turn on 
> when doing some magic).
> 
> The per-process one would be the harder one, because it would require 
> the fixmap entry, but not globally. So I suspect the only practical 
> thing would be to have it be a kernel boot-time option.

below is a patch that adds the vdso=0 boot option from exec-shield and 
the /proc/sys/vm/vdso_enabled per-system sysctl.

Andrew, could you try this - do newly started processes work fine if you 
re-enable the vdso after booting with vdso=0? That could tell us whether 
it's an init bug or a glibc bug.

	Ingo

--------
Subject: i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
From: Ingo Molnar <mingo@elte.hu>

add the vdso=0 boot option and the /proc/sys/vm/vdso_enabled sysctl, on 
i386. VDSO defaults to enabled. The runtime switch works fine for newly 
started processes [it does not impact existing process images]:

 # cat /proc/self/maps | grep vdso
 b7f42000-b7f43000 r-xp b7f42000 00:00 0          [vdso]
 # echo 0 > /proc/sys/vm/vdso_enabled
 # cat /proc/self/maps | grep vdso
 # echo 1 > /proc/sys/vm/vdso_enabled
 # cat /proc/self/maps | grep vdso
 b7f05000-b7f06000 r-xp b7f05000 00:00 0          [vdso]
 #

Signed-off-by: Ingo Molnar <mingo@elte.hu>

---
 arch/i386/kernel/sysenter.c |   21 +++++++++++++++++++++
 include/linux/sysctl.h      |    1 +
 kernel/sysctl.c             |   16 ++++++++++++++++
 3 files changed, 38 insertions(+)

Index: linux/arch/i386/kernel/sysenter.c
===================================================================
--- linux.orig/arch/i386/kernel/sysenter.c
+++ linux/arch/i386/kernel/sysenter.c
@@ -22,6 +22,21 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+	vdso_enabled = simple_strtoul(s, NULL, 0);
+
+	return 1;
+}
+
+__setup("vdso=", vdso_setup);
+
 extern asmlinkage void sysenter_entry(void);
 
 void enable_sep_cpu(void)
@@ -97,6 +112,9 @@ int arch_setup_additional_pages(struct l
 	unsigned long addr;
 	int ret;
 
+	if (unlikely(!vdso_enabled))
+		return 0;
+
 	down_write(&mm->mmap_sem);
 	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
@@ -122,16 +140,19 @@ int arch_setup_additional_pages(struct l
 	ret = insert_vm_struct(mm, vma);
 	if (ret)
 		goto free_vma;
+
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return = SYSENTER_RETURN_OFFSET + addr;
 	mm->total_vm++;
 	up_write(&mm->mmap_sem);
+
 	return 0;
 
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
Index: linux/include/linux/sysctl.h
===================================================================
--- linux.orig/include/linux/sysctl.h
+++ linux/include/linux/sysctl.h
@@ -186,6 +186,7 @@ enum
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_VDSO_ENABLED=33,	/* map VDSO into new processes? */
 };
 
 
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -158,6 +158,10 @@ extern ctl_table inotify_table[];
 int sysctl_legacy_va_layout;
 #endif
 
+#ifdef CONFIG_X86_32
+extern int vdso_enabled;
+#endif
+
 /* /proc declarations: */
 
 #ifdef CONFIG_PROC_FS
@@ -915,6 +919,18 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_X86_32
+	{
+		.ctl_name	= VM_VDSO_ENABLED,
+		.procname	= "vdso_enabled",
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(vdso_enabled),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  8:53             ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
@ 2006-05-20  9:26               ` Andrew Morton
  2006-05-20  9:30                 ` Zachary Amsden
                                   ` (4 more replies)
  0 siblings, 5 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-20  9:26 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach

Ingo Molnar <mingo@elte.hu> wrote:
>
> 
> * Linus Torvalds <torvalds@osdl.org> wrote:
> 
> > > Well that patch took a machine from working to non-working.  Pretty serious
> > > stuff.  We should get to the bottom of the problem so we can assess the
> > > risk and impact, no?
> > 
> > Yes. And it would be good to have a way to turn it off - either 
> > globally or by some per-process setup (eg off by default, but turn on 
> > when doing some magic).
> > 
> > The per-process one would be the harder one, because it would require 
> > the fixmap entry, but not globally. So I suspect the only practical 
> > thing would be to have it be a kernel boot-time option.
> 
> below is a patch that adds the vdso=0 boot option from exec-shield and 
> the /proc/sys/vm/vdso_enabled per-system sysctl.
> 
> Andrew, could you try this - do newly started processes work fine if you 
> re-enable the vdso after booting with vdso=0?

vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
vmm:/home/akpm# 
vmm:/home/akpm> ls -l
zsh: segmentation fault  ls -l

> That could tell us whether 
> it's an init bug or a glibc bug.

It tells us neither.  This could be a new kernel bug which only certain old
userspace setups are known to trigger.  Until we know exactly why this is
occurring, we don't know where the bug is.

And once we've worked that thing out, and if we determine that the bug is
in userspace then we might be able to craft the patch in such a fashion
that the old userspace continues to work, which would be a win.

>  arch/i386/kernel/sysenter.c |   21 +++++++++++++++++++++
>  include/linux/sysctl.h      |    1 +
>  kernel/sysctl.c             |   16 ++++++++++++++++
>  3 files changed, 38 insertions(+)

Documentation/kernel-parameters.txt, please.

> +unsigned int vdso_enabled = 1;

__read_mostly.



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:26               ` Andrew Morton
@ 2006-05-20  9:30                 ` Zachary Amsden
  2006-05-20  9:43                   ` Zachary Amsden
  2006-05-20  9:48                   ` Andrew Morton
  2006-05-20  9:54                 ` Ingo Molnar
                                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 43+ messages in thread
From: Zachary Amsden @ 2006-05-20  9:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, torvalds, rusty, linux-kernel, virtualization,
	kraxel

[-- Attachment #1: Type: text/plain, Size: 847 bytes --]

Andrew Morton wrote:
> vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
> vmm:/home/akpm# 
> vmm:/home/akpm> ls -l
> zsh: segmentation fault  ls -l
>
>   
>> That could tell us whether 
>> it's an init bug or a glibc bug.
>>     
>
> It tells us neither.  This could be a new kernel bug which only certain old
> userspace setups are known to trigger.  Until we know exactly why this is
> occurring, we don't know where the bug is.
>
> And once we've worked that thing out, and if we determine that the bug is
> in userspace then we might be able to craft the patch in such a fashion
> that the old userspace continues to work, which would be a win.
>   

Please try my patch - sent earlier, but attached again.  It will tell 
you with 100% confidence if the problem is with userspace expecting the 
vsyscall page to be at a particular address.

[-- Attachment #2: bogo-fixmap --]
[-- Type: text/plain, Size: 512 bytes --]


Index: linux-2.6.17-rc/include/asm-i386/fixmap.h
===================================================================
--- linux-2.6.17-rc.orig/include/asm-i386/fixmap.h	2006-03-19 21:53:29.000000000 -0800
+++ linux-2.6.17-rc/include/asm-i386/fixmap.h	2006-05-19 18:16:00.000000000 -0700
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+#define __FIXADDR_TOP	0xffbff000
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:30                 ` Zachary Amsden
@ 2006-05-20  9:43                   ` Zachary Amsden
  2006-05-20  9:48                   ` Andrew Morton
  1 sibling, 0 replies; 43+ messages in thread
From: Zachary Amsden @ 2006-05-20  9:43 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Andrew Morton, Ingo Molnar, torvalds, rusty, linux-kernel,
	virtualization, kraxel

Zachary Amsden wrote:
> Andrew Morton wrote:
>> vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled vmm:/home/akpm# 
>> vmm:/home/akpm> ls -l
>> zsh: segmentation fault  ls -l
>>
>>  
>>> That could tell us whether it's an init bug or a glibc bug.
>>>     
>>
>> It tells us neither.  This could be a new kernel bug which only 
>> certain old
>> userspace setups are known to trigger.  Until we know exactly why 
>> this is
>> occurring, we don't know where the bug is.
>>
>> And once we've worked that thing out, and if we determine that the 
>> bug is
>> in userspace then we might be able to craft the patch in such a fashion
>> that the old userspace continues to work, which would be a win.
>>   
>
> Please try my patch - sent earlier, but attached again.  It will tell 
> you with 100% confidence if the problem is with userspace expecting 
> the vsyscall page to be at a particular address.

I should rephrase.  100% confidence - engineer stupidity factor is more 
appropriate.  I can't guarantee an absolute.  But it is my belief that 
the only kernel space mapping that could ever have been relied on by 
userspace on i386 is the vsyscall page.

Zach

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:30                 ` Zachary Amsden
  2006-05-20  9:43                   ` Zachary Amsden
@ 2006-05-20  9:48                   ` Andrew Morton
  2006-05-20 10:04                     ` Zachary Amsden
  1 sibling, 1 reply; 43+ messages in thread
From: Andrew Morton @ 2006-05-20  9:48 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: mingo, torvalds, rusty, linux-kernel, virtualization, kraxel

Zachary Amsden <zach@vmware.com> wrote:
>
> Please try my patch - sent earlier, but attached again.  It will tell 
>  you with 100% confidence if the problem is with userspace expecting the 
>  vsyscall page to be at a particular address.
> 
> 
> [bogo-fixmap  text/plain (645 bytes)]
> 
>  Index: linux-2.6.17-rc/include/asm-i386/fixmap.h
>  ===================================================================
>  --- linux-2.6.17-rc.orig/include/asm-i386/fixmap.h	2006-03-19 21:53:29.000000000 -0800
>  +++ linux-2.6.17-rc/include/asm-i386/fixmap.h	2006-05-19 18:16:00.000000000 -0700
>  @@ -20,7 +20,7 @@
>    * Leave one empty page between vmalloc'ed areas and
>    * the start of the fixmap.
>    */
>  -#define __FIXADDR_TOP	0xfffff000
>  +#define __FIXADDR_TOP	0xffbff000

The machine runs OK with that applied and with
move-vsyscall-page-out-of-fixmap-into-normal-vma-as-per-mmap.patch not
applied.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:26               ` Andrew Morton
  2006-05-20  9:30                 ` Zachary Amsden
@ 2006-05-20  9:54                 ` Ingo Molnar
  2006-05-20 10:16                 ` [patch] add print_fatal_signals support Ingo Molnar
                                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 43+ messages in thread
From: Ingo Molnar @ 2006-05-20  9:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach


* Andrew Morton <akpm@osdl.org> wrote:

> >  arch/i386/kernel/sysenter.c |   21 +++++++++++++++++++++
> >  include/linux/sysctl.h      |    1 +
> >  kernel/sysctl.c             |   16 ++++++++++++++++
> >  3 files changed, 38 insertions(+)
> 
> Documentation/kernel-parameters.txt, please.

grumble. I had this done but quilt didn't pick it up.

> > +unsigned int vdso_enabled = 1;
> 
> __read_mostly.

done. New patch attached.

------
Subject: i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
From: Ingo Molnar <mingo@elte.hu>

add the vdso=0 boot option and the /proc/sys/vm/vdso_enabled
sysctl, on i386. VDSO defaults to enabled.

 # cat /proc/self/maps | grep vdso
 b7f42000-b7f43000 r-xp b7f42000 00:00 0          [vdso]
 # echo 0 > /proc/sys/vm/vdso_enabled
 # cat /proc/self/maps | grep vdso
 # echo 1 > /proc/sys/vm/vdso_enabled
 # cat /proc/self/maps | grep vdso
 b7f05000-b7f06000 r-xp b7f05000 00:00 0          [vdso]
 #

Signed-off-by: Ingo Molnar <mingo@elte.hu>

---
 Documentation/kernel-parameters.txt |    4 ++++
 arch/i386/kernel/sysenter.c         |   21 +++++++++++++++++++++
 include/linux/sysctl.h              |    1 +
 kernel/sysctl.c                     |   16 ++++++++++++++++
 4 files changed, 42 insertions(+)

Index: linux-vdso-rand.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-vdso-rand.q.orig/Documentation/kernel-parameters.txt
+++ linux-vdso-rand.q/Documentation/kernel-parameters.txt
@@ -1646,6 +1646,10 @@ running once the system is up.
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
 
+	vdso=		[IA-32]
+			vdso=1: enable VDSO (default)
+			vdso=0: disable VDSO mapping
+
 	video=		[FB] Frame buffer configuration
 			See Documentation/fb/modedb.txt.
 
Index: linux-vdso-rand.q/arch/i386/kernel/sysenter.c
===================================================================
--- linux-vdso-rand.q.orig/arch/i386/kernel/sysenter.c
+++ linux-vdso-rand.q/arch/i386/kernel/sysenter.c
@@ -22,6 +22,21 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+	vdso_enabled = simple_strtoul(s, NULL, 0);
+
+	return 1;
+}
+
+__setup("vdso=", vdso_setup);
+
 extern asmlinkage void sysenter_entry(void);
 
 void enable_sep_cpu(void)
@@ -97,6 +112,9 @@ int arch_setup_additional_pages(struct l
 	unsigned long addr;
 	int ret;
 
+	if (unlikely(!vdso_enabled))
+		return 0;
+
 	down_write(&mm->mmap_sem);
 	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
@@ -122,16 +140,19 @@ int arch_setup_additional_pages(struct l
 	ret = insert_vm_struct(mm, vma);
 	if (ret)
 		goto free_vma;
+
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return = SYSENTER_RETURN_OFFSET + addr;
 	mm->total_vm++;
 	up_write(&mm->mmap_sem);
+
 	return 0;
 
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
Index: linux-vdso-rand.q/include/linux/sysctl.h
===================================================================
--- linux-vdso-rand.q.orig/include/linux/sysctl.h
+++ linux-vdso-rand.q/include/linux/sysctl.h
@@ -186,6 +186,7 @@ enum
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_VDSO_ENABLED=33,	/* map VDSO into new processes? */
 };
 
 
Index: linux-vdso-rand.q/kernel/sysctl.c
===================================================================
--- linux-vdso-rand.q.orig/kernel/sysctl.c
+++ linux-vdso-rand.q/kernel/sysctl.c
@@ -158,6 +158,10 @@ extern ctl_table inotify_table[];
 int sysctl_legacy_va_layout;
 #endif
 
+#ifdef CONFIG_X86_32
+extern int vdso_enabled;
+#endif
+
 /* /proc declarations: */
 
 #ifdef CONFIG_PROC_FS
@@ -915,6 +919,18 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_X86_32
+	{
+		.ctl_name	= VM_VDSO_ENABLED,
+		.procname	= "vdso_enabled",
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(vdso_enabled),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:48                   ` Andrew Morton
@ 2006-05-20 10:04                     ` Zachary Amsden
  2006-05-21  4:38                       ` Rusty Russell
  0 siblings, 1 reply; 43+ messages in thread
From: Zachary Amsden @ 2006-05-20 10:04 UTC (permalink / raw)
  To: Andrew Morton
  Cc: mingo, torvalds, rusty, linux-kernel, virtualization, kraxel

Andrew Morton wrote:
> Zachary Amsden <zach@vmware.com> wrote:
>   
>> Please try my patch - sent earlier, but attached again.  It will tell 
>>  you with 100% confidence if the problem is with userspace expecting the 
>>  vsyscall page to be at a particular address.
>>
>>
>> [bogo-fixmap  text/plain (645 bytes)]
>>
>>  Index: linux-2.6.17-rc/include/asm-i386/fixmap.h
>>  ===================================================================
>>  --- linux-2.6.17-rc.orig/include/asm-i386/fixmap.h	2006-03-19 21:53:29.000000000 -0800
>>  +++ linux-2.6.17-rc/include/asm-i386/fixmap.h	2006-05-19 18:16:00.000000000 -0700
>>  @@ -20,7 +20,7 @@
>>    * Leave one empty page between vmalloc'ed areas and
>>    * the start of the fixmap.
>>    */
>>  -#define __FIXADDR_TOP	0xfffff000
>>  +#define __FIXADDR_TOP	0xffbff000
>>     
>
> The machine runs OK with that applied and with
> move-vsyscall-page-out-of-fixmap-into-normal-vma-as-per-mmap.patch not
> applied.
>   

Err.  That implies that there is likely a problem in the kernel patch, 
not in userspace.  Let's look more closely at 
move-vsyscall-page-out-of-fixmap-into-normal-vma-as-per-mmap and see if 
there is something missing.

Zach

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [patch] add print_fatal_signals support
  2006-05-20  9:26               ` Andrew Morton
  2006-05-20  9:30                 ` Zachary Amsden
  2006-05-20  9:54                 ` Ingo Molnar
@ 2006-05-20 10:16                 ` Ingo Molnar
  2006-05-21 11:03                 ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
  2006-05-21 14:10                 ` Arjan van de Ven
  4 siblings, 0 replies; 43+ messages in thread
From: Ingo Molnar @ 2006-05-20 10:16 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach


* Andrew Morton <akpm@osdl.org> wrote:

> > That could tell us whether 
> > it's an init bug or a glibc bug.
> 
> It tells us neither.  This could be a new kernel bug which only 
> certain old userspace setups are known to trigger.  Until we know 
> exactly why this is occurring, we don't know where the bug is.

actually i've seen this bug a long time ago, just didn't remember whether 
it was an init bug or a glibc bug. I believe this bug is in ld.so, but i 
don't remember the specifics.

i've attached another exec-shield goodie that can help debug such bugs: 
the print-fatal-signals=1 boot option (and /proc/sys/kernel runtime 
switch) causes minimal SIGSEGV info to be printed to the kernel 
console. The glibc (and distro-installer) folks find it very useful and 
have used it numerous times in the past few years.

	Ingo

------
Subject: add print_fatal_signals support
From: Ingo Molnar <mingo@elte.hu>

add the print-fatal-signals=1 boot option and the
/proc/sys/kernel/print-fatal-signals runtime switch.

this feature prints some minimal information about userspace segfaults
to the kernel console. This is useful to find early bootup bugs where
userspace debugging is very hard.

defaults to off.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt |    6 +++++
 include/linux/sched.h               |    1 
 include/linux/sysctl.h              |    1 
 kernel/signal.c                     |   38 ++++++++++++++++++++++++++++++++++++
 kernel/sysctl.c                     |    8 +++++++
 5 files changed, 54 insertions(+)

Index: linux-vdso-rand.q/Documentation/kernel-parameters.txt
===================================================================
--- linux-vdso-rand.q.orig/Documentation/kernel-parameters.txt
+++ linux-vdso-rand.q/Documentation/kernel-parameters.txt
@@ -1261,6 +1261,12 @@ running once the system is up.
 			autoconfiguration.
 			Ranges are in pairs (memory base and size).
 
+	print-fatal-signals=
+			[KNL] debug: print fatal signals
+			print-fatal-signals=1: print segfault info to
+			the kernel console.
+			default: off.
+
 	profile=	[KNL] Enable kernel profiling via /proc/profile
 			Format: [schedule,]<number>
 			Param: "schedule" - profile schedule points.
Index: linux-vdso-rand.q/include/linux/sched.h
===================================================================
--- linux-vdso-rand.q.orig/include/linux/sched.h
+++ linux-vdso-rand.q/include/linux/sched.h
@@ -40,6 +40,7 @@
 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 
 struct exec_domain;
+extern int print_fatal_signals;
 
 /*
  * cloning flags:
Index: linux-vdso-rand.q/include/linux/sysctl.h
===================================================================
--- linux-vdso-rand.q.orig/include/linux/sysctl.h
+++ linux-vdso-rand.q/include/linux/sysctl.h
@@ -93,6 +93,7 @@ enum
 	KERN_CAP_BSET=14,	/* int: capability bounding set */
 	KERN_PANIC=15,		/* int: panic timeout */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
+	KERN_PRINT_FATAL=17,	/* int: print fatal signals (0/1) */
 
 	KERN_SPARC_REBOOT=21,	/* reboot command on Sparc */
 	KERN_CTLALTDEL=22,	/* int: allow ctl-alt-del to reboot */
Index: linux-vdso-rand.q/kernel/signal.c
===================================================================
--- linux-vdso-rand.q.orig/kernel/signal.c
+++ linux-vdso-rand.q/kernel/signal.c
@@ -763,6 +763,37 @@ out_set:
 #define LEGACY_QUEUE(sigptr, sig) \
 	(((sig) < SIGRTMIN) && sigismember(&(sigptr)->signal, (sig)))
 
+int print_fatal_signals = 0;
+
+static void print_fatal_signal(struct pt_regs *regs, int signr)
+{
+	printk("%s/%d: potentially unexpected fatal signal %d.\n",
+		current->comm, current->pid, signr);
+
+#ifdef __i386__
+	printk("code at %08lx: ", regs->eip);
+	{
+		int i;
+		for (i = 0; i < 16; i++) {
+			unsigned char insn;
+
+			__get_user(insn, (unsigned char *)(regs->eip + i));
+			printk("%02x ", insn);
+		}
+	}
+#endif
+	printk("\n");
+	show_regs(regs);
+}
+
+static int __init setup_print_fatal_signals(char *str)
+{
+	get_option (&str, &print_fatal_signals);
+
+	return 1;
+}
+
+__setup("print-fatal-signals=", setup_print_fatal_signals);
 
 static int
 specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
@@ -1748,6 +1779,11 @@ relock:
 		if (!signr)
 			break; /* will return 0 */
 
+		if ((signr == SIGSEGV) && print_fatal_signals) {
+			spin_unlock_irq(&current->sighand->siglock);
+			print_fatal_signal(regs, signr);
+			spin_lock_irq(&current->sighand->siglock);
+		}
 		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
 			ptrace_signal_deliver(regs, cookie);
 
@@ -1843,6 +1879,8 @@ relock:
 		 * Anything else is fatal, maybe with a core dump.
 		 */
 		current->flags |= PF_SIGNALED;
+		if ((signr != SIGKILL) && print_fatal_signals)
+			print_fatal_signal(regs, signr);
 		if (sig_kernel_coredump(signr)) {
 			/*
 			 * If it was able to dump core, this kills all
Index: linux-vdso-rand.q/kernel/sysctl.c
===================================================================
--- linux-vdso-rand.q.orig/kernel/sysctl.c
+++ linux-vdso-rand.q/kernel/sysctl.c
@@ -330,6 +330,14 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+	{
+		.ctl_name	= KERN_PRINT_FATAL,
+		.procname	= "print-fatal-signals",
+		.data		= &print_fatal_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef __sparc__
 	{
 		.ctl_name	= KERN_SPARC_REBOOT,

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20 10:04                     ` Zachary Amsden
@ 2006-05-21  4:38                       ` Rusty Russell
  2006-05-21  9:35                         ` Rusty Russell
  0 siblings, 1 reply; 43+ messages in thread
From: Rusty Russell @ 2006-05-21  4:38 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: Andrew Morton, mingo, torvalds, linux-kernel, virtualization,
	kraxel

On Sat, 2006-05-20 at 03:04 -0700, Zachary Amsden wrote:
> >>  Index: linux-2.6.17-rc/include/asm-i386/fixmap.h
> >>  ===================================================================
> >>  --- linux-2.6.17-rc.orig/include/asm-i386/fixmap.h	2006-03-19 21:53:29.000000000 -0800
> >>  +++ linux-2.6.17-rc/include/asm-i386/fixmap.h	2006-05-19 18:16:00.000000000 -0700
> >>  @@ -20,7 +20,7 @@
> >>    * Leave one empty page between vmalloc'ed areas and
> >>    * the start of the fixmap.
> >>    */
> >>  -#define __FIXADDR_TOP	0xfffff000
> >>  +#define __FIXADDR_TOP	0xffbff000
> >>     
> >
> > The machine runs OK with that applied and with
> > move-vsyscall-page-out-of-fixmap-into-normal-vma-as-per-mmap.patch not
> > applied.
> >   
> 
> Err.  That implies that there is likely a problem in the kernel patch, 
> not in userspace,  Let's look more closely at 
> move-vsyscall-page-out-of-fixmap-into-normal-vma-as-per-mmap and see if 
> there is something missing.

Indeed.  And I really hate the idea of a global switch for this, too: it
should just work, or autodetect (esp. if it's init that's failing, this
might be possible).

Off to find FC1...
Rusty.
-- 
 ccontrol: http://ccontrol.ozlabs.org


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21  4:38                       ` Rusty Russell
@ 2006-05-21  9:35                         ` Rusty Russell
  2006-05-21  9:52                           ` Andrew Morton
  2006-05-21 10:41                           ` Ingo Molnar
  0 siblings, 2 replies; 43+ messages in thread
From: Rusty Russell @ 2006-05-21  9:35 UTC (permalink / raw)
  To: Zachary Amsden, Andrew Morton
  Cc: virtualization, torvalds, mingo, linux-kernel

On Sun, 2006-05-21 at 14:38 +1000, Rusty Russell wrote:
> Indeed.  And I really hate the idea of a global switch for this, too: it
> should just work, or autodetect (esp if it's init that failing, this
> might be possible).
> 
> Off to find FC1...

Well, after three hours of downloading, it doesn't boot under qemu
anyway 8(

But it turns out that this is a known problem with FC1's glibc and the
exec-shield patches (google for FC1 glibc vdso).  When Ingo and Arjan
convinced me to push their code from exec-shield, they conveniently
didn't mention this.

So, below is Gerd's original "just place the damn thing above the stack"
patch.  Does this work?  If so, I'm happy for someone else to figure out
a decent way of auto-detecting FC1-style problems.

Thanks,
Rusty.
PS.  Patch was not signed off by Gerd.  Went through Xen tree.

Name: Move vsyscall page out of fixmap, above stack
Author: Gerd Hoffmann <kraxel@suse.de>

Hypervisors want to use memory at the top of the address space
(eg. 64MB for Xen, or 168MB for Xen w/ PAE).  Creating this hole means
moving the vsyscall page away from 0xffffe000.

If we create this hole statically with a config option, we give up,
say, 256MB of lowmem for the case where a hypervisor-capable kernel is
actually running on native hardware.

If we create this hole dynamically and leave the vsyscall page at the
top of kernel memory, we would have to patch up the vsyscall elf
header at boot time to reflect where we put it.

Instead, this patch moves the vsyscall page into the user address
region, just below PAGE_OFFSET: it's still at a fixed address, but
it's not where the hypervisor wants to be, so resizing the hole is
trivial.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/kernel/asm-offsets.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/asm-offsets.c
--- linux-2.6.17-rc4/arch/i386/kernel/asm-offsets.c	2005-07-15 04:38:36.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/asm-offsets.c	2006-05-16 14:24:00.000000000 +1000
@@ -13,6 +13,7 @@
 #include <asm/fixmap.h>
 #include <asm/processor.h>
 #include <asm/thread_info.h>
+#include <asm/elf.h>
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -68,5 +69,5 @@ void foo(void)
 		 sizeof(struct tss_struct));
 
 	DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-	DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+	DEFINE(VSYSCALL_BASE, VSYSCALL_BASE);
 }
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/kernel/sysenter.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/sysenter.c
--- linux-2.6.17-rc4/arch/i386/kernel/sysenter.c	2006-03-23 12:42:01.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/kernel/sysenter.c	2006-05-16 14:27:05.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -45,23 +46,88 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
 
 int __init sysenter_setup(void)
 {
-	void *page = (void *)get_zeroed_page(GFP_ATOMIC);
-
-	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC);
+	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		memcpy(page,
+		memcpy(syscall_page,
 		       &vsyscall_int80_start,
 		       &vsyscall_int80_end - &vsyscall_int80_start);
 		return 0;
 	}
 
-	memcpy(page,
+	memcpy(syscall_page,
 	       &vsyscall_sysenter_start,
 	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
 	return 0;
 }
+
+static struct page*
+syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
+{
+	struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+	get_page(p);
+	return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+	.close = syscall_vma_close,
+	.nopage = syscall_nopage,
+};
+
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma)
+		return -ENOMEM;
+
+	memset(vma, 0, sizeof(struct vm_area_struct));
+	/* Could randomize here */
+	vma->vm_start = VSYSCALL_BASE;
+	vma->vm_end = VSYSCALL_BASE + PAGE_SIZE;
+	/* MAYWRITE to allow gdb to COW and set breakpoints */
+	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_ops = &syscall_vm_ops;
+	vma->vm_mm = mm;
+
+	down_write(&mm->mmap_sem);
+	if ((ret = insert_vm_struct(mm, vma))) {
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, vma);
+		return ret;
+	}
+	mm->total_vm++;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+	return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+	return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+	return 0;
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/arch/i386/mm/pgtable.c working-2.6.17-rc4-vsyscall-above-stack/arch/i386/mm/pgtable.c
--- linux-2.6.17-rc4/arch/i386/mm/pgtable.c	2006-05-16 10:50:48.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/arch/i386/mm/pgtable.c	2006-05-16 14:24:47.000000000 +1000
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -138,6 +139,10 @@ void set_pmd_pfn(unsigned long vaddr, un
 	__flush_tlb_one(vaddr);
 }
 
+static int nr_fixmaps = 0;
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
 	unsigned long address = __fix_to_virt(idx);
@@ -147,6 +152,13 @@ void __set_fixmap (enum fixed_addresses 
 		return;
 	}
 	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	nr_fixmaps++;
+}
+
+void set_fixaddr_top(unsigned long top)
+{
+	BUG_ON(nr_fixmaps > 0);
+	__FIXADDR_TOP = top - PAGE_SIZE;
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/a.out.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/a.out.h
--- linux-2.6.17-rc4/include/asm-i386/a.out.h	2004-02-04 14:43:43.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/a.out.h	2006-05-16 14:24:47.000000000 +1000
@@ -19,7 +19,7 @@ struct exec
 
 #ifdef __KERNEL__
 
-#define STACK_TOP	TASK_SIZE
+#define STACK_TOP	(TASK_SIZE - 3*PAGE_SIZE)
 
 #endif
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/elf.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/elf.h
--- linux-2.6.17-rc4/include/asm-i386/elf.h	2006-03-23 12:44:01.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/elf.h	2006-05-16 14:24:47.000000000 +1000
@@ -129,11 +129,16 @@ extern int dump_task_extended_fpu (struc
 #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
 
-#define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
+#define VSYSCALL_BASE	(PAGE_OFFSET - 2*PAGE_SIZE)
 #define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
 #define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
 extern void __kernel_vsyscall;
 
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+                                       int executable_stack);
+
 #define ARCH_DLINFO						\
 do {								\
 		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/fixmap.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/fixmap.h
--- linux-2.6.17-rc4/include/asm-i386/fixmap.h	2006-03-23 12:43:10.000000000 +1100
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/fixmap.h	2006-05-16 14:24:47.000000000 +1000
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP	0xfffff000
+extern unsigned long __FIXADDR_TOP;
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -52,7 +52,6 @@
  */
 enum fixed_addresses {
 	FIX_HOLE,
-	FIX_VSYSCALL,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -95,6 +94,8 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
 					unsigned long phys, pgprot_t flags);
 
+extern void set_fixaddr_top(unsigned long top);
+
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
 /*
@@ -116,14 +117,6 @@ extern void __set_fixmap (enum fixed_add
 #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
 
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
-
-
 extern void __this_fixmap_does_not_exist(void);
 
 /*
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.6.17-rc4/include/asm-i386/page.h working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/page.h
--- linux-2.6.17-rc4/include/asm-i386/page.h	2006-05-16 10:51:38.000000000 +1000
+++ working-2.6.17-rc4-vsyscall-above-stack/include/asm-i386/page.h	2006-05-16 14:24:47.000000000 +1000
@@ -121,7 +121,7 @@ extern int page_is_ram(unsigned long pag
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM			(__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
@@ -137,6 +137,8 @@ extern int page_is_ram(unsigned long pag
 	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
 		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
+#define __HAVE_ARCH_GATE_AREA 1
+
 #endif /* __KERNEL__ */
 
 #include <asm-generic/memory_model.h>

-- 
 ccontrol: http://ccontrol.ozlabs.org


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21  9:35                         ` Rusty Russell
@ 2006-05-21  9:52                           ` Andrew Morton
  2006-05-21 10:41                           ` Ingo Molnar
  1 sibling, 0 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-21  9:52 UTC (permalink / raw)
  To: Rusty Russell; +Cc: zach, virtualization, torvalds, mingo, linux-kernel

Rusty Russell <rusty@rustcorp.com.au> wrote:
>
> Hypervisors want to use memory at the top of the address space
>  (eg. 64MB for Xen, or 168MB for Xen w/ PAE).  Creating this hole means
>  moving the vsyscall page away from 0xffffe000.
> 
>  If we create this hole statically with a config option, we give up,
>  say, 256MB of lowmem for the case where a hypervisor-capable kernel is
>  actually running on native hardware.
> 
>  If we create this hole dynamically and leave the vsyscall page at the
>  top of kernel memory, we would have to patch up the vsyscall elf
>  header at boot time to reflect where we put it.
> 
>  Instead, this patch moves the vsyscall page into the user address
>  region, just below PAGE_OFFSET: it's still at a fixed address, but
>  it's not where the hypervisor wants to be, so resizing the hole is
>  trivial.

Seems to work.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21  9:35                         ` Rusty Russell
  2006-05-21  9:52                           ` Andrew Morton
@ 2006-05-21 10:41                           ` Ingo Molnar
  2006-05-21 11:06                             ` Rusty Russell
  1 sibling, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-21 10:41 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Zachary Amsden, Andrew Morton, virtualization, torvalds,
	linux-kernel


* Rusty Russell <rusty@rustcorp.com.au> wrote:

> But it turns out that this is a known problem with FC1's glibc and the 
> exec-shield patches (google for FC1 glibc vdso). [..]

no, i think that conclusion is wrong. The FC1 glibc and vdso problems 
*when mixing a FC2 kernel with a FC1 glibc* were due to exec-shield 
enforcing non-exec for the vdso.

> [...] When Ingo and Arjan convinced me to push their code from 
> exec-shield, they conveniently didn't mention this.

this bug has nothing to do with nonexec restrictions. [ Also, this all 
was _years_ and hundreds of bugs ago, when upstream's position was still 
a cocky "who the hell needs protection against overflows" and "go away 
with this non-exec crap" so we were pretty much alone trying to 
introduce those features. So any suggestion of intention on our part 
would be quite unfair. ]

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:26               ` Andrew Morton
                                   ` (2 preceding siblings ...)
  2006-05-20 10:16                 ` [patch] add print_fatal_signals support Ingo Molnar
@ 2006-05-21 11:03                 ` Ingo Molnar
  2006-05-21 11:38                   ` Ingo Molnar
  2006-05-21 14:10                 ` Arjan van de Ven
  4 siblings, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-21 11:03 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach


* Andrew Morton <akpm@osdl.org> wrote:

> > Andrew, could you try this - do newly started processes work fine if you 
> > re-enable the vdso after booting with vdso=0?
> 
> vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
> vmm:/home/akpm# 
> vmm:/home/akpm> ls -l
> zsh: segmentation fault  ls -l

Andrew, could you try the patch below, does your FC1 box work with it 
applied and CONFIG_COMPAT_VDSO enabled? (no need to pass any boot 
options)

The config option reinstates the high mapping, so that old glibc can 
reference it as data (that is i think what happened in the original FC1 
glibc). Newer distributions can (and will) turn this off. The option 
defaults to y, so that we are compatible by default.

( Small additional detail: to further limit the security impact of the
  workaround, the page is nonexec, so on the unlikely chance of someone
  running a FC1 glibc with a new kernel on new, NX-capable hardware
  using the PAE i386 kernel, execution is denied on that page. [i have
  tested this kernel combination and execution is indeed denied in that
  case.] )

	Ingo

---
Subject: vDSO: provide workaround for older glibcs
From: Ingo Molnar <mingo@elte.hu>

this patch adds CONFIG_COMPAT_VDSO (default=y), which provides support
for older glibcs to reference the high-mapped VDSO. Newer distributions
(anything newer than say 2 years) can turn this off.

NOTE: the exec bit is turned off for the vDSO, because glibc only
references the vDSO, but doesn't try to execute it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/i386/Kconfig           |    9 +++++++++
 arch/i386/kernel/sysenter.c |    5 +++++
 2 files changed, 14 insertions(+)

Index: linux-vdso-rand.q/arch/i386/Kconfig
===================================================================
--- linux-vdso-rand.q.orig/arch/i386/Kconfig
+++ linux-vdso-rand.q/arch/i386/Kconfig
@@ -762,6 +762,15 @@ config HOTPLUG_CPU
 	  enable suspend on SMP systems. CPUs can be controlled through
 	  /sys/devices/system/cpu.
 
+config COMPAT_VDSO
+	bool "Compat VDSO support"
+	default y
+	help
+	  Map the VDSO to the fixed old-style address too.
+	---help---
+	  Say N here if you are running a sufficiently recent glibc
+	  version, to remove the (unused) high-mapped VDSO mapping.
+	  If unsure, say Y.
 
 endmenu
 
Index: linux-vdso-rand.q/arch/i386/kernel/sysenter.c
===================================================================
--- linux-vdso-rand.q.orig/arch/i386/kernel/sysenter.c
+++ linux-vdso-rand.q/arch/i386/kernel/sysenter.c
@@ -69,6 +69,11 @@ int __init sysenter_setup(void)
 {
 	syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
 
+#ifdef CONFIG_COMPAT_VDSO
+	__set_fixmap(FIX_VSYSCALL, __pa(syscall_page), PAGE_READONLY);
+	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VSYSCALL));
+#endif
+
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		memcpy(syscall_page,
 		       &vsyscall_int80_start,

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21 10:41                           ` Ingo Molnar
@ 2006-05-21 11:06                             ` Rusty Russell
  0 siblings, 0 replies; 43+ messages in thread
From: Rusty Russell @ 2006-05-21 11:06 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Zachary Amsden, Andrew Morton, virtualization, torvalds,
	linux-kernel

On Sun, 2006-05-21 at 12:41 +0200, Ingo Molnar wrote:
> * Rusty Russell <rusty@rustcorp.com.au> wrote:
> 
> > But it turns out that this is a known problem with FC1's glibc and the 
> > exec-shield patches (google for FC1 glibc vdso). [..]
> 
> no, i think that conclusion is wrong. The FC1 glibc and vdso problems 
> *when mixing a FC2 kernel with a FC1 glibc* were due to exec-shield 
> enforcing non-exec for the vdso.

Interesting.  I'll see if I can find a spare machine to try installing
FC1 on tomorrow then, see if I can figure this one out.  I can't think
how this could happen, though.

> > [...] When Ingo and Arjan convinced me to push their code from 
> > exec-shield, they conveniently didn't mention this.
> 
> this bug has nothing to do with nonexec restrictions. [ Also, this all 
> was _years_ and hundreds of bugs ago, when upstream's position was still 
> a cocky "who the hell needs protection against overflows" and "go away 
> with this non-exec crap" so we were pretty much alone trying to 
> introduce those features. So any suggestion of intention on our part 
> would be quite unfair. ]

Sorry if I was narky.  I tried to do the right thing and get more of
execshield in, rather than just what I needed, but it seems I screwed up
somewhere.  With the Wesnoth 1.2 feature freeze next week, my spare time
to chase bugs I don't need to is limited 8(

Cheers,
Rusty.
-- 
 ccontrol: http://ccontrol.ozlabs.org


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21 11:03                 ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
@ 2006-05-21 11:38                   ` Ingo Molnar
  2006-05-21 12:33                     ` Andrew Morton
  0 siblings, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-21 11:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach


* Ingo Molnar <mingo@elte.hu> wrote:

> > vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
> > vmm:/home/akpm# 
> > vmm:/home/akpm> ls -l
> > zsh: segmentation fault  ls -l
> 
> Andrew, could you try the patch below, does your FC1 box work with it 
> applied and CONFIG_COMPAT_VDSO enabled? (no need to pass any boot 
> options)

in case this doesnt do the trick, could you also try booting with the 
norandmaps boot option?

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21 11:38                   ` Ingo Molnar
@ 2006-05-21 12:33                     ` Andrew Morton
  0 siblings, 0 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-21 12:33 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: torvalds, rusty, linux-kernel, virtualization, kraxel, zach

Ingo Molnar <mingo@elte.hu> wrote:
>
> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
> > > vmm:/home/akpm# 
> > > vmm:/home/akpm> ls -l
> > > zsh: segmentation fault  ls -l
> > 
> > Andrew, could you try the patch below, does your FC1 box work with it 
> > applied and CONFIG_COMPAT_VDSO enabled? (no need to pass any boot 
> > options)
> 
> in case this doesnt do the trick,

It doesn't do the trick.

> could you also try booting with the 
> norandmaps boot option?

Nor does that.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-20  9:26               ` Andrew Morton
                                   ` (3 preceding siblings ...)
  2006-05-21 11:03                 ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
@ 2006-05-21 14:10                 ` Arjan van de Ven
  2006-05-22 14:32                   ` Alexey Kuznetsov
  4 siblings, 1 reply; 43+ messages in thread
From: Arjan van de Ven @ 2006-05-21 14:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, torvalds, rusty, linux-kernel, virtualization,
	kraxel, zach

On Sat, 2006-05-20 at 02:26 -0700, Andrew Morton wrote:
> Ingo Molnar <mingo@elte.hu> wrote:
> >
> > 
> > * Linus Torvalds <torvalds@osdl.org> wrote:
> > 
> > > > Well that patch took a machine from working to non-working.  Pretty serious
> > > > stuff.  We should get to the bottom of the problem so we can assess the
> > > > risk and impact, no?
> > > 
> > > Yes. And it would be good to have a way to turn it off - either 
> > > > globally or by some per-process setup (eg off by default, but turn on 
> > > when doing some magic).
> > > 
> > > The per-process one would be the harder one, because it would require 
> > > the fixmap entry, but not globally. So I suspect the only practical 
> > > thing would be to have it be a kernel boot-time option.
> > 
> > below is a patch that adds the vdso=0 boot option from exec-shield and 
> > the /proc/sys/vm/vdso_enabled per-system sysctl.
> > 
> > Andrew, could you try this - do newly started processes work fine if you 
> > re-enable the vdso after booting with vdso=0?
> 
> vmm:/home/akpm# echo 1 > /proc/sys/vm/vdso_enabled 
> vmm:/home/akpm# 
> vmm:/home/akpm> ls -l
> zsh: segmentation fault  ls -l

any chance to get a coredump ?



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled
  2006-05-21 14:10                 ` Arjan van de Ven
@ 2006-05-22 14:32                   ` Alexey Kuznetsov
  0 siblings, 0 replies; 43+ messages in thread
From: Alexey Kuznetsov @ 2006-05-22 14:32 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Andrew Morton, Ingo Molnar, torvalds, rusty, linux-kernel,
	virtualization, kraxel, zach

Hello!

> any chance to get a coredump ?

Been there... ld-linux in glibc-2.3.2 is broken: it does not understand
relocatable VDSO. If vsyscall-sysenter.so is not absolute, which is
the case with exec-shield patch, it dereferences not-relocated pointers
in .dynamic and segfaults.

BTW original Gerd Hoffman's patch as submitted by Rusty works
with libc-2.3.2, it generates good absolute VDSO.

Alexey

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-20  0:43     ` Andrew Morton
  2006-05-20  1:03       ` Ingo Molnar
  2006-05-20  1:24       ` Arjan van de Ven
@ 2006-05-22 16:29       ` Jakub Jelinek
  2006-05-22 16:44         ` Zachary Amsden
  2 siblings, 1 reply; 43+ messages in thread
From: Jakub Jelinek @ 2006-05-22 16:29 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Rusty Russell, mingo, linux-kernel, torvalds, virtualization,
	kraxel, zach

On Fri, May 19, 2006 at 05:43:03PM -0700, Andrew Morton wrote:
> Rusty Russell <rusty@rustcorp.com.au> wrote:
> >
> > Name: Move vsyscall page out of fixmap into normal vma as per mmap
> 
> This causes mysterious hangs when starting init.
> 
> Distro is RH FC1, running SysVinit-2.85-5.
> 
> dmesg, sysrq-T and .config are at
> http://www.zip.com.au/~akpm/linux/patches/stuff/log-vmm - nothing leaps
> out.
> 
> This is the second time recently when a patch has caused this machine to
> oddly hang in init.  It's possible that there's a bug of some form in that
> version of init that we'll need to know about and take care of in some
> fashion.

That's a known bug in early glibcs shortly after adding vDSO support.
The vDSO support has been added in May 2003 to CVS glibc (i.e. post glibc
2.3.2) and the problems have been fixed when they were discovered, in
February 2004:
http://sources.redhat.com/ml/libc-hacker/2004-02/msg00053.html
http://sources.redhat.com/ml/libc-hacker/2004-02/msg00059.html

I strongly believe we want randomized vDSOs, people are already abusing the
fix mapped vDSO for attacks, and I think the unfortunate 10 months of broken
glibc shouldn't stop that forever.  Anyone using such glibc can still use
vdso=0, or do that just once and upgrade to somewhat more recent glibc.

	Jakub

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 16:29       ` Jakub Jelinek
@ 2006-05-22 16:44         ` Zachary Amsden
  2006-05-22 17:14           ` Andrew Morton
  0 siblings, 1 reply; 43+ messages in thread
From: Zachary Amsden @ 2006-05-22 16:44 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Andrew Morton, Rusty Russell, mingo, linux-kernel, torvalds,
	virtualization, kraxel

Jakub Jelinek wrote:
>
> That's known bug in early glibcs short after adding vDSO support.
> The vDSO support has been added in May 2003 to CVS glibc (i.e. post glibc
> 2.3.2) and the problems have been fixed when they were discovered, in
> February 2004:
> http://sources.redhat.com/ml/libc-hacker/2004-02/msg00053.html
> http://sources.redhat.com/ml/libc-hacker/2004-02/msg00059.html
>
> I strongly believe we want randomized vDSOs, people are already abusing the
> fix mapped vDSO for attacks, and I think the unfortunate 10 months of broken
> glibc shouldn't stop that forever.  Anyone using such glibc can still use
> vdso=0, or do that just once and upgrade to somewhat more recent glibc.
>   

While I'm now inclined to agree with randomization, I think the default 
should be off.  You can quite easily "echo 1 > 
/proc/sys/kernel/vdso_randomization" in the RC scripts, which allows you 
to maintain compatibility for everyone and get randomization turned on 
early enough to thwart attacks against any vulnerable daemons.

Zach

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 16:44         ` Zachary Amsden
@ 2006-05-22 17:14           ` Andrew Morton
  2006-05-22 17:27             ` Ingo Molnar
  0 siblings, 1 reply; 43+ messages in thread
From: Andrew Morton @ 2006-05-22 17:14 UTC (permalink / raw)
  To: Zachary Amsden
  Cc: jakub, rusty, mingo, linux-kernel, torvalds, virtualization,
	kraxel

Zachary Amsden <zach@vmware.com> wrote:
>
> Jakub Jelinek wrote:
> >
> > That's known bug in early glibcs short after adding vDSO support.
> > The vDSO support has been added in May 2003 to CVS glibc (i.e. post glibc
> > 2.3.2) and the problems have been fixed when they were discovered, in
> > February 2004:
> > http://sources.redhat.com/ml/libc-hacker/2004-02/msg00053.html
> > http://sources.redhat.com/ml/libc-hacker/2004-02/msg00059.html
> >
> > I strongly believe we want randomized vDSOs, people are already abusing the
> > fix mapped vDSO for attacks, and I think the unfortunate 10 months of broken
> > glibc shouldn't stop that forever.  Anyone using such glibc can still use
> > vdso=0, or do that just once and upgrade to somewhat more recent glibc.
> >   
> 
> While I'm now inclined to agree with randomization, I think the default 
> should be off.  You can quite easily "echo 1 > 
> /proc/sys/kernel/vdso_randomization" in the RC scripts, which allows you 
> to maintain compatibility for everyone and get randomization turned on 
> early enough to thwart attacks against any vulnerable daemons.
> 

It kinda sucks but yes, that's obviously the least-breakage approach.  It does
mean that many people won't benefit from (and won't test!) the new feature
though.

Unless there's some sneaky way of auto-detecting a modern userspace,
perhaps (something which mounts /sys?).

All very sad.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 17:14           ` Andrew Morton
@ 2006-05-22 17:27             ` Ingo Molnar
  2006-05-22 17:46               ` Linus Torvalds
  2006-05-22 17:53               ` Andrew Morton
  0 siblings, 2 replies; 43+ messages in thread
From: Ingo Molnar @ 2006-05-22 17:27 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Zachary Amsden, jakub, rusty, linux-kernel, torvalds,
	virtualization, kraxel


* Andrew Morton <akpm@osdl.org> wrote:

> Zachary Amsden <zach@vmware.com> wrote:
> >
> > Jakub Jelinek wrote:
> > >
> > > That's known bug in early glibcs short after adding vDSO support.
> > > The vDSO support has been added in May 2003 to CVS glibc (i.e. post glibc
> > > 2.3.2) and the problems have been fixed when they were discovered, in
> > > February 2004:
> > > http://sources.redhat.com/ml/libc-hacker/2004-02/msg00053.html
> > > http://sources.redhat.com/ml/libc-hacker/2004-02/msg00059.html
> > >
> > > I strongly believe we want randomized vDSOs, people are already abusing the
> > > fix mapped vDSO for attacks, and I think the unfortunate 10 months of broken
> > > glibc shouldn't stop that forever.  Anyone using such glibc can still use
> > > vdso=0, or do that just once and upgrade to somewhat more recent glibc.
> > >   
> > 
> > While I'm now inclined to agree with randomization, I think the default 
> > should be off.  You can quite easily "echo 1 > 
> > /proc/sys/kernel/vdso_randomization" in the RC scripts, which allows you 
> > to maintain compatibility for everyone and get randomization turned on 
> > early enough to thwart attacks against any vulnerable daemons.
> > 
> 
> It kinda sucks but yes, that's obviously least-breakage approach.  It 
> does mean that many people won't benefit from (and won't test!) the 
> new feature though.

very much so. Especially for security it's really bad if a feature is 
default-off. I'm quite strongly against such an approach.

> Unless there's some sneaky way of auto-detecting a modern userspace, 
> perhaps (something which mounts /sys?).

i'd rather not overdesign it. And unfortunately there is no good way to 
autodetect it.

> All very sad.

is it really a big problem to add "vdso=0" to the long list of 
requirements you need to run a 2.6 kernel on an old distribution (or to 
disable CONFIG_VDSO)? FC1 wasnt even 2.6-ready, it used a 2.4 kernel!

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 17:27             ` Ingo Molnar
@ 2006-05-22 17:46               ` Linus Torvalds
  2006-05-22 19:09                 ` Ingo Molnar
  2006-05-22 19:14                 ` Adrian Bunk
  2006-05-22 17:53               ` Andrew Morton
  1 sibling, 2 replies; 43+ messages in thread
From: Linus Torvalds @ 2006-05-22 17:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Zachary Amsden, jakub, rusty, linux-kernel,
	virtualization, kraxel



On Mon, 22 May 2006, Ingo Molnar wrote:
>
> very much so. Especially for security it's really bad if a feature is 
> default-off. I'm quite strongly against such an approach.

It's not bad at all.

It's default-off FOR THE KERNEL.

Make Fedora updates (and RHEL) just turn it on in the rc scripts. So that 
it's default ON for those, WHEN IT WORKS.

> is it really a big problem to add "vdso=0" to the long list of 
> requirements you need to run a 2.6 kernel on an old distribution (or to 
> disable CONFIG_VDSO)? FC1 wasnt even 2.6-ready, it used a 2.4 kernel!

Backwards compatibility is absolutely paramount. Much more important than 
just about anything else.

		Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 17:27             ` Ingo Molnar
  2006-05-22 17:46               ` Linus Torvalds
@ 2006-05-22 17:53               ` Andrew Morton
  1 sibling, 0 replies; 43+ messages in thread
From: Andrew Morton @ 2006-05-22 17:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: zach, jakub, rusty, linux-kernel, torvalds, virtualization,
	kraxel

Ingo Molnar <mingo@elte.hu> wrote:
>
> is it really a big problem to add "vdso=0" to the long list of 
>  requirements you need to run a 2.6 kernel on an old distribution (or to 
>  disable CONFIG_VDSO)? FC1 wasnt even 2.6-ready, it used a 2.4 kernel!

I assume that FC1-using people aren't the only ones who will be affected by
this.  We just don't know.

Oh well.  One way of finding out is to ship the thing ;)

I seem to have lost the vdso=0 patch and the CONFIG_VDSO patch.  Resend,
please?


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 17:46               ` Linus Torvalds
@ 2006-05-22 19:09                 ` Ingo Molnar
  2006-05-22 19:40                   ` Linus Torvalds
  2006-05-22 19:14                 ` Adrian Bunk
  1 sibling, 1 reply; 43+ messages in thread
From: Ingo Molnar @ 2006-05-22 19:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Zachary Amsden, jakub, rusty, linux-kernel,
	virtualization, kraxel


* Linus Torvalds <torvalds@osdl.org> wrote:

> Backwards compatibility is absolutely paramount. Much more important 
> than just about anything else.

ok, and i agree that in this particular case we should not break older 
glibc. And that's the primary direction i'm going into: i've been trying 
to create CONFIG_COMPAT_VDSO [which defaults to =y] variants that both 
keep old glibc working and still have the randomization code active. 
Having the code active by default is very important because breakages 
get noticed early on, etc.

But wrt. binary compatibility, the vdso (ignoring for a moment that it's 
tied to other parts of glibc) is kind of border line. Nothing but glibc 
knows about its internal structure. So i dont think "binary 
compatibility" per se is violated: no app breaks. This is more analogous 
to the situation where say old modutils cannot read new modules and the 
kernel wont boot at all.

The _real_ argument i think, and the biggest practical difference is 
that glibc is _much harder to upgrade_ than other system utilities. So 
by _that_ argument i'd say we should avoid forcing a glibc dependency 
whenever possible - and that might as well be the right thing to do in 
this particular case.

Also, what makes this a bit different for me is that this is a security 
feature which always has a "should the fire-door be default-open or 
default-closed" type of question, and that's why i'm reluctant to give 
up - and at least have compat-vdso code working that triggers most of 
the randomization codepaths too.

	Ingo

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 17:46               ` Linus Torvalds
  2006-05-22 19:09                 ` Ingo Molnar
@ 2006-05-22 19:14                 ` Adrian Bunk
  2006-05-22 19:45                   ` Linus Torvalds
  1 sibling, 1 reply; 43+ messages in thread
From: Adrian Bunk @ 2006-05-22 19:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Andrew Morton, Zachary Amsden, jakub, rusty,
	linux-kernel, virtualization, kraxel

On Mon, May 22, 2006 at 10:46:33AM -0700, Linus Torvalds wrote:
>...
> > is it really a big problem to add "vdso=0" to the long list of 
> > requirements you need to run a 2.6 kernel on an old distribution (or to 
> > disable CONFIG_VDSO)? FC1 wasnt even 2.6-ready, it used a 2.4 kernel!
> 
> Backwards compatibility is absolutely paramount. Much more important than 
> just about anything else.

Unless I'm misunderstanding this issue, no official glibc release was 
ever affected which makes the probability of other people being affected 
pretty small.

And this issue is about backwards compatibility only insofar as it
works around a bug in some ancient cvs versions of glibc.

Is it a new policy that the kernel mustn't break any buggy userspace 
code?

> 		Linus

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 19:09                 ` Ingo Molnar
@ 2006-05-22 19:40                   ` Linus Torvalds
  0 siblings, 0 replies; 43+ messages in thread
From: Linus Torvalds @ 2006-05-22 19:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Zachary Amsden, jakub, rusty, linux-kernel,
	virtualization, kraxel



On Mon, 22 May 2006, Ingo Molnar wrote:
> 
> But wrt. binary compatibility, the vdso (ignoring for a moment that it's 
> tied to other parts of glibc) is kind of border line. Nothing but glibc 
> knows about its internal structure. So i dont think "binary 
> compatibility" per se is violated: no app breaks. This is more analogous 
> to the situation where say old modutils cannot read new modules and the 
> kernel wont boot at all.

No it's not. 

This is totally different from a 2.4.x -> 2.6.x breakage. This is about a 
kernel that used to work (2.6.16) and one that would not (2.6.17).

It's _that_ simple. No ifs, buts, maybe's or anything else.

			Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch
  2006-05-22 19:14                 ` Adrian Bunk
@ 2006-05-22 19:45                   ` Linus Torvalds
  0 siblings, 0 replies; 43+ messages in thread
From: Linus Torvalds @ 2006-05-22 19:45 UTC (permalink / raw)
  To: Adrian Bunk
  Cc: Ingo Molnar, Andrew Morton, Zachary Amsden, jakub, rusty,
	linux-kernel, virtualization, kraxel



On Mon, 22 May 2006, Adrian Bunk wrote:
> 
> Is it a new policy that the kernel mustn't break any buggy userspace 
> code?

It's not a new policy, dammit.

Guys, a kernel developer who cannot understand that user space is 
important should just drop their pretensions of being a kernel developer, 
and go play with some toy system like Hurd instead. There you can say 
"user space doesn't matter".

The kernel has _one_ mission in life, and one mission only. Guess what 
that is? It's to be the buffer between user space and shared resources. 
That's it. NOTHING ELSE MATTERS.

If the kernel breaks user space, the kernel is broken. End of story. 

Yes, there are reasons we must occasionally allow for it anyway, but they 
should all be some pretty damn major ones. Like "we simply _had_ to, there 
was no choice, because the alternatives broke user space more".

Which isn't even _remotely_ the case here.

The whole point of a kernel is that it doesn't do anything for itself. 
It's there to serve user space. 

			Linus

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2006-05-22 19:49 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-05-16  6:03 [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch Rusty Russell
2006-05-16  6:47 ` Ingo Molnar
2006-05-16  8:16   ` Zachary Amsden
2006-05-16  8:40     ` Chris Wright
2006-05-16  8:59       ` Zachary Amsden
2006-05-17  7:49   ` Rusty Russell
2006-05-18  7:54     ` Ingo Molnar
2006-05-18  8:29       ` Gerd Hoffmann
2006-05-20  0:43     ` Andrew Morton
2006-05-20  1:03       ` Ingo Molnar
2006-05-20  1:11         ` Andrew Morton
2006-05-20  1:15           ` Linus Torvalds
2006-05-20  8:53             ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
2006-05-20  9:26               ` Andrew Morton
2006-05-20  9:30                 ` Zachary Amsden
2006-05-20  9:43                   ` Zachary Amsden
2006-05-20  9:48                   ` Andrew Morton
2006-05-20 10:04                     ` Zachary Amsden
2006-05-21  4:38                       ` Rusty Russell
2006-05-21  9:35                         ` Rusty Russell
2006-05-21  9:52                           ` Andrew Morton
2006-05-21 10:41                           ` Ingo Molnar
2006-05-21 11:06                             ` Rusty Russell
2006-05-20  9:54                 ` Ingo Molnar
2006-05-20 10:16                 ` [patch] add print_fatal_signals support Ingo Molnar
2006-05-21 11:03                 ` [patch] i386, vdso=[0|1] boot option and /proc/sys/vm/vdso_enabled Ingo Molnar
2006-05-21 11:38                   ` Ingo Molnar
2006-05-21 12:33                     ` Andrew Morton
2006-05-21 14:10                 ` Arjan van de Ven
2006-05-22 14:32                   ` Alexey Kuznetsov
2006-05-20  1:16           ` [PATCH] Gerd Hoffman's move-vsyscall-into-user-address-range patch Zachary Amsden
2006-05-20  1:49           ` Andi Kleen
2006-05-20  1:24       ` Arjan van de Ven
2006-05-22 16:29       ` Jakub Jelinek
2006-05-22 16:44         ` Zachary Amsden
2006-05-22 17:14           ` Andrew Morton
2006-05-22 17:27             ` Ingo Molnar
2006-05-22 17:46               ` Linus Torvalds
2006-05-22 19:09                 ` Ingo Molnar
2006-05-22 19:40                   ` Linus Torvalds
2006-05-22 19:14                 ` Adrian Bunk
2006-05-22 19:45                   ` Linus Torvalds
2006-05-22 17:53               ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox