[PATCH] x86 page fault handler not interrupt safe

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] x86 page fault handler not interrupt safe
@ 2001-05-06  1:26 Brian Gerst
  2001-05-07  0:53 ` Linus Torvalds
  0 siblings, 1 reply; 24+ messages in thread
From: Brian Gerst @ 2001-05-06  1:26 UTC (permalink / raw)
  To: torvalds, alan; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 534 bytes --]

Currently the page fault handler on the x86 can get a clobbered value
for %cr2 if an interrupt occurs and causes another page fault (interrupt
handler touches a vmalloced area for example) before %cr2 is read.  This
patch changes the page fault and alignment check (currently unused)
handlers to interrupt gates so that %cr2 can be read before an interrupt
can occur.  I'm not certain how much of a problem this really is, but I
suspect it could cause random seg faults to user space under heavy
interrupt load.  Comments are welcome.

[-- Attachment #2: diff-pagefault --]
[-- Type: text/plain, Size: 4684 bytes --]

diff -urN linux-2.4.5-pre1/arch/i386/kernel/entry.S linux/arch/i386/kernel/entry.S
--- linux-2.4.5-pre1/arch/i386/kernel/entry.S	Wed Nov  8 20:09:50 2000
+++ linux/arch/i386/kernel/entry.S	Sat May  5 16:32:42 2001
@@ -296,20 +296,21 @@
 	pushl %ds
 	pushl %eax
 	xorl %eax,%eax
+error_code_cr2:
 	pushl %ebp
 	pushl %edi
 	pushl %esi
 	pushl %edx
-	decl %eax			# eax = -1
 	pushl %ecx
 	pushl %ebx
 	cld
 	movl %es,%ecx
 	movl ORIG_EAX(%esp), %esi	# get the error code
 	movl ES(%esp), %edi		# get the function address
-	movl %eax, ORIG_EAX(%esp)
+	movl $-1, ORIG_EAX(%esp)
 	movl %ecx, ES(%esp)
 	movl %esp,%edx
+	pushl %eax			# push address (cr2)
 	pushl %esi			# push the error code
 	pushl %edx			# push the pt_regs pointer
 	movl $(__KERNEL_DS),%edx
@@ -317,7 +318,7 @@
 	movl %edx,%es
 	GET_CURRENT(%ebx)
 	call *%edi
-	addl $8,%esp
+	addl $12,%esp
 	jmp ret_from_exception
 
 ENTRY(coprocessor_error)
@@ -405,11 +406,18 @@
 
 ENTRY(alignment_check)
 	pushl $ SYMBOL_NAME(do_alignment_check)
-	jmp error_code
+	jmp get_cr2
 
 ENTRY(page_fault)
 	pushl $ SYMBOL_NAME(do_page_fault)
-	jmp error_code
+get_cr2:
+	pushl %ds
+	pushl %eax
+	movl %cr2,%eax
+	testl $IF_MASK,EFLAGS-EAX(%esp)
+	jz error_code_cr2
+	sti
+	jmp error_code_cr2
 
 ENTRY(machine_check)
 	pushl $0
diff -urN linux-2.4.5-pre1/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux-2.4.5-pre1/arch/i386/kernel/traps.c	Mon Mar 19 21:23:40 2001
+++ linux/arch/i386/kernel/traps.c	Sat May  5 16:03:22 2001
@@ -225,15 +225,6 @@
 		die(str, regs, err);
 }
 
-static inline unsigned long get_cr2(void)
-{
-	unsigned long address;
-
-	/* get the address */
-	__asm__("movl %%cr2,%0":"=r" (address));
-	return address;
-}
-
 static void inline do_trap(int trapnr, int signr, char *str, int vm86,
 			   struct pt_regs * regs, long error_code, siginfo_t *info)
 {
@@ -270,13 +261,13 @@
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code, unsigned long address) \
 { \
 	do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code, unsigned long address) \
 { \
 	siginfo_t info; \
 	info.si_signo = signr; \
@@ -287,13 +278,13 @@
 }
 
 #define DO_VM86_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code, unsigned long address) \
 { \
 	do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
 }
 
 #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code, unsigned long address) \
 { \
 	siginfo_t info; \
 	info.si_signo = signr; \
@@ -314,7 +305,7 @@
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, address)
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
@@ -973,10 +964,10 @@
 	set_trap_gate(11,&segment_not_present);
 	set_trap_gate(12,&stack_segment);
 	set_trap_gate(13,&general_protection);
-	set_trap_gate(14,&page_fault);
+	set_intr_gate(14,&page_fault);
 	set_trap_gate(15,&spurious_interrupt_bug);
 	set_trap_gate(16,&coprocessor_error);
-	set_trap_gate(17,&alignment_check);
+	set_intr_gate(17,&alignment_check);
 	set_trap_gate(18,&machine_check);
 	set_trap_gate(19,&simd_coprocessor_error);
 
diff -urN linux-2.4.5-pre1/arch/i386/mm/fault.c linux/arch/i386/mm/fault.c
--- linux-2.4.5-pre1/arch/i386/mm/fault.c	Wed May  2 09:24:09 2001
+++ linux/arch/i386/mm/fault.c	Sat May  5 17:34:41 2001
@@ -103,19 +103,15 @@
  *	bit 1 == 0 means read, 1 means write
  *	bit 2 == 0 means kernel, 1 means user-mode
  */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
-	unsigned long address;
 	unsigned long page;
 	unsigned long fixup;
 	int write;
 	siginfo_t info;
-
-	/* get the address */
-	__asm__("movl %%cr2,%0":"=r" (address));
 
 	tsk = current;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-06  1:26 [PATCH] x86 page fault handler not interrupt safe Brian Gerst
@ 2001-05-07  0:53 ` Linus Torvalds
  2001-05-07  3:54   ` Brian Gerst
  2001-05-07 10:45   ` Alan Cox
  0 siblings, 2 replies; 24+ messages in thread
From: Linus Torvalds @ 2001-05-07  0:53 UTC (permalink / raw)
  To: Brian Gerst; +Cc: alan, linux-kernel

On Sat, 5 May 2001, Brian Gerst wrote:
>
> Currently the page fault handler on the x86 can get a clobbered value
> for %cr2 if an interrupt occurs and causes another page fault (interrupt
> handler touches a vmalloced area for example) before %cr2 is read.

That should be ok. 

Yes, we'll get a clobbered value, but we'll get a _valid_ clobbered value,
and we'll just end up doing the fixups twice (and returning to the user
process that didn't get the page it wanted, which will end up re-doing the
page fault).

[ Looks closer.. ]

Actually, the second time we'd do the fixup we'd be unhappy, because it
has already been done. That test should probably be removed. Hmm.

Hmm.. The threading people wanted this same thing. Maybe we should just
make it so.

		Linus

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07  0:53 ` Linus Torvalds
@ 2001-05-07  3:54   ` Brian Gerst
  2001-05-07 10:45   ` Alan Cox
  1 sibling, 0 replies; 24+ messages in thread
From: Brian Gerst @ 2001-05-07  3:54 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: alan, linux-kernel

Linus Torvalds wrote:
> 
> On Sat, 5 May 2001, Brian Gerst wrote:
> >
> > Currently the page fault handler on the x86 can get a clobbered value
> > for %cr2 if an interrupt occurs and causes another page fault (interrupt
> > handler touches a vmalloced area for example) before %cr2 is read.
> 
> That should be ok.
> 
> Yes, we'll get a clobbered value, but we'll get a _valid_ clobbered value,
> and we'll just end up doing the fixups twice (and returning to the user
> process that didn't get the page it wanted, which will end up re-doing the
> page fault).
> 
> [ Looks closer.. ]
> 
> Actually, the second time we'd do the fixup we'd be unhappy, because it
> has already been done. That test should probably be removed. Hmm.
> 
> Hmm.. The threading people wanted this same thing. Maybe we should just
> make it so.
> 
>                 Linus

I think it's better to be on the side of correctness.  I designed the
patch to have interrupts disabled for the minimum time possible, so
there should be nearly no impact.

--
					Brian Gerst

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07  0:53 ` Linus Torvalds
  2001-05-07  3:54   ` Brian Gerst
@ 2001-05-07 10:45   ` Alan Cox
  2001-05-07 14:57     ` Brian Gerst
  2001-05-07 16:51     ` Linus Torvalds
  1 sibling, 2 replies; 24+ messages in thread
From: Alan Cox @ 2001-05-07 10:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Brian Gerst, alan, linux-kernel

> Yes, we'll get a clobbered value, but we'll get a _valid_ clobbered value,
> and we'll just end up doing the fixups twice (and returning to the user
> process that didn't get the page it wanted, which will end up re-doing the
> page fault).

I dont see that we will get a valid value in both cases.

	get_user
		fault - set %cr2
					IRQ
					vmalloc
					fault
						set %cr2
						fixup runs
					end IRQ
		cr2 is corrupt

At this point the Linus tree suffers from one problem because it gets parallel
fixups wrong, and the -ac tree suffers from a different problem because
it gets parallel fixups right and to handle that case wont allow a vmalloc
fixup on a fault from userspace (or you get infinite loops)

> [ Looks closer.. ]
> 
> Actually, the second time we'd do the fixup we'd be unhappy, because it
> has already been done. That test should probably be removed. Hmm.

There are a whole set of races with the vmalloc fixups. Most are I think fixed
in the -ac bits if you want a look (I've not submitted them as they are not
too pretty). What I don't currently see is how you handle this without
looping forever or getting the SMP race fixup wrong.

(The current -ac fix for the double vmalloc races is below. WP test makes it
more complex than is nice)

--- /usr/src/LINUS/LINUX2.4/linux.245p1/arch/i386/mm/fault.c	Wed May  2 13:52:04 2001
+++ /usr/src/LINUS/LINUX2.4/linux.ac/arch/i386/mm/fault.c	Fri May  4 15:03:45 2001
@@ -127,8 +183,11 @@
 	 * be in an interrupt or a critical region, and should
 	 * only copy the information from the master page table,
 	 * nothing more.
+	 *
+	 * Handle kernel vmalloc fill in faults. User space doesn't take
+	 * this path. It isnt permitted to go poking up there.
 	 */
-	if (address >= TASK_SIZE)
+	if (address >= TASK_SIZE && !(error_code & 4))
 		goto vmalloc_fault;
 
 	mm = tsk->mm;
@@ -325,7 +378,11 @@
 		 *
 		 * Do _not_ use "tsk" here. We might be inside
 		 * an interrupt in the middle of a task switch..
+		 *
+		 * Note. There is 1 gotcha here. We may be doing the WP
+		 * test. If so then fixing the pgd/pmd won't help.
 		 */
+		 
 		int offset = __pgd_offset(address);
 		pgd_t *pgd, *pgd_k;
 		pmd_t *pmd, *pmd_k;
@@ -344,7 +401,29 @@
 		pmd = pmd_offset(pgd, address);
 		pmd_k = pmd_offset(pgd_k, address);
 
-		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
+		/* If the pmd is present then we either have two cpus trying
+		 * to fill in the vmalloc entries at once, or we have an
+		 * exception. We can treat the collision as a slow path without
+		 * worry. Its incredibly incredibly rare
+		 *
+		 * If the pte is read only then we know its a fault and we must
+		 * exception or Oops as it would loop forever otherwise
+		 */
+		 
+		if (pmd_present(*pmd))
+		{
+			pte_t *ptep = pte_offset(pmd, address);
+			if ((error_code & 2) && !pte_write(*ptep))
+			{
+				if ((fixup = search_exception_table(regs->eip)) != 0) {
+					regs->eip = fixup;
+					return;
+				}
+				goto bad_area_nosemaphore;
+			}
+			/* SMP race.. continue */
+		}
+		if (!pmd_present(*pmd_k))
 			goto bad_area_nosemaphore;
 		set_pmd(pmd, *pmd_k);
 		return;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 10:45   ` Alan Cox
@ 2001-05-07 14:57     ` Brian Gerst
  2001-05-07 15:07       ` Alan Cox
  2001-05-07 16:51     ` Linus Torvalds
  1 sibling, 1 reply; 24+ messages in thread
From: Brian Gerst @ 2001-05-07 14:57 UTC (permalink / raw)
  To: Alan Cox; +Cc: Linus Torvalds, linux-kernel

Alan Cox wrote:
> (The current -ac fix for the double vmalloc races is below. WP test makes it
> more complex than is nice)

WP test is easy to handle.  Just filter out protection violations and
only take the vmalloc path if the page was not found.

-       if (address >= TASK_SIZE && !(error_code & 4))
+       if (address >= TASK_SIZE && !(error_code & 5))

--

				Brian Gerst

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 14:57     ` Brian Gerst
@ 2001-05-07 15:07       ` Alan Cox
  2001-05-07 17:12         ` Linus Torvalds
  2001-05-07 18:35         ` Anton Altaparmakov
  0 siblings, 2 replies; 24+ messages in thread
From: Alan Cox @ 2001-05-07 15:07 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Alan Cox, Linus Torvalds, linux-kernel

> Alan Cox wrote:
> > (The current -ac fix for the double vmalloc races is below. WP test makes it
> > more complex than is nice)
> 
> WP test is easy to handle.  Just filter out protection violations and
> only take the vmalloc path if the page was not found.
> 
> -       if (address >= TASK_SIZE && !(error_code & 4))
> +       if (address >= TASK_SIZE && !(error_code & 5))

That is nice. I hadn't thought about doing it that way. It still has the problem
if %cr2 is corrupted by a vmalloc fault but it cleans up my other code paths
nicely.

Thanks


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 15:07       ` Alan Cox
@ 2001-05-07 17:12         ` Linus Torvalds
  2001-05-07 17:27           ` David Woodhouse
                             ` (3 more replies)
  2001-05-07 18:35         ` Anton Altaparmakov
  1 sibling, 4 replies; 24+ messages in thread
From: Linus Torvalds @ 2001-05-07 17:12 UTC (permalink / raw)
  To: Alan Cox; +Cc: Brian Gerst, linux-kernel

On Mon, 7 May 2001, Alan Cox wrote:
> 
> That is nice. I hadn't thought about doing it that way. It still has the problem
> if %cr2 is corrupted by a vmalloc fault but it cleans up my other code paths
> nicely.

See about "corruption" in previous email. It doesn't exist.

For better debugging, we should probably walk the whole init_mm page table
tree when we take the fault, so this patch does that too: it
unconditionally copies the init_mm page table entries into the current
page table, while at the same time checking that they exist (including the
very last level that we didn't use to check at all).

This means that if you access one page past a vmalloc'ed area, you will
get a nice oops instead of endless page faults that will fix up the page
tables with mappings that already exist.

Untested.

In particular, does anybody have a buggy Pentium to test with the F0 0F
lock-up bug? It _should_ be caught with the error-code test (it's a
protection fault, not a non-present fault and thus the F0 0F case never
enters the vmalloc path), but it's been several years since the thing..

If anybody has such a beast, please try this kernel patch _and_ running
the F0 0F bug-producing program (search for it on the 'net - it must be
out there somewhere) to verify that the code still correctly handles that
case.

		Linus

-----
--- v2.4.4/linux/arch/i386/mm/fault.c	Mon Mar 19 12:35:09 2001
+++ linux/arch/i386/mm/fault.c	Mon May  7 10:01:37 2001
@@ -127,8 +127,12 @@
 	 * be in an interrupt or a critical region, and should
 	 * only copy the information from the master page table,
 	 * nothing more.
+	 *
+	 * This verifies that the fault happens in kernel space
+	 * (error_code & 4) == 0, and that the fault was not a
+	 * protection error (error_code & 1) == 0.
 	 */
-	if (address >= TASK_SIZE)
+	if (address >= TASK_SIZE && !(error_code & 5))
 		goto vmalloc_fault;

 	mm = tsk->mm;
@@ -224,7 +228,6 @@
 bad_area:
 	up_read(&mm->mmap_sem);

-bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & 4) {
 		tsk->thread.cr2 = address;
@@ -322,27 +325,32 @@
 		/*
 		 * Synchronize this task's top level page-table
 		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
 		 */
 		int offset = __pgd_offset(address);
 		pgd_t *pgd, *pgd_k;
 		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;

-		pgd = tsk->active_mm->pgd + offset;
+		asm("movl %%cr3,%0":"=r" (pgd));
+		pgd = offset + (pgd_t *)__va(pgd);
 		pgd_k = init_mm.pgd + offset;

-		if (!pgd_present(*pgd)) {
-			if (!pgd_present(*pgd_k))
-				goto bad_area_nosemaphore;
-			set_pgd(pgd, *pgd_k);
-			return;
-		}
-
+		if (!pgd_present(*pgd_k))
+			goto no_context;
+		set_pgd(pgd, *pgd_k);
+		
 		pmd = pmd_offset(pgd, address);
 		pmd_k = pmd_offset(pgd_k, address);
-
-		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
-			goto bad_area_nosemaphore;
+		if (!pmd_present(*pmd_k))
+			goto no_context;
 		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto no_context;
 		return;
 	}
 }

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 17:12         ` Linus Torvalds
@ 2001-05-07 17:27           ` David Woodhouse
  2001-05-07 19:54           ` Brian Gerst
                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: David Woodhouse @ 2001-05-07 17:27 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, Brian Gerst, linux-kernel


torvalds@transmeta.com said:
>  If anybody has such a beast, please try this kernel patch _and_
> running the F0 0F bug-producing program (search for it on the 'net -
> it must be out there somewhere) to verify that the code still
> correctly handles that case. 

Something along the lines of:

echo "unsigned long main=0xf00fc7c8;" > f00fbug.c ; make f00fbug

--
dwmw2



^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 17:12         ` Linus Torvalds
  2001-05-07 17:27           ` David Woodhouse
@ 2001-05-07 19:54           ` Brian Gerst
  2001-05-07 20:16             ` Linus Torvalds
  2001-05-07 21:37           ` Alan Cox
  2001-05-07 22:52           ` Jesper Juhl
  3 siblings, 1 reply; 24+ messages in thread
From: Brian Gerst @ 2001-05-07 19:54 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, linux-kernel

Linus Torvalds wrote:
> 
> On Mon, 7 May 2001, Alan Cox wrote:
> >
> > That is nice. I hadn't thought about doing it that way. It still has the problem
> > if %cr2 is corrupted by a vmalloc fault but it cleans up my other code paths
> > nicely.
> 
> See about "corruption" in previous email. It doesn't exist.
> 
> For better debugging, we should probably walk the whole init_mm page table
> tree when we take the fault, so this patch does that too: it
> unconditionally copies the init_mm page table entries into the current
> page table, while at the same time checking that they exist (including the
> very last level that we didn't use to check at all).
> 
> This means that if you access one page past a vmalloc'ed area, you will
> get a nice oops instead of endless page faults that will fix up the page
> tables with mappings that already exist.

This patch will still cause the user process to seg fault: The error
code on the stack will not match the address in %cr2.

	user fault (cr2=useraddr, error_code=5 or 7)
	interrupt
		vmalloc fault (cr2=vmallocaddr, error_code=0 or 2)
			handle vmalloc fault
			iret
		iret
	handle user fault (cr2=vmallocaddr, error_code=5 or 7)

We then fall down to find_vma() which will fail and then send SIGSEGV to
the user process.

--

				Brian Gerst

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 19:54           ` Brian Gerst
@ 2001-05-07 20:16             ` Linus Torvalds
  0 siblings, 0 replies; 24+ messages in thread
From: Linus Torvalds @ 2001-05-07 20:16 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Alan Cox, linux-kernel



On Mon, 7 May 2001, Brian Gerst wrote:
>
> This patch will still cause the user process to seg fault: The error
> code on the stack will not match the address in %cr2.

You've convinced me. Good thinking. Let's do the irq thing.

		Linus


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 17:12         ` Linus Torvalds
  2001-05-07 17:27           ` David Woodhouse
  2001-05-07 19:54           ` Brian Gerst
@ 2001-05-07 21:37           ` Alan Cox
  2001-05-07 22:52           ` Jesper Juhl
  3 siblings, 0 replies; 24+ messages in thread
From: Alan Cox @ 2001-05-07 21:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, Brian Gerst, linux-kernel

> If anybody has such a beast, please try this kernel patch _and_ running
> the F0 0F bug-producing program (search for it on the 'net - it must be

One apparent problem with this implementation

> +	 *
> +	 * This verifies that the fault happens in kernel space
> +	 * (error_code & 4) == 0, and that the fault was not a
> +	 * protection error (error_code & 1) == 0.
>  	 */
> -	if (address >= TASK_SIZE)
> +	if (address >= TASK_SIZE && !(error_code & 5))
>  		goto vmalloc_fault;

address might be from the following vmalloc fault. The error code would
indicate user space, so we would do a bogus user space fix up for vmalloc
space, fault and die.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 17:12         ` Linus Torvalds
                             ` (2 preceding siblings ...)
  2001-05-07 21:37           ` Alan Cox
@ 2001-05-07 22:52           ` Jesper Juhl
  3 siblings, 0 replies; 24+ messages in thread
From: Jesper Juhl @ 2001-05-07 22:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

Linus Torvalds wrote:

> 
> In particular, does anybody have a buggy Pentium to test with the F0 0F
> lock-up bug? It _should_ be caught with the error-code test (it's a
> protection fault, not a non-present fault and thus the F0 0F case never
> enters the vmalloc path), but it's been several years since the thing..
> 
> If anybody has such a beast, please try this kernel patch _and_ running
> the F0 0F bug-producing program (search for it on the 'net - it must be
> out there somewhere) to verify that the code still correctly handles that
> case.
> 

I have a Thinkpad with a buggy pentium (see cat /proc/cpuinfo below) and 
I tried running the F00F test program available from 
http://lwn.net/2001/0329/a/ltp-f00f.php3 first on a 2.2.17 kernel that 
I've been running for ages without problems I got this output:

Testing for proper f00f instruction handling.
SIGILL received from f00f instruction.  Good.

Then I tried 2.4.4 with your patch applied and got the same output (and 
no lockup), so according to that test program your patch does not break 
the F00F handling code. :-)

If you want me to test other patches, just let me know and I'll be happy 
to do so!

$ cat /proc/cpuinfo
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 5
model           : 8
model name      : Mobile Pentium MMX
stepping        : 1
cpu MHz         : 232.111
fdiv_bug        : no
hlt_bug         : no
sep_bug         : no
f00f_bug        : yes
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 1
wp              : yes
flags           : fpu vme de pse tsc msr mce cx8 mmx
bogomips        : 463.67


Best regards,
Jesper Juhl - juhl@eisenstein.dk


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 15:07       ` Alan Cox
  2001-05-07 17:12         ` Linus Torvalds
@ 2001-05-07 18:35         ` Anton Altaparmakov
  1 sibling, 0 replies; 24+ messages in thread
From: Anton Altaparmakov @ 2001-05-07 18:35 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, Brian Gerst, linux-kernel

At 18:12 07/05/2001, Linus Torvalds wrote:
>Untested.
>
>In particular, does anybody have a buggy Pentium to test with the F0 0F
>lock-up bug?

Yes, I have one. 2.4.3-ac6 (plus a few patches) detects the bug on boot up 
and enables the work around. Running the f00f test program from SGI results 
in the correct behaviour of a SIGILL signal being sent to the program.

>If anybody has such a beast, please try this kernel patch _and_ running

Oh my, I always considered it as a cute, fluffy bunny. No need to 
bestialize it unnecessarily... (-;

>the F0 0F bug-producing program (search for it on the 'net - it must be
>out there somewhere) to verify that the code still correctly handles that
>case.

Am compiling 2.4.5-pre1 with your patch (manually applied as parts of it 
were already in .5-pre1) right now. I will follow-up with results when it 
has finished and I have tested it (i.e. don't hold your breath, it might be 
a beast but it is a slow one...)

Best regards,

Anton

-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Linux NTFS Maintainer / WWW: http://sourceforge.net/projects/linux-ntfs/
ICQ: 8561279 / WWW: http://www-stu.christs.cam.ac.uk/~aia21/

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 10:45   ` Alan Cox
  2001-05-07 14:57     ` Brian Gerst
@ 2001-05-07 16:51     ` Linus Torvalds
  1 sibling, 0 replies; 24+ messages in thread
From: Linus Torvalds @ 2001-05-07 16:51 UTC (permalink / raw)
  To: Alan Cox; +Cc: Brian Gerst, linux-kernel


On Mon, 7 May 2001, Alan Cox wrote:
> 
> I dont see that we will get a valid value in both cases.
> 
> 	get_user
> 		fault - set %cr2
> 					IRQ
> 					vmalloc
> 					fault
> 						set %cr2
> 						fixup runs
> 					end IRQ
> 		cr2 is corrupt

Wrong. "%cr2" is _not_ "corrupt". It has a well-defined value.

So what happens is

	get_user (or user-mode access)
		fault - set %cr2 to fault1
			irq
			vmalloc fault - set %cr2 to fault2
				fixup runs, iret
			irq runs, iret
		%cr2 is still %fault2
		vmalloc fault - nothing to do
		"false fixup" runs, iret
	get_user (or user-mode access)
		fault - set %cr2 to fault1
		... get the right behaviour now ...
		
> There are a whole set of races with the vmalloc fixups.

As far as I can tell, there are no races anywhere. Just silly bugs that
are hard to see.

		Linus


^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH] x86 page fault handler not interrupt safe
@ 2001-05-07 17:32 Dunlap, Randy
  2001-05-07 17:51 ` David Woodhouse
  0 siblings, 1 reply; 24+ messages in thread
From: Dunlap, Randy @ 2001-05-07 17:32 UTC (permalink / raw)
  To: 'David Woodhouse', Linus Torvalds
  Cc: Alan Cox, Brian Gerst, linux-kernel

> From: David Woodhouse [mailto:dwmw2@infradead.org]
> 
> torvalds@transmeta.com said:
> >  If anybody has such a beast, please try this kernel patch _and_
> > running the F0 0F bug-producing program (search for it on the 'net -
> > it must be out there somewhere) to verify that the code still
> > correctly handles that case. 
> 
> Something along the lines of:
> 
> echo "unsigned long main=0xf00fc7c8;" > f00fbug.c ; make f00fbug

Yes, that's what the (SGI) program uses:
http://lwn.net/2001/0329/a/ltp-f00f.php3

~Randy


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 17:32 Dunlap, Randy
@ 2001-05-07 17:51 ` David Woodhouse
  0 siblings, 0 replies; 24+ messages in thread
From: David Woodhouse @ 2001-05-07 17:51 UTC (permalink / raw)
  To: Dunlap, Randy; +Cc: Linus Torvalds, Alan Cox, Brian Gerst, linux-kernel


randy.dunlap@intel.com said:
> > echo "unsigned long main=0xf00fc7c8;" > f00fbug.c ; make f00fbug

> Yes, that's what the (SGI) program uses: 
> http://lwn.net/2001/0329/a/ltp-f00f.php3

Restated on l-k for the benefit of anyone naïve enough to expect me to have 
got it right... my original version would only work on the bigendian models. 
For the rest, try:

echo "unsigned main = 0xc8c70ff0;" > f00fbug.c ; make f00fbug

--
dwmw2



^ permalink raw reply	[flat|nested] 24+ messages in thread

[parent not found: <D5E932F578EBD111AC3F00A0C96B1E6F07DBE26F@orsmsx31.jf.intel .com>]

* RE: [PATCH] x86 page fault handler not interrupt safe
       [not found] <D5E932F578EBD111AC3F00A0C96B1E6F07DBE26F@orsmsx31.jf.intel .com>
@ 2001-05-07 17:52 ` Anton Altaparmakov
  0 siblings, 0 replies; 24+ messages in thread
From: Anton Altaparmakov @ 2001-05-07 17:52 UTC (permalink / raw)
  To: Dunlap, Randy
  Cc: 'David Woodhouse', Linus Torvalds, Alan Cox, Brian Gerst,
	linux-kernel

At 18:32 07/05/2001, Dunlap, Randy wrote:
> > From: David Woodhouse [mailto:dwmw2@infradead.org]
> >
> > torvalds@transmeta.com said:
> > >  If anybody has such a beast, please try this kernel patch _and_
> > > running the F0 0F bug-producing program (search for it on the 'net -
> > > it must be out there somewhere) to verify that the code still
> > > correctly handles that case.
> >
> > Something along the lines of:
> >
> > echo "unsigned long main=0xf00fc7c8;" > f00fbug.c ; make f00fbug
>
>Yes, that's what the (SGI) program uses:
>http://lwn.net/2001/0329/a/ltp-f00f.php3

That's not quite what they do. David's SGI equivalent would be:

echo "unsigned long main=0xc8c70ff0;" > f00fbug.c ; make f00fbug

i.e. remember that ia32 is little endian.

Thanks for the link.

Best regards,

Anton


-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Linux NTFS Maintainer / WWW: http://sourceforge.net/projects/linux-ntfs/
ICQ: 8561279 / WWW: http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 24+ messages in thread

[parent not found: <3AF712D5.5D712E0F@didntduck.org>]

* Re: [PATCH] x86 page fault handler not interrupt safe
       [not found] <3AF712D5.5D712E0F@didntduck.org>
@ 2001-05-07 21:44 ` Linus Torvalds
  2001-05-07 22:10   ` Brian Gerst
  2001-05-08 10:45   ` Alan Cox
  2001-05-07 21:53 ` Nigel Gamble
  1 sibling, 2 replies; 24+ messages in thread
From: Linus Torvalds @ 2001-05-07 21:44 UTC (permalink / raw)
  To: Brian Gerst; +Cc: nigel, Alan Cox, linux-kernel



On Mon, 7 May 2001, Brian Gerst wrote:
>
> Keep in mind that regs->eflags could be from user space, and could have
> some undesirable flags set.  That's why I did a test/sti instead of
> reloading eflags.  Plus my patch leaves interrupts disabled for the
> minimum time possible.

The plain "popf" should be ok: the way intel works, you cannot actually
use popf to set any of the strange flags (if vm86 mode etc).

I like the size of this alternate patch.

		Linus


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 21:44 ` Linus Torvalds
@ 2001-05-07 22:10   ` Brian Gerst
  2001-05-08 10:45   ` Alan Cox
  1 sibling, 0 replies; 24+ messages in thread
From: Brian Gerst @ 2001-05-07 22:10 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: nigel, Alan Cox, linux-kernel

Linus Torvalds wrote:
> 
> On Mon, 7 May 2001, Brian Gerst wrote:
> >
> > Keep in mind that regs->eflags could be from user space, and could have
> > some undesirable flags set.  That's why I did a test/sti instead of
> > reloading eflags.  Plus my patch leaves interrupts disabled for the
> > minimum time possible.
> 
> The plain "popf" should be ok: the way intel works, you cannot actually
> use popf to set any of the strange flags (if vm86 mode etc).

TF (trap flag) would be one that I wouldn't want to get reset.  You
could use
	local_irq_restore(regs->eflags & ~(TF|RF|VM|NT))
which are all the flags cleared by an interrupt/trap gate.  Or
	if (regs->eflags & IF) sti();
Take your pick.

--

				Brian Gerst

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-07 21:44 ` Linus Torvalds
  2001-05-07 22:10   ` Brian Gerst
@ 2001-05-08 10:45   ` Alan Cox
  2001-05-08 16:45     ` Linus Torvalds
  1 sibling, 1 reply; 24+ messages in thread
From: Alan Cox @ 2001-05-08 10:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Brian Gerst, nigel, Alan Cox, linux-kernel

> On Mon, 7 May 2001, Brian Gerst wrote:
> >
> > Keep in mind that regs->eflags could be from user space, and could have
> > some undesirable flags set.  That's why I did a test/sti instead of
> > reloading eflags.  Plus my patch leaves interrupts disabled for the
> > minimum time possible.
> 
> The plain "popf" should be ok: the way intel works, you cannot actually
> use popf to set any of the strange flags (if vm86 mode etc).
> 
> I like the size of this alternate patch.

I dont see where the alternative patch ensures the user didnt flip the
direction flag for one


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-08 10:45   ` Alan Cox
@ 2001-05-08 16:45     ` Linus Torvalds
  2001-05-09 22:12       ` Brian Gerst
  0 siblings, 1 reply; 24+ messages in thread
From: Linus Torvalds @ 2001-05-08 16:45 UTC (permalink / raw)
  To: Alan Cox; +Cc: Brian Gerst, nigel, linux-kernel


On Tue, 8 May 2001, Alan Cox wrote:
> 
> I dont see where the alternative patch ensures the user didnt flip the
> direction flag for one

Yeah. 

We might as well just make it "eflags & IF", none of the other flags
should matter (or we explicitly want them cleared).

		Linus


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
  2001-05-08 16:45     ` Linus Torvalds
@ 2001-05-09 22:12       ` Brian Gerst
  0 siblings, 0 replies; 24+ messages in thread
From: Brian Gerst @ 2001-05-09 22:12 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, nigel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 650 bytes --]

Linus Torvalds wrote:
> 
> On Tue, 8 May 2001, Alan Cox wrote:
> >
> > I dont see where the alternative patch ensures the user didnt flip the
> > direction flag for one
> 
> Yeah.
> 
> We might as well just make it "eflags & IF", none of the other flags
> should matter (or we explicitly want them cleared).
> 
>                 Linus

Here is an updated patch.  After reading over the Intel docs, and some
testing on my Athlon, I found that %cr2 is not set on alignment check
faults.  I replaced it with the address of the faulting instruction.  It
may work on an Intel but is undocumented.  The eip makes more sense
anyways.

-- 

						Brian Gerst

[-- Attachment #2: diff-pagefault2 --]
[-- Type: text/plain, Size: 2109 bytes --]

diff -urN linux-2.4.5-pre1/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux-2.4.5-pre1/arch/i386/kernel/traps.c	Mon Mar 19 21:23:40 2001
+++ linux/arch/i386/kernel/traps.c	Wed May  9 17:51:58 2001
@@ -225,15 +225,6 @@
 		die(str, regs, err);
 }
 
-static inline unsigned long get_cr2(void)
-{
-	unsigned long address;
-
-	/* get the address */
-	__asm__("movl %%cr2,%0":"=r" (address));
-	return address;
-}
-
 static void inline do_trap(int trapnr, int signr, char *str, int vm86,
 			   struct pt_regs * regs, long error_code, siginfo_t *info)
 {
@@ -314,7 +305,7 @@
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, regs->eip)
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
@@ -973,7 +964,7 @@
 	set_trap_gate(11,&segment_not_present);
 	set_trap_gate(12,&stack_segment);
 	set_trap_gate(13,&general_protection);
-	set_trap_gate(14,&page_fault);
+	set_intr_gate(14,&page_fault);
 	set_trap_gate(15,&spurious_interrupt_bug);
 	set_trap_gate(16,&coprocessor_error);
 	set_trap_gate(17,&alignment_check);
diff -urN linux-2.4.5-pre1/arch/i386/mm/fault.c linux/arch/i386/mm/fault.c
--- linux-2.4.5-pre1/arch/i386/mm/fault.c	Wed May  2 09:24:09 2001
+++ linux/arch/i386/mm/fault.c	Wed May  9 17:18:17 2001
@@ -98,6 +98,9 @@
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  *
+ * This is called with interrupts off, to protect %cr2 from being
+ * overwritten by an interrupt handler that faults.
+ *
  * error_code:
  *	bit 0 == 0 means no page found, 1 means protection fault
  *	bit 1 == 0 means read, 1 means write
@@ -116,6 +119,10 @@
 
 	/* get the address */
 	__asm__("movl %%cr2,%0":"=r" (address));
+
+	/* Reenable interrupts, but don't trust any other flags */
+	if (regs->eflags & X86_EFLAGS_IF)
+		sti();
 
 	tsk = current;
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
       [not found] <3AF712D5.5D712E0F@didntduck.org>
  2001-05-07 21:44 ` Linus Torvalds
@ 2001-05-07 21:53 ` Nigel Gamble
  1 sibling, 0 replies; 24+ messages in thread
From: Nigel Gamble @ 2001-05-07 21:53 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Linus Torvalds, Alan Cox, linux-kernel

On Mon, 7 May 2001, Brian Gerst wrote:
> Nigel Gamble wrote:
> > 
> > On Mon, 7 May 2001, Linus Torvalds wrote:
> > > On Mon, 7 May 2001, Brian Gerst wrote:
> > > > This patch will still cause the user process to seg fault: The error
> > > > code on the stack will not match the address in %cr2.
> > >
> > > You've convinced me. Good thinking. Let's do the irq thing.
> > 
> > I've actually seen user processes seg faulting because of this with the
> > fully preemptible kernel patch applied.  The fix we used in that patch
> > was to use an interrupt gate for the fault handler, then to simply
> > restore the interrupt state:
> 
> Keep in mind that regs->eflags could be from user space, and could have
> some undesirable flags set.  That's why I did a test/sti instead of

Good point.

> reloading eflags.  Plus my patch leaves interrupts disabled for the
> minimum time possible.

I'm not sure that it makes much difference, as interrupts are disabled
for such a short time anyway.  I'd prefer to put the test/sti in
do_page_fault(), and reduce the complexity needed in assembler routines
as much as possible, for maintainability reasons.

Nigel Gamble                                    nigel@nrg.org
Mountain View, CA, USA.                         http://www.nrg.org/

MontaVista Software                             nigel@mvista.com


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] x86 page fault handler not interrupt safe
@ 2001-05-07 21:58 Anton Altaparmakov
  0 siblings, 0 replies; 24+ messages in thread
From: Anton Altaparmakov @ 2001-05-07 21:58 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alan Cox, Brian Gerst, linux-kernel

At 18:12 07/05/2001, Linus Torvalds wrote:
>In particular, does anybody have a buggy Pentium to test with the F0 0F
>lock-up bug?
[snip]
>If anybody has such a beast, please try this kernel patch _and_ running

Still works ok (2.4.5-pre1 + patch). SIGILL is sent.

Best regards,

Anton


-- 
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Linux NTFS Maintainer / WWW: http://sourceforge.net/projects/linux-ntfs/
ICQ: 8561279 / WWW: http://www-stu.christs.cam.ac.uk/~aia21/


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2001-05-09 22:20 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-05-06  1:26 [PATCH] x86 page fault handler not interrupt safe Brian Gerst
2001-05-07  0:53 ` Linus Torvalds
2001-05-07  3:54   ` Brian Gerst
2001-05-07 10:45   ` Alan Cox
2001-05-07 14:57     ` Brian Gerst
2001-05-07 15:07       ` Alan Cox
2001-05-07 17:12         ` Linus Torvalds
2001-05-07 17:27           ` David Woodhouse
2001-05-07 19:54           ` Brian Gerst
2001-05-07 20:16             ` Linus Torvalds
2001-05-07 21:37           ` Alan Cox
2001-05-07 22:52           ` Jesper Juhl
2001-05-07 18:35         ` Anton Altaparmakov
2001-05-07 16:51     ` Linus Torvalds
  -- strict thread matches above, loose matches on Subject: below --
2001-05-07 17:32 Dunlap, Randy
2001-05-07 17:51 ` David Woodhouse
     [not found] <D5E932F578EBD111AC3F00A0C96B1E6F07DBE26F@orsmsx31.jf.intel .com>
2001-05-07 17:52 ` Anton Altaparmakov
     [not found] <3AF712D5.5D712E0F@didntduck.org>
2001-05-07 21:44 ` Linus Torvalds
2001-05-07 22:10   ` Brian Gerst
2001-05-08 10:45   ` Alan Cox
2001-05-08 16:45     ` Linus Torvalds
2001-05-09 22:12       ` Brian Gerst
2001-05-07 21:53 ` Nigel Gamble
2001-05-07 21:58 Anton Altaparmakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox