public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: ignore spurious faults
@ 2008-01-24  0:05 Jeremy Fitzhardinge
  2008-01-24  0:18 ` Harvey Harrison
  2008-01-24  6:49 ` [PATCH] " Andi Kleen
  0 siblings, 2 replies; 24+ messages in thread
From: Jeremy Fitzhardinge @ 2008-01-24  0:05 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Linux Kernel Mailing List, Andi Kleen, Harvey Harrison

When changing a kernel page from RO->RW, it's OK to leave stale TLB
entries around, since doing a global flush is expensive and they pose
no security problem.  They can, however, generate a spurious fault,
which we should catch and simply return from (which will have the
side-effect of reloading the TLB to the current PTE).

This can occur when running under Xen, because it frequently changes
kernel pages from RW->RO->RW to implement Xen's pagetable semantics.
It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids
doing a global TLB flush after changing page permissions.

[ Changes to fault_32.c and fault_64.c are identical, and should be
  easy to unify when the time comes. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
---
 arch/x86/mm/fault_32.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/fault_64.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

===================================================================
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -290,6 +290,53 @@ static int is_errata93(struct pt_regs *r
 
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X).  Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+	/* Walk init_mm's page tables; a non-present level is not spurious. */
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+	/* NOTE(review): assumes a 4k mapping; a large-page pmd is not handled. */
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc or module mapping area
  *
  * This assumes no large pages in there.
@@ -412,6 +459,11 @@ void __kprobes do_page_fault(struct pt_r
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
===================================================================
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -275,6 +275,53 @@ static noinline void pgtable_bad(unsigne
 }
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X).  Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+	/* Walk init_mm's page tables; a non-present level is not spurious. */
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+	/* NOTE(review): assumes a 4k mapping; a large-page pmd is not handled. */
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc area
  *
  * This assumes no large pages in there.
@@ -406,6 +453,11 @@ asmlinkage void __kprobes do_page_fault(
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.


^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2008-01-25 18:39 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-24  0:05 [PATCH] x86: ignore spurious faults Jeremy Fitzhardinge
2008-01-24  0:18 ` Harvey Harrison
2008-01-24  0:26   ` Jeremy Fitzhardinge
2008-01-24  0:28   ` [PATCH UPDATE] " Jeremy Fitzhardinge
2008-01-24 19:14     ` Matt Mackall
2008-01-24 19:21       ` Jeremy Fitzhardinge
2008-01-24 23:41         ` Nick Piggin
2008-01-25  0:26           ` Jeremy Fitzhardinge
2008-01-25  7:36             ` Keir Fraser
2008-01-25  8:15               ` Jan Beulich
2008-01-25  8:38                 ` Nick Piggin
2008-01-25  9:11                   ` Andi Kleen
2008-01-25  9:18                     ` Keir Fraser
2008-01-25  9:51                       ` Andi Kleen
2008-01-25 10:19                       ` Andi Kleen
2008-01-25 13:17                         ` Keir Fraser
2008-01-25  9:18                   ` Jan Beulich
2008-01-25 15:30     ` Ingo Molnar
2008-01-25 15:54       ` Jeremy Fitzhardinge
2008-01-25 18:08         ` Ingo Molnar
2008-01-25 18:39           ` Jeremy Fitzhardinge
2008-01-24  6:49 ` [PATCH] " Andi Kleen
2008-01-24  7:02   ` Jeremy Fitzhardinge
2008-01-24  7:11     ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox