[PATCH] x86: ignore spurious faults

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Ingo Molnar <mingo@elte.hu>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Andi Kleen <ak@suse.de>,
	Harvey Harrison <harvey.harrison@gmail.com>
Subject: [PATCH] x86: ignore spurious faults
Date: Wed, 23 Jan 2008 16:05:33 -0800	[thread overview]
Message-ID: <4797D64D.1060105@goop.org> (raw)

When changing a kernel page from RO->RW, it's OK to leave stale TLB
entries around, since doing a global flush is expensive and they pose
no security problem.  They can, however, generate a spurious fault,
which we should catch and simply return from (which will have the
side-effect of reloading the TLB to the current PTE).

This can occur when running under Xen, because it frequently changes
kernel pages from RW->RO->RW to implement Xen's pagetable semantics.
It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids
doing a global TLB flush after changing page permissions.

[ Changes to fault_32.c and fault_64.c are identical, and should be
  easy to unify when the time comes. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
---
 arch/x86/mm/fault_32.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/fault_64.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

===================================================================
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -290,6 +290,53 @@ static int is_errata93(struct pt_regs *r
 
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This lets us
+ * lazily refresh the TLB when increasing the permissions of a kernel
+ * page (RO -> RW or NX -> X); an eager refresh would need a costly
+ * cross-processor TLB flush even if no other CPU holds a stale entry.
+ * Leaving a stale entry behind when increasing permissions has no
+ * security implications.  Returns 1 if the kernel page tables permit
+ * the faulting access (the fault was spurious), 0 otherwise.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc or module mapping area
  *
  * This assumes no large pages in there.
@@ -412,6 +459,11 @@ void __kprobes do_page_fault(struct pt_r
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
===================================================================
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -275,6 +275,53 @@ static noinline void pgtable_bad(unsigne
 }
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This lets us
+ * lazily refresh the TLB when increasing the permissions of a kernel
+ * page (RO -> RW or NX -> X); an eager refresh would need a costly
+ * cross-processor TLB flush even if no other CPU holds a stale entry.
+ * Leaving a stale entry behind when increasing permissions has no
+ * security implications.  Returns 1 if the kernel page tables permit
+ * the faulting access (the fault was spurious), 0 otherwise.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc area
  *
  * This assumes no large pages in there.
@@ -406,6 +453,11 @@ asmlinkage void __kprobes do_page_fault(
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.

next             reply	other threads:[~2008-01-24  0:05 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-24  0:05 Jeremy Fitzhardinge [this message]
2008-01-24  0:18 ` [PATCH] x86: ignore spurious faults Harvey Harrison
2008-01-24  0:26   ` Jeremy Fitzhardinge
2008-01-24  0:28   ` [PATCH UPDATE] " Jeremy Fitzhardinge
2008-01-24 19:14     ` Matt Mackall
2008-01-24 19:21       ` Jeremy Fitzhardinge
2008-01-24 23:41         ` Nick Piggin
2008-01-25  0:26           ` Jeremy Fitzhardinge
2008-01-25  7:36             ` Keir Fraser
2008-01-25  8:15               ` Jan Beulich
2008-01-25  8:38                 ` Nick Piggin
2008-01-25  9:11                   ` Andi Kleen
2008-01-25  9:18                     ` Keir Fraser
2008-01-25  9:51                       ` Andi Kleen
2008-01-25 10:19                       ` Andi Kleen
2008-01-25 13:17                         ` Keir Fraser
2008-01-25  9:18                   ` Jan Beulich
2008-01-25 15:30     ` Ingo Molnar
2008-01-25 15:54       ` Jeremy Fitzhardinge
2008-01-25 18:08         ` Ingo Molnar
2008-01-25 18:39           ` Jeremy Fitzhardinge
2008-01-24  6:49 ` [PATCH] " Andi Kleen
2008-01-24  7:02   ` Jeremy Fitzhardinge
2008-01-24  7:11     ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4797D64D.1060105@goop.org \
    --to=jeremy@goop.org \
    --cc=ak@suse.de \
    --cc=harvey.harrison@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.