From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: linuxppc-dev@ozlabs.org
Subject: [PATCH 08/12] powerpc: Add support for page fault retry and fatal signals
Date: Fri, 2 Mar 2012 20:35:18 +1100 [thread overview]
Message-ID: <1330680922-6894-9-git-send-email-benh@kernel.crashing.org> (raw)
In-Reply-To: <1330680922-6894-1-git-send-email-benh@kernel.crashing.org>
Other architectures such as x86 and ARM have been growing
new support for features like retrying page faults after
dropping the mm semaphore to break contention, or being
able to return from a stuck page fault when a SIGKILL is
pending.
This refactors our implementation of do_page_fault() to
move the error handling out of line in a way similar to
x86 and adds support for those two features.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
arch/powerpc/mm/fault.c | 170 +++++++++++++++++++++++++++++++++--------------
1 files changed, 120 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7e89006..19f2f94 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -105,6 +105,82 @@ static int store_updates_sp(struct pt_regs *regs)
}
return 0;
}
+/*
+ * do_page_fault error handling helpers
+ */
+
+#define MM_FAULT_RETURN 0
+#define MM_FAULT_CONTINUE -1
+#define MM_FAULT_ERR(sig) (sig)
+
+static int out_of_memory(struct pt_regs *regs)
+{
+ /*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+ up_read(&current->mm->mmap_sem);
+ if (!user_mode(regs))
+ return MM_FAULT_ERR(SIGKILL);
+ pagefault_out_of_memory();
+ return MM_FAULT_RETURN;
+}
+
+static int do_sigbus(struct pt_regs *regs, unsigned long address)
+{
+ siginfo_t info;
+
+ up_read(&current->mm->mmap_sem);
+
+ if (user_mode(regs)) {
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, current);
+ return MM_FAULT_RETURN;
+ }
+ return MM_FAULT_ERR(SIGBUS);
+}
+
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+{
+ /*
+ * Pagefault was interrupted by SIGKILL. We have no reason to
+ * continue the pagefault.
+ */
+ if (fatal_signal_pending(current)) {
+ /*
+ * If we have retry set, the mmap semaphore will have
+ * already been released in __lock_page_or_retry(). Else
+ * we release it now.
+ */
+ if (!(fault & VM_FAULT_RETRY))
+ up_read(&current->mm->mmap_sem);
+ /* Coming from kernel, we need to deal with uaccess fixups */
+ if (user_mode(regs))
+ return MM_FAULT_RETURN;
+ return MM_FAULT_ERR(SIGKILL);
+ }
+
+ /* No fault: be happy */
+ if (!(fault & VM_FAULT_ERROR))
+ return MM_FAULT_CONTINUE;
+
+ /* Out of memory */
+ if (fault & VM_FAULT_OOM)
+ return out_of_memory(regs);
+
+ /* Bus error. x86 handles HWPOISON here, we'll add this if/when
+ * we support the feature in HW
+ */
+ if (fault & VM_FAULT_SIGBUS)
+ return do_sigbus(regs, addr);
+
+ /* We don't understand the fault code, this is fatal */
+ BUG();
+ return MM_FAULT_CONTINUE;
+}
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
@@ -124,11 +200,12 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- siginfo_t info;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
int code = SEGV_MAPERR;
- int is_write = 0, ret;
+ int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int fault;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
@@ -145,6 +222,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
+ if (is_write)
+ flags |= FAULT_FLAG_WRITE;
+
#ifdef CONFIG_PPC_ICSWX
/*
* we need to do this early because this "data storage
@@ -152,13 +232,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* look at it
*/
if (error_code & ICSWX_DSI_UCT) {
- int ret;
-
- ret = acop_handle_fault(regs, address, error_code);
- if (ret)
- return ret;
+ int rc = acop_handle_fault(regs, address, error_code);
+ if (rc)
+ return rc;
}
-#endif
+#endif /* CONFIG_PPC_ICSWX */
if (notify_page_fault(regs))
return 0;
@@ -216,6 +294,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (!user_mode(regs) && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
+retry:
down_read(&mm->mmap_sem);
} else {
/*
@@ -338,30 +417,43 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
- if (unlikely(ret & VM_FAULT_ERROR)) {
- if (ret & VM_FAULT_OOM)
- goto out_of_memory;
- else if (ret & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
+ fault = handle_mm_fault(mm, vma, address, flags);
+ if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+ int rc = mm_fault_error(regs, address, fault);
+ if (rc >= MM_FAULT_RETURN)
+ return rc;
}
- if (ret & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
+
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ current->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- preempt_disable();
- get_lppaca()->page_ins += (1 << PAGE_FACTOR);
- preempt_enable();
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ preempt_disable();
+ get_lppaca()->page_ins += (1 << PAGE_FACTOR);
+ preempt_enable();
+ }
+#endif /* CONFIG_PPC_SMLPAR */
+ } else {
+ current->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ goto retry;
}
-#endif
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
}
+
up_read(&mm->mmap_sem);
return 0;
@@ -382,28 +474,6 @@ bad_area_nosemaphore:
return SIGSEGV;
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (!user_mode(regs))
- return SIGKILL;
- pagefault_out_of_memory();
- return 0;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
- if (user_mode(regs)) {
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return 0;
- }
- return SIGBUS;
}
/*
--
1.7.9
next prev parent reply other threads:[~2012-03-02 9:36 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-03-02 9:35 [PATCH 00/12] powerpc: Low level spring cleaning Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 01/12] powerpc: Remove legacy iSeries bits from assembly files Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 02/12] powerpc: Use the same interrupt prolog for perfmon as other interrupts Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 03/12] powerpc: Rework runlatch code Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 04/12] powerpc: Improve 64-bit syscall entry/exit Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 05/12] powerpc: Improve behaviour of irq tracing on 64-bit exception entry Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 06/12] powerpc: Disable interrupts in 64-bit kernel FP and vector faults Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 07/12] powerpc: Call do_page_fault() with interrupts off Benjamin Herrenschmidt
2012-03-02 9:35 ` Benjamin Herrenschmidt [this message]
2012-03-02 9:35 ` [PATCH 09/12] powerpc/xmon: Add display of soft & hard irq states Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 10/12] powerpc: Fix register clobbering when accumulating stolen time Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 11/12] powerpc: Replace mfmsr instructions with load from PACA kernel_msr field Benjamin Herrenschmidt
2012-03-02 9:35 ` [PATCH 12/12] powerpc: Rework lazy-interrupt handling Benjamin Herrenschmidt
2012-03-04 23:11 ` Benjamin Herrenschmidt
[not found] ` <1330680922-6894-4-git-send-email-benh__48089.0872575857$1330681158$gmane$org@kernel.crashing.org>
2012-04-05 21:38 ` [PATCH 03/12] powerpc: Rework runlatch code Andreas Schwab
2012-04-05 22:12 ` Benjamin Herrenschmidt
2012-04-05 23:05 ` Andreas Schwab
2012-04-11 0:46 ` Benjamin Herrenschmidt
2012-04-11 9:09 ` Andreas Schwab
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1330680922-6894-9-git-send-email-benh@kernel.crashing.org \
--to=benh@kernel.crashing.org \
--cc=linuxppc-dev@ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).