stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>,
	David Rientjes <rientjes@google.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	azurIt <azurit@pobox.sk>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 3.10 65/70] arch: mm: pass userspace fault flag to generic fault handler
Date: Wed, 19 Nov 2014 12:52:56 -0800	[thread overview]
Message-ID: <20141119205213.062595580@linuxfoundation.org> (raw)
In-Reply-To: <20141119205210.913169042@linuxfoundation.org>

3.10-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Johannes Weiner <hannes@cmpxchg.org>

commit 759496ba6407c6994d6a5ce3a5e74937d7816208 upstream.

Unlike global OOM handling, memory cgroup code will invoke the OOM killer
in any OOM situation because it has no way of telling faults occuring in
kernel context - which could be handled more gracefully - from
user-triggered faults.

Pass a flag that identifies faults originating in user space from the
architecture-specific fault handlers to generic code so that memcg OOM
handling can be improved.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/alpha/mm/fault.c      |    7 ++++---
 arch/arc/mm/fault.c        |    6 ++++--
 arch/arm/mm/fault.c        |    9 ++++++---
 arch/arm64/mm/fault.c      |   17 ++++++++++-------
 arch/avr32/mm/fault.c      |    2 ++
 arch/cris/mm/fault.c       |    6 ++++--
 arch/frv/mm/fault.c        |   10 ++++++----
 arch/hexagon/mm/vm_fault.c |    6 ++++--
 arch/ia64/mm/fault.c       |    6 ++++--
 arch/m32r/mm/fault.c       |   10 ++++++----
 arch/m68k/mm/fault.c       |    2 ++
 arch/metag/mm/fault.c      |    6 ++++--
 arch/microblaze/mm/fault.c |    7 +++++--
 arch/mips/mm/fault.c       |    6 ++++--
 arch/mn10300/mm/fault.c    |    2 ++
 arch/openrisc/mm/fault.c   |    1 +
 arch/parisc/mm/fault.c     |    7 +++++--
 arch/powerpc/mm/fault.c    |    7 ++++---
 arch/s390/mm/fault.c       |    2 ++
 arch/score/mm/fault.c      |    7 ++++++-
 arch/sh/mm/fault.c         |    9 ++++++---
 arch/sparc/mm/fault_32.c   |   12 +++++++++---
 arch/sparc/mm/fault_64.c   |    6 ++++--
 arch/tile/mm/fault.c       |    7 +++++--
 arch/um/kernel/trap.c      |   20 ++++++++++++--------
 arch/unicore32/mm/fault.c  |    8 ++++++--
 arch/x86/mm/fault.c        |    8 +++++---
 arch/xtensa/mm/fault.c     |    2 ++
 include/linux/mm.h         |    1 +
 29 files changed, 135 insertions(+), 64 deletions(-)

--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, uns
 	const struct exception_table_entry *fixup;
 	int fault, si_code = SEGV_MAPERR;
 	siginfo_t info;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (cause > 0 ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
 	   (or is suppressed by the PALcode).  Support that for older CPUs
@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, uns
 	if (address >= TASK_SIZE)
 		goto vmalloc_fault;
 #endif
-
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -142,6 +142,7 @@ retry:
 	} else {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	}
 
 	/* If for any reason at all we couldn't handle the fault,
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -59,8 +59,7 @@ void do_page_fault(struct pt_regs *regs,
 	struct mm_struct *mm = tsk->mm;
 	siginfo_t info;
 	int fault, ret;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
@@ -88,6 +87,8 @@ void do_page_fault(struct pt_regs *regs,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -115,6 +116,7 @@ good_area:
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsign
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	int write = fsr & FSR_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (notify_page_fault(regs, fsr))
 		return 0;
@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsign
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (fsr & FSR_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * As per x86, we may deadlock here.  However, since the kernel only
 	 * validly references user space from well defined areas of the code,
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsig
 	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-	if (esr & ESR_LNX_EXEC) {
-		vm_flags = VM_EXEC;
-	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
-		vm_flags = VM_WRITE;
-		mm_flags |= FAULT_FLAG_WRITE;
-	}
-
 	tsk = current;
 	mm  = tsk->mm;
 
@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsig
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		mm_flags |= FAULT_FLAG_USER;
+
+	if (esr & ESR_LNX_EXEC) {
+		vm_flags = VM_EXEC;
+	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
+	}
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned l
 
 	local_irq_enable();
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, str
 	struct vm_area_struct * vma;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	D(printk(KERN_DEBUG
 		 "Page fault for %lX on %X at %lX, prot %d write %d\n",
@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, str
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -155,6 +156,7 @@ retry:
 	} else if (writeaccess == 1) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datamm
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
 	unsigned long _pme, lrai, lrad, fixup;
+	unsigned long flags = 0;
 	siginfo_t info;
 	pgd_t *pge;
 	pud_t *pue;
 	pte_t *pte;
-	int write;
 	int fault;
 
 #if 0
@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datamm
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(__frame))
+		flags |= FAULT_FLAG_USER;
+
 	down_read(&mm->mmap_sem);
 
 	vma = find_vma(mm, ear0);
@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datamm
  */
  good_area:
 	info.si_code = SEGV_ACCERR;
-	write = 0;
 	switch (esr0 & ESR0_ATXC) {
 	default:
 		/* handle write to write protected page */
@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datamm
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
-		write = 1;
+		flags |= FAULT_FLAG_WRITE;
 		break;
 
 		 /* handle read from protected page */
@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datamm
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(mm, vma, ear0, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address
 	int si_code = SEGV_MAPERR;
 	int fault;
 	const struct exception_table_entry *fixup;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 (cause > 0 ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/*
 	 * If we're in an interrupt or have no user context,
@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address
 
 	local_irq_enable();
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -96,6 +97,7 @@ good_area:
 	case FLT_STORE:
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 		break;
 	}
 
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long addres
 	mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
 		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
 
-	flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
-
 	/* mmap_sem is performance critical.... */
 	prefetchw(&mm->mmap_sem);
 
@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long addres
 	if (notify_page_fault(regs, TRAP_BRKPT))
 		return;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (mask & VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
 retry:
 	down_read(&mm->mmap_sem);
 
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	unsigned long page, addr;
-	int write;
+	unsigned long flags = 0;
 	int fault;
 	siginfo_t info;
 
@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (error_code & ACE_USERMODE)
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_
  */
 good_area:
 	info.si_code = SEGV_ACCERR;
-	write = 0;
 	switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
 		default:	/* 3: write, present */
 			/* fall through */
 		case ACE_WRITE:	/* write, not present */
 			if (!(vma->vm_flags & VM_WRITE))
 				goto bad_area;
-			write++;
+			flags |= FAULT_FLAG_WRITE;
 			break;
 		case ACE_PROTECTION:	/* read, present */
 		case 0:		/* read, not present */
@@ -194,7 +196,7 @@ good_area:
 	 */
 	addr = (address & PAGE_MASK);
 	set_thread_fault_code(error_code);
-	fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(mm, vma, addr, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs,
 	struct vm_area_struct *vma, *prev_vma;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				(write_access ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 
@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
@@ -121,6 +122,7 @@ good_area:
 	if (write_access) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs,
 	int code = SEGV_MAPERR;
 	int is_write = error_code & ESR_S;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-					 (is_write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	regs->ear = address;
 	regs->esr = error_code;
@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -199,6 +201,7 @@ good_area:
 	if (unlikely(is_write)) {
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
 		/* protection fault */
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -41,8 +41,7 @@ asmlinkage void __kprobes do_page_fault(
 	const int field = sizeof(unsigned long) * 2;
 	siginfo_t info;
 	int fault;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-						 (write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 #if 0
 	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
@@ -92,6 +91,8 @@ asmlinkage void __kprobes do_page_fault(
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -113,6 +114,7 @@ good_area:
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (cpu_has_rixi) {
 			if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_
 	if (user_mode(regs)) {
 		/* Exception was in userspace: reenable interrupts */
 		local_irq_enable();
+		flags |= FAULT_FLAG_USER;
 	} else {
 		/* If exception was in a syscall, then IRQ's may have
 		 * been enabled or disabled.  If they were enabled,
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs,
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (acc_type & VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma_prev(mm, address, &prev_vma);
@@ -203,8 +207,7 @@ good_area:
 	 * fault.
 	 */
 
-	fault = handle_mm_fault(mm, vma, address,
-			flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
+	fault = handle_mm_fault(mm, vma, address, flags);
 
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 		return;
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_re
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-	if (is_write)
-		flags |= FAULT_FLAG_WRITE;
-
 #ifdef CONFIG_PPC_ICSWX
 	/*
 	 * we need to do this early because this "data storage
@@ -280,6 +277,9 @@ int __kprobes do_page_fault(struct pt_re
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -408,6 +408,7 @@ good_area:
 	} else if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
 		/* protection fault */
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -302,6 +302,8 @@ static inline int do_exception(struct pt
 	address = trans_exc_code & __FAIL_ADDR_MASK;
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
 	down_read(&mm->mmap_sem);
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 	const int field = sizeof(unsigned long) * 2;
+	unsigned long flags = 0;
 	siginfo_t info;
 	int fault;
 
@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
 	if (!vma)
@@ -95,6 +99,7 @@ good_area:
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
 			goto bad_area;
@@ -105,7 +110,7 @@ good_area:
 	* make sure we exit gracefully rather than endlessly redo
 	* the fault.
 	*/
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, flags);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(
 	struct mm_struct *mm;
 	struct vm_area_struct * vma;
 	int fault;
-	int write = error_code & FAULT_CODE_WRITE;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (write ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -476,6 +474,11 @@ good_area:
 
 	set_thread_fault_code(error_code);
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (error_code & FAULT_CODE_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt
 	unsigned long g2;
 	int from_user = !(regs->psr & PSR_PS);
 	int fault, code;
-	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-			      (write ? FAULT_FLAG_WRITE : 0));
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (text_fault)
 		address = regs->pc;
@@ -235,6 +234,11 @@ good_area:
 			goto bad_area;
 	}
 
+	if (from_user)
+		flags |= FAULT_FLAG_USER;
+	if (write)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -383,6 +387,7 @@ static void force_user_fault(unsigned lo
 	struct vm_area_struct *vma;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	unsigned int flags = FAULT_FLAG_USER;
 	int code;
 
 	code = SEGV_MAPERR;
@@ -402,11 +407,12 @@ good_area:
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
-	switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
+	switch (handle_mm_fault(mm, vma, address, flags)) {
 	case VM_FAULT_SIGBUS:
 	case VM_FAULT_OOM:
 		goto do_sigbus;
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -323,7 +323,8 @@ asmlinkage void __kprobes do_sparc64_fau
 			bad_kernel_pc(regs, address);
 			return;
 		}
-	}
+	} else
+		flags |= FAULT_FLAG_USER;
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -426,13 +427,14 @@ good_area:
 		    vma->vm_file != NULL)
 			set_thread_fault_code(fault_code |
 					      FAULT_CODE_BLKCOMMIT);
+
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		/* Allow reads even for write-only mappings */
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
 
-	flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
 	fault = handle_mm_fault(mm, vma, address, flags);
 
 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_r
 	if (!is_page_fault)
 		write = 1;
 
-	flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-		 (write ? FAULT_FLAG_WRITE : 0));
+	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
 
@@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_r
 		goto bad_area_nosemaphore;
 	}
 
+	if (!is_kernel_mode)
+		flags |= FAULT_FLAG_USER;
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -425,6 +427,7 @@ good_area:
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!is_page_fault || !(vma->vm_flags & VM_READ))
 			goto bad_area;
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long addr
 	pmd_t *pmd;
 	pte_t *pte;
 	int err = -EFAULT;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 (is_write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	*code_out = SEGV_MAPERR;
 
@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long addr
 	if (in_atomic())
 		goto out_nosemaphore;
 
+	if (is_user)
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
@@ -58,12 +59,15 @@ retry:
 
 good_area:
 	*code_out = SEGV_ACCERR;
-	if (is_write && !(vma->vm_flags & VM_WRITE))
-		goto out;
-
-	/* Don't require VM_READ|VM_EXEC for write faults! */
-	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
-		goto out;
+	if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto out;
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* Don't require VM_READ|VM_EXEC for write faults! */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto out;
+	}
 
 	do {
 		int fault;
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, uns
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-				 ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, uns
 	if (in_atomic() || !mm)
 		goto no_context;
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (!(fsr ^ 0x12))
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * As per x86, we may deadlock here.  However, since the kernel only
 	 * validly references user space from well defined areas of the code,
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1017,9 +1017,7 @@ __do_page_fault(struct pt_regs *regs, un
 	unsigned long address;
 	struct mm_struct *mm;
 	int fault;
-	int write = error_code & PF_WRITE;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-					(write ? FAULT_FLAG_WRITE : 0);
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1089,6 +1087,7 @@ __do_page_fault(struct pt_regs *regs, un
 	if (user_mode_vm(regs)) {
 		local_irq_enable();
 		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
 	} else {
 		if (regs->flags & X86_EFLAGS_IF)
 			local_irq_enable();
@@ -1113,6 +1112,9 @@ __do_page_fault(struct pt_regs *regs, un
 		return;
 	}
 
+	if (error_code & PF_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs)
 	       address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
 #endif
 
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
 retry:
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -167,6 +167,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 #define FAULT_FLAG_TRIED	0x40	/* second try */
+#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's



  parent reply	other threads:[~2014-11-19 20:52 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-19 20:51 [PATCH 3.10 00/70] 3.10.61-stable review Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 01/70] ip6_tunnel: Use ip6_tnl_dev_init as the ndo_init function Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 02/70] gre6: Move the setting of dev->iflink into the ndo_init functions Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 03/70] net: sctp: fix NULL pointer dereference in af->from_addr_param on malformed packet Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 04/70] net: sctp: fix memory leak in auth key management Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 05/70] sunvdc: add cdrom and v1.1 protocol support Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 06/70] sunvdc: compute vdisk geometry from capacity Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 07/70] sunvdc: limit each sg segment to a page Greg Kroah-Hartman
2014-11-19 20:51 ` [PATCH 3.10 08/70] vio: fix reuse of vio_dring slot Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 09/70] sunvdc: dont call VD_OP_GET_VTOC Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 10/70] sparc64: Fix crashes in schizo_pcierr_intr_other() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 11/70] sparc64: Do irq_{enter,exit}() around generic_smp_call_function*() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 12/70] sparc32: Implement xchg and atomic_xchg using ATOMIC_HASH locks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 13/70] x86, x32, audit: Fix x32s AUDIT_ARCH wrt audit Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 14/70] audit: keep inode pinned Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 15/70] ahci: Add Device IDs for Intel Sunrise Point PCH Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 16/70] ahci: disable MSI instead of NCQ on Samsung pci-e SSDs on macbooks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 17/70] ALSA: usb-audio: Fix memory leak in FTU quirk Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 18/70] xtensa: re-wire umount syscall to sys_oldumount Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 19/70] libceph: do not crash on large auth tickets Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 20/70] iwlwifi: configure the LTR Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 21/70] macvtap: Fix csum_start when VLAN tags are present Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 22/70] mac80211: fix use-after-free in defragmentation Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 23/70] drm/radeon: add missing crtc unlock when setting up the MC Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 26/70] firewire: cdev: prevent kernel stack leaking into ioctl arguments Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 27/70] nfs: fix pnfs direct write memory leak Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 28/70] scsi: only re-lock door after EH on devices that were reset Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 29/70] parisc: Use compat layer for msgctl, shmat, shmctl and semtimedop syscalls Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 30/70] block: Fix computation of merged request priority Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 31/70] dm btree: fix a recursion depth bug in btree walking code Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 32/70] dm raid: ensure superblocks size matches devices logical block size Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 35/70] NFSv4: Ensure that we remove NFSv4.0 delegations when state has expired Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 36/70] NFS: Dont try to reclaim delegation open state if recovery failed Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 37/70] nfs: Fix use of uninitialized variable in nfs_getattr() Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 38/70] NFSv4: Fix races between nfs_remove_bad_delegation() and delegation return Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 39/70] media: ttusb-dec: buffer overflow in ioctl Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 40/70] kgdb: Remove "weak" from kgdb_arch_pc() declaration Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 41/70] clocksource: Remove "weak" from clocksource_default_clock() declaration Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 42/70] ipc: always handle a new value of auto_msgmni Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 43/70] netfilter: nf_log: account for size of NLMSG_DONE attribute Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 44/70] netfilter: nfnetlink_log: fix maximum packet length logged to userspace Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 45/70] netfilter: nf_log: release skbuff on nlmsg put failure Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 46/70] netfilter: xt_bpf: add mising opaque struct sk_filter definition Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 47/70] netfilter: nf_nat: fix oops on netns removal Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 48/70] br: fix use of ->rx_handler_data in code executed on non-rx_handler path Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 49/70] ARM: probes: fix instruction fetch order with <asm/opcodes.h> Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 51/70] MIPS: Fix forgotten preempt_enable() when CPU has inclusive pcaches Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 52/70] perf: Handle compat ioctl Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 53/70] mei: bus: fix possible boundaries violation Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 54/70] perf/x86/intel: Use proper dTLB-load-misses event on IvyBridge Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 55/70] ARM: Correct BUG() assembly to ensure it is endian-agnostic Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 56/70] net/mlx4_en: Fix BlueFlame race Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 57/70] SCSI: hpsa: fix a race in cmd_free/scsi_done Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 58/70] KVM: x86: Dont report guest userspace emulation error to userspace Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 59/70] net: sctp: fix remote memory pressure from excessive queueing Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 60/70] net: sctp: fix panic on duplicate ASCONF chunks Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 61/70] net: sctp: fix skb_over_panic when receiving malformed " Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 62/70] mm: invoke oom-killer from remaining unconverted page fault handlers Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 63/70] arch: mm: remove obsolete init OOM protection Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 64/70] arch: mm: do not invoke OOM killer on kernel fault OOM Greg Kroah-Hartman
2014-11-19 20:52 ` Greg Kroah-Hartman [this message]
2014-11-19 20:52 ` [PATCH 3.10 66/70] x86: finish user fault error path with fatal signal Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 67/70] mm: memcg: enable memcg OOM killer only for user faults Greg Kroah-Hartman
2014-11-19 20:52 ` [PATCH 3.10 68/70] mm: memcg: rework and document OOM waiting and wakeup Greg Kroah-Hartman
2014-11-19 20:53 ` [PATCH 3.10 69/70] mm: memcg: do not trap chargers with full callstack on OOM Greg Kroah-Hartman
2014-11-19 20:53 ` [PATCH 3.10 70/70] mm: memcg: handle non-error OOM situations more gracefully Greg Kroah-Hartman
2014-11-20  5:30 ` [PATCH 3.10 00/70] 3.10.61-stable review Guenter Roeck
2014-11-21  1:38 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141119205213.062595580@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=azurit@pobox.sk \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhocko@suse.cz \
    --cc=rientjes@google.com \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).