public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Meelis Roos <mroos@ut.ee>,
	"David S. Miller" <davem@davemloft.net>
Subject: [ 57/58] sparc64: Fix tsb_grow() in atomic context.
Date: Mon, 25 Feb 2013 14:19:53 -0800	[thread overview]
Message-ID: <20130225221648.454163646@linuxfoundation.org> (raw)
In-Reply-To: <20130225221636.018756060@linuxfoundation.org>

3.7-stable review patch.  If anyone has any objections, please let me know.

------------------


From: "David S. Miller" <davem@davemloft.net>

[ Upstream commit 0fbebed682ff2788dee58e8d7f7dda46e33aa10b ]

If our first THP installation for an MM is via the set_pmd_at() done
during khugepaged's collapsing we'll end up in tsb_grow() trying to do
a GFP_KERNEL allocation with several locks held.

Simply using GFP_ATOMIC in this situation is not the best option
because we really can't have this fail, so we'd really like to keep
this an order 0 GFP_KERNEL allocation if possible.

Also, doing the TSB allocation from khugepaged is a really bad idea
because we'll allocate it potentially from the wrong NUMA node in that
context.

So what we do is defer the hugepage TSB allocation until the first TLB
miss we take on a hugepage.  This is slightly tricky because we have
to handle two unusual cases:

1) Taking the first hugepage TLB miss in the window trap handler.
   We'll call the winfix_trampoline when that is detected.

2) An initial TSB allocation via TLB miss races with a hugetlb
   fault on another cpu running the same MM.  We handle this by
   unconditionally loading the TSB we see into the current cpu
   even if it's non-NULL at hugetlb_setup time.

Reported-by: Meelis Roos <mroos@ut.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/include/asm/hugetlb.h |    1 -
 arch/sparc/include/asm/page_64.h |    4 ++--
 arch/sparc/kernel/tsb.S          |   39 +++++++++++++++++++++++++++++++++++----
 arch/sparc/mm/fault_64.c         |    9 +++++++--
 arch/sparc/mm/init_64.c          |   24 +++++++++++++++++++-----
 arch/sparc/mm/tlb.c              |   11 +++++++++--
 6 files changed, 72 insertions(+), 16 deletions(-)

--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -12,7 +12,6 @@ pte_t huge_ptep_get_and_clear(struct mm_
 
 static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
 {
-	hugetlb_setup(mm);
 }
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -27,8 +27,8 @@
 #ifndef __ASSEMBLY__
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-struct mm_struct;
-extern void hugetlb_setup(struct mm_struct *mm);
+struct pt_regs;
+extern void hugetlb_setup(struct pt_regs *regs);
 #endif
 
 #define WANT_PAGE_VIRTUAL
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -136,12 +136,43 @@ tsb_miss_page_table_walk_sun4v_fastpath:
 	 nop
 
 	/* It is a huge page, use huge page TSB entry address we
-	 * calculated above.
+	 * calculated above.  If the huge page TSB has not been
+	 * allocated, setup a trap stack and call hugetlb_setup()
+	 * to do so, then return from the trap to replay the TLB
+	 * miss.
+	 *
+	 * This is necessary to handle the case of transparent huge
+	 * pages where we don't really have a non-atomic context
+	 * in which to allocate the hugepage TSB hash table.  When
+	 * the 'mm' faults in the hugepage for the first time, we
+	 * thus handle it here.  This also makes sure that we can
+	 * allocate the TSB hash table on the correct NUMA node.
 	 */
 	TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
-	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2
-	cmp		%g2, -1
-	movne		%xcc, %g2, %g1
+	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1
+	cmp		%g1, -1
+	bne,pt		%xcc, 60f
+	 nop
+
+661:	rdpr		%pstate, %g5
+	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	SET_GL(1)
+	nop
+	.previous
+
+	rdpr	%tl, %g3
+	cmp	%g3, 1
+	bne,pn	%xcc, winfix_trampoline
+	 nop
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	hugetlb_setup
+	 add	%sp, PTREGS_OFF, %o0
+	ba,pt	%xcc, rtrap
+	 nop
+
 60:
 #endif
 
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -472,8 +472,13 @@ good_area:
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.huge_pte_count;
 	if (unlikely(mm_rss >
-		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
-		tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
+		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
+			tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+		else
+			hugetlb_setup(regs);
+
+	}
 #endif
 	return;
 
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2718,14 +2718,28 @@ static void context_reload(void *__data)
 		load_secondary_context(mm);
 }
 
-void hugetlb_setup(struct mm_struct *mm)
+void hugetlb_setup(struct pt_regs *regs)
 {
-	struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+	struct mm_struct *mm = current->mm;
+	struct tsb_config *tp;
 
-	if (likely(tp->tsb != NULL))
-		return;
+	if (in_atomic() || !mm) {
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+		pr_alert("Unexpected HugeTLB setup in atomic context.\n");
+		die_if_kernel("HugeTSB in atomic", regs);
+	}
+
+	tp = &mm->context.tsb_block[MM_TSB_HUGE];
+	if (likely(tp->tsb == NULL))
+		tsb_grow(mm, MM_TSB_HUGE, 0);
 
-	tsb_grow(mm, MM_TSB_HUGE, 0);
 	tsb_context_switch(mm);
 	smp_tsb_sync(mm);
 
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -135,8 +135,15 @@ void set_pmd_at(struct mm_struct *mm, un
 			mm->context.huge_pte_count++;
 		else
 			mm->context.huge_pte_count--;
-		if (mm->context.huge_pte_count == 1)
-			hugetlb_setup(mm);
+
+		/* Do not try to allocate the TSB hash table if we
+		 * don't have one already.  We have various locks held
+		 * and thus we'll end up doing a GFP_KERNEL allocation
+		 * in an atomic context.
+		 *
+		 * Instead, we let the first TLB miss on a hugepage
+		 * take care of this.
+		 */
 	}
 
 	if (!pmd_none(orig)) {



  parent reply	other threads:[~2013-02-25 22:21 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-25 22:18 [ 00/58] 3.7.10-stable review Greg Kroah-Hartman
2013-02-25 22:18 ` [ 01/58] drm/nouveau/vm: fix memory corruption when pgt allocation fails Greg Kroah-Hartman
2013-02-25 22:18 ` [ 02/58] x86-32, mm: Rip out x86_32 NUMA remapping code Greg Kroah-Hartman
2013-02-25 22:18 ` [ 03/58] x86-32, mm: Remove reference to resume_map_numa_kva() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 04/58] x86-32, mm: Remove reference to alloc_remap() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 05/58] perf tools: Fix build with bison 2.3 and older Greg Kroah-Hartman
2013-02-25 22:19 ` [ 06/58] perf hists: Fix period symbol_conf.field_sep display Greg Kroah-Hartman
2013-02-25 22:19 ` [ 07/58] mm: fix pageblock bitmap allocation Greg Kroah-Hartman
2013-02-25 22:19 ` [ 08/58] timeconst.pl: Eliminate Perl warning Greg Kroah-Hartman
2013-02-25 22:19 ` [ 09/58] genirq: Avoid deadlock in spurious handling Greg Kroah-Hartman
2013-02-25 22:19 ` [ 10/58] posix-cpu-timers: Fix nanosleep task_struct leak Greg Kroah-Hartman
2013-02-25 22:19 ` [ 11/58] hrtimer: Prevent hrtimer_enqueue_reprogram race Greg Kroah-Hartman
2013-02-25 22:19 ` [ 12/58] x86: Hyper-V: register clocksource only if its advertised Greg Kroah-Hartman
2013-02-25 22:19 ` [ 13/58] workqueue: un-GPL function delayed_work_timer_fn() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 14/58] ALSA: ali5451: remove irq enabling in pointer callback Greg Kroah-Hartman
2013-02-25 22:19 ` [ 15/58] ALSA: rme32.c irq enabling after spin_lock_irq Greg Kroah-Hartman
2013-02-25 22:19 ` [ 16/58] tty: Prevent deadlock in n_gsm driver Greg Kroah-Hartman
2013-02-25 22:19 ` [ 17/58] tty: set_termios/set_termiox should not return -EINTR Greg Kroah-Hartman
2013-02-25 22:19 ` [ 18/58] USB: serial: fix null-pointer dereferences on disconnect Greg Kroah-Hartman
2013-02-25 22:19 ` [ 19/58] serial: imx: Fix recursive locking bug Greg Kroah-Hartman
2013-02-25 22:19 ` [ 20/58] serial_core: Fix type definition for PORT_BRCM_TRUMANAGE Greg Kroah-Hartman
2013-02-25 22:19 ` [ 21/58] b43: Increase number of RX DMA slots Greg Kroah-Hartman
2013-02-25 22:19 ` [ 22/58] rtlwifi: rtl8192cu: Add new USB ID Greg Kroah-Hartman
2013-02-25 22:19 ` [ 23/58] rtlwifi: usb: allocate URB control message setup_packet and data buffer separately Greg Kroah-Hartman
2013-02-25 22:19 ` [ 24/58] tty vt: fix character insertion overflow Greg Kroah-Hartman
2013-02-25 22:19 ` [ 25/58] xen: Send spinlock IPI to all waiters Greg Kroah-Hartman
2013-02-25 22:19 ` [ 26/58] xen: close evtchn port if binding to irq fails Greg Kroah-Hartman
2013-02-25 22:19 ` [ 27/58] zram: Fix deadlock bug in partial read/write Greg Kroah-Hartman
2013-02-25 22:19 ` [ 28/58] Driver core: treat unregistered bus_types as having no devices Greg Kroah-Hartman
2013-02-25 22:19 ` [ 29/58] mmu_notifier_unregister NULL Pointer deref and multiple ->release() callouts Greg Kroah-Hartman
2013-02-25 22:19 ` [ 30/58] KVM: s390: Handle hosts not supporting s390-virtio Greg Kroah-Hartman
2013-02-25 22:19 ` [ 31/58] s390/kvm: Fix store status for ACRS/FPRS Greg Kroah-Hartman
2013-02-25 22:19 ` [ 32/58] futex: Revert "futex: Mark get_robust_list as deprecated" Greg Kroah-Hartman
2013-02-25 22:19 ` [ 33/58] inotify: remove broken mask checks causing unmount to be EINVAL Greg Kroah-Hartman
2013-02-25 22:19 ` [ 34/58] fs/block_dev.c: page cache wrongly left invalidated after revalidate_disk() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 35/58] ocfs2: unlock super lock if lockres refresh failed Greg Kroah-Hartman
2013-02-25 22:19 ` [ 36/58] drivers/video/backlight/adp88?0_bl.c: fix resume Greg Kroah-Hartman
2013-02-25 22:19 ` [ 37/58] tmpfs: fix use-after-free of mempolicy object Greg Kroah-Hartman
2013-02-25 22:19 ` [ 38/58] mm/fadvise.c: drain all pagevecs if POSIX_FADV_DONTNEED fails to discard all pages Greg Kroah-Hartman
2013-02-25 22:19 ` [ 39/58] xen-pciback: rate limit error messages from xen_pcibk_enable_msi{,x}() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 40/58] drivercore: Fix ordering between deferred_probe and exiting initcalls Greg Kroah-Hartman
2013-02-25 22:19 ` [ 41/58] umount oops when remove blocklayoutdriver first Greg Kroah-Hartman
2013-02-25 22:19 ` [ 42/58] NLM: Ensure that we resend all pending blocking locks after a reclaim Greg Kroah-Hartman
2013-02-25 22:19 ` [ 43/58] NFSv4.1: Dont decode skipped layoutgets Greg Kroah-Hartman
2013-02-25 22:19 ` [ 44/58] p54usb: corrected USB ID for T-Com Sinus 154 data II Greg Kroah-Hartman
2013-02-25 22:19 ` [ 45/58] ALSA: usb-audio: fix Roland A-PRO support Greg Kroah-Hartman
2013-02-25 22:19 ` [ 46/58] ALSA: usb: Fix Processing Unit Descriptor parsers Greg Kroah-Hartman
2013-02-25 22:19 ` [ 47/58] ALSA: hda - Release assigned pin/cvt at error path of hdmi_pcm_open() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 48/58] ALSA: hda - Fix default multichannel HDMI mapping regression Greg Kroah-Hartman
2013-02-25 22:19 ` [ 49/58] ALSA: hda - Workaround for silent output on Sony Vaio VGC-LN51JGB with ALC889 Greg Kroah-Hartman
2013-02-25 22:19 ` [ 50/58] ALSA: hda - hdmi: ELD shouldnt be valid after unplug Greg Kroah-Hartman
2013-02-25 22:19 ` [ 51/58] GFS2: Get a block reservation before resizing a file Greg Kroah-Hartman
2013-02-25 22:34   ` Bob Peterson
2013-02-25 22:19 ` [ 52/58] sunvdc: Fix off-by-one in generic_request() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 53/58] sparc64: Add missing HAVE_ARCH_TRANSPARENT_HUGEPAGE Greg Kroah-Hartman
2013-02-25 22:19 ` [ 54/58] sparc64: Fix get_user_pages_fast() wrt. THP Greg Kroah-Hartman
2013-02-25 22:19 ` [ 55/58] sparc64: Fix gfp_flags setting in tsb_grow() Greg Kroah-Hartman
2013-02-25 22:19 ` [ 56/58] sparc64: Handle hugepage TSB being NULL Greg Kroah-Hartman
2013-02-25 22:19 ` Greg Kroah-Hartman [this message]
2013-02-25 22:19 ` [ 58/58] sparc64: Fix huge PMD to PTE translation for sun4u in TLB miss handler Greg Kroah-Hartman
2013-02-26  0:47 ` [ 00/58] 3.7.10-stable review Shuah Khan
2013-02-26  1:02   ` Greg Kroah-Hartman
2013-02-26 17:37 ` Andre Tomt
2013-02-26 17:48   ` Josh Boyer
2013-02-26 17:52   ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130225221648.454163646@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mroos@ut.ee \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox