linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Nick Piggin <npiggin@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Avi Kivity <avi@redhat.com>, Thomas Gleixner <tglx@linutronix.de>,
	Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@elte.hu>,
	akpm@linux-foundation.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	David Miller <davem@davemloft.net>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Mel Gorman <mel@csn.ul.ie>
Subject: Re: [PATCH 06/13] mm: Preemptible mmu_gather
Date: Fri, 09 Apr 2010 22:36:24 +0200	[thread overview]
Message-ID: <1270845384.20295.3369.camel@laptop> (raw)
In-Reply-To: <20100409032509.GH5683@laptop>

On Fri, 2010-04-09 at 13:25 +1000, Nick Piggin wrote:
> Have you done some profiling on this? What I would like to see, if
> it's not too much complexity, is to have a small set of pages to
> handle common size frees, and then use them up first by default
> before attempting to allocate more.
> 
> Also, it would be cool to be able to chain allocations to avoid
> TLB flushes even on big frees (overridable by arch of course, in
> case they're doing some non-preeemptible work or you wish to break
> up lock hold times). But that might be just getting over engineered.
> 
Measuring ITLB_FLUSH on Intel nehalem using:

perf stat -a -e r01ae make O=defconfig-build/ -j48 bzImage

-linus      5825850 +- 2545  (100%)
 +patches   5891341 +- 6045  (101%)
 +below     5783991 +- 4725  ( 99%)

(No slab allocations yet)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/asm-generic/tlb.h |  122 ++++++++++++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 40 deletions(-)

Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -17,16 +17,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-/*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
- */
-#ifdef CONFIG_SMP
-  #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
-#else
-  #define tlb_fast_mode(tlb) 1
-#endif
-
 #ifdef HAVE_ARCH_RCU_TABLE_FREE
 /*
  * Semi RCU freeing of the page directories.
@@ -70,31 +60,66 @@ extern void tlb_remove_table(struct mmu_
 
 #endif
 
+struct mmu_gather_batch {
+	struct mmu_gather_batch	*next;
+	unsigned int		nr;
+	unsigned int		max;
+	struct page		*pages[0];
+};
+
+#define MAX_GATHER_BATCH	\
+	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(unsigned long))
+
 /* struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
-	unsigned int		nr;	/* set to ~0U means fast mode */
-	unsigned int		max;	/* nr < max */
-	unsigned int		need_flush;/* Really unmapped some ptes? */
-	unsigned int		fullmm; /* non-zero means full mm flush */
-	struct page		**pages;
-	struct page		*local[8];
+	unsigned int		need_flush : 1,	/* Did free PTEs */
+				fast_mode  : 1; /* No batching   */
+	unsigned int		fullmm;		/* Flush full mm */
+
+	struct mmu_gather_batch *active;
+	struct mmu_gather_batch	local;
+	struct page		*__pages[8];
 
 #ifdef HAVE_ARCH_RCU_TABLE_FREE
 	struct mmu_table_batch	*batch;
 #endif
 };
 
-static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
+/*
+ * For UP we don't need to worry about TLB flush
+ * and page free order so much..
+ */
+#ifdef CONFIG_SMP
+  #define tlb_fast_mode(tlb) (tlb->fast_mode)
+#else
+  #define tlb_fast_mode(tlb) 1
+#endif
+
+static inline int tlb_next_batch(struct mmu_gather *tlb)
 {
-	unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+	struct mmu_gather_batch *batch;
 
-	if (addr) {
-		tlb->pages = (void *)addr;
-		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	batch = tlb->active;
+	if (batch->next) {
+		tlb->active = batch->next;
+		return 1;
 	}
+
+	batch = (void *)__get_free_pages(GFP_ATOMIC, 0);
+	if (!batch)
+		return 0;
+
+	batch->next = NULL;
+	batch->nr   = 0;
+	batch->max  = MAX_GATHER_BATCH;
+
+	tlb->active->next = batch;
+	tlb->active = batch;
+
+	return 1;
 }
 
 /* tlb_gather_mmu
@@ -105,17 +130,16 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
 {
 	tlb->mm = mm;
 
-	tlb->max = ARRAY_SIZE(tlb->local);
-	tlb->pages = tlb->local;
-
-	if (num_online_cpus() > 1) {
-		tlb->nr = 0;
-		__tlb_alloc_pages(tlb);
-	} else /* Use fast mode if only one CPU is online */
-		tlb->nr = ~0U;
-
+	tlb->need_flush = 0;
+	if (num_online_cpus() == 1)
+		tlb->fast_mode = 1;
 	tlb->fullmm = full_mm_flush;
 
+	tlb->local.next = NULL;
+	tlb->local.nr   = 0;
+	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
+	tlb->active     = &tlb->local;
+
 #ifdef HAVE_ARCH_RCU_TABLE_FREE
 	tlb->batch = NULL;
 #endif
@@ -124,6 +148,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
 static inline void
 tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
+	struct mmu_gather_batch *batch;
+
 	if (!tlb->need_flush)
 		return;
 	tlb->need_flush = 0;
@@ -131,12 +157,14 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
 #ifdef HAVE_ARCH_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
-	if (!tlb_fast_mode(tlb)) {
-		free_pages_and_swap_cache(tlb->pages, tlb->nr);
-		tlb->nr = 0;
-		if (tlb->pages == tlb->local)
-			__tlb_alloc_pages(tlb);
+	if (tlb_fast_mode(tlb))
+		return;
+
+	for (batch = &tlb->local; batch; batch = batch->next) {
+		free_pages_and_swap_cache(batch->pages, batch->nr);
+		batch->nr = 0;
 	}
+	tlb->active = &tlb->local;
 }
 
 /* tlb_finish_mmu
@@ -146,13 +174,18 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
+	struct mmu_gather_batch *batch, *next;
+
 	tlb_flush_mmu(tlb, start, end);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	if (tlb->pages != tlb->local)
-		free_pages((unsigned long)tlb->pages, 0);
+	for (batch = tlb->local.next; batch; batch = next) {
+		next = batch->next;
+		free_pages((unsigned long)batch, 0);
+	}
+	tlb->local.next = NULL;
 }
 
 /* tlb_remove_page
@@ -162,14 +195,23 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
+	struct mmu_gather_batch *batch;
+
 	tlb->need_flush = 1;
+
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
 		return;
 	}
-	tlb->pages[tlb->nr++] = page;
-	if (tlb->nr >= tlb->max)
-		tlb_flush_mmu(tlb, 0, 0);
+
+	batch = tlb->active;
+	if (batch->nr == batch->max) {
+		if (!tlb_next_batch(tlb))
+			tlb_flush_mmu(tlb, 0, 0);
+		batch = tlb->active;
+	}
+
+	batch->pages[batch->nr++] = page;
 }
 
 /**

  parent reply	other threads:[~2010-04-09 20:36 UTC|newest]

Thread overview: 120+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-08 19:17 [PATCH 00/13] mm: preemptibility -v2 Peter Zijlstra
2010-04-08 19:17 ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 01/13] powerpc: Add rcu_read_lock() to gup_fast() implementation Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 20:31   ` Rik van Riel
2010-04-09  3:11   ` Nick Piggin
2010-04-13  1:05   ` Benjamin Herrenschmidt
2010-04-13  3:43     ` Paul E. McKenney
2010-04-14 13:51       ` Peter Zijlstra
2010-04-15 14:28         ` Paul E. McKenney
2010-04-16  6:54           ` Benjamin Herrenschmidt
2010-04-16 13:43             ` Paul E. McKenney
2010-04-16 23:25               ` Benjamin Herrenschmidt
2010-04-16 23:25                 ` Benjamin Herrenschmidt
2010-04-16 13:51           ` Peter Zijlstra
2010-04-16 14:17             ` Paul E. McKenney
2010-04-16 14:17               ` Paul E. McKenney
2010-04-16 14:23               ` Peter Zijlstra
2010-04-16 14:32                 ` Paul E. McKenney
2010-04-16 14:56                   ` Peter Zijlstra
2010-04-16 15:09                     ` Paul E. McKenney
2010-04-16 15:14                       ` Peter Zijlstra
2010-04-16 16:45                         ` Paul E. McKenney
2010-04-16 19:37                           ` Peter Zijlstra
2010-04-16 20:28                             ` Paul E. McKenney
2010-04-18  3:06                           ` James Bottomley
2010-04-18 13:55                             ` Paul E. McKenney
2010-04-18 18:55                               ` James Bottomley
2010-04-16  6:51       ` Benjamin Herrenschmidt
2010-04-16  8:18         ` Nick Piggin
2010-04-16  8:29           ` Benjamin Herrenschmidt
2010-04-16  9:22             ` Nick Piggin
2010-04-08 19:17 ` [PATCH 02/13] mm: Revalidate anon_vma in page_lock_anon_vma() Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 20:50   ` Rik van Riel
2010-04-08 21:20   ` Andrew Morton
2010-04-08 21:54     ` Peter Zijlstra
2010-04-08 21:54       ` Peter Zijlstra
2010-04-09  2:19       ` KOSAKI Motohiro
2010-04-09  2:19   ` Minchan Kim
2010-04-09  3:16   ` Nick Piggin
2010-04-09  4:56     ` KAMEZAWA Hiroyuki
2010-04-09  6:34       ` KOSAKI Motohiro
2010-04-09  6:47         ` KAMEZAWA Hiroyuki
2010-04-09  7:29           ` KOSAKI Motohiro
2010-04-09  7:57             ` KAMEZAWA Hiroyuki
2010-04-09  8:03               ` KAMEZAWA Hiroyuki
2010-04-09  8:24                 ` KAMEZAWA Hiroyuki
2010-04-09  8:01             ` Minchan Kim
2010-04-09  8:17               ` KOSAKI Motohiro
2010-04-09 14:41                 ` mlock and pageout race? Minchan Kim
2010-04-09  8:44             ` [PATCH 02/13] mm: Revalidate anon_vma in page_lock_anon_vma() Peter Zijlstra
2010-05-24 19:32               ` Andrew Morton
2010-05-25  9:01                 ` Peter Zijlstra
2010-05-25  9:01                   ` Peter Zijlstra
2010-04-09 12:57   ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 03/13] x86: Remove last traces of quicklist usage Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 20:51   ` Rik van Riel
2010-04-08 19:17 ` [PATCH 04/13] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09 12:35   ` Rik van Riel
2010-04-08 19:17 ` [PATCH 05/13] mm: Make use of the anon_vma ref count Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09  7:04   ` Christian Ehrhardt
2010-04-09  7:04     ` Christian Ehrhardt
2010-04-09  9:57     ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 06/13] mm: Preemptible mmu_gather Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09  3:25   ` Nick Piggin
2010-04-09  8:18     ` Peter Zijlstra
2010-04-09 20:36     ` Peter Zijlstra [this message]
2010-04-19 19:16       ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 07/13] powerpc: " Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09  4:07   ` Nick Piggin
2010-04-09  8:14     ` Peter Zijlstra
2010-04-09  8:14       ` Peter Zijlstra
2010-04-09  8:46       ` Nick Piggin
2010-04-09  9:22         ` Peter Zijlstra
2010-04-13  2:06       ` Benjamin Herrenschmidt
2010-04-13  1:56     ` Benjamin Herrenschmidt
2010-04-13  1:23   ` Benjamin Herrenschmidt
2010-04-13 10:22     ` Peter Zijlstra
2010-04-14 13:34     ` Peter Zijlstra
2010-04-14 13:34       ` Peter Zijlstra
2010-04-14 13:51     ` Peter Zijlstra
2010-04-14 13:51       ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 08/13] sparc: " Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 09/13] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09  3:35   ` Nick Piggin
2010-04-09  8:08     ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 10/13] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09 15:36   ` Rik van Riel
2010-04-08 19:17 ` [PATCH 11/13] mutex: Provide mutex_is_contended Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-09 15:37   ` Rik van Riel
2010-04-08 19:17 ` [PATCH 12/13] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 19:17 ` [PATCH 13/13] mm: Optimize page_lock_anon_vma Peter Zijlstra
2010-04-08 19:17   ` Peter Zijlstra
2010-04-08 22:18   ` Paul E. McKenney
2010-04-09  8:35     ` Peter Zijlstra
2010-04-09 19:22       ` Paul E. McKenney
2010-04-09 19:22         ` Paul E. McKenney
2010-04-08 20:29 ` [PATCH 00/13] mm: preemptibility -v2 David Miller
2010-04-08 20:35   ` Peter Zijlstra
2010-04-09  1:00   ` David Miller
2010-04-09  4:14 ` Nick Piggin
2010-04-09  8:35   ` Peter Zijlstra
2010-04-09  8:35     ` Peter Zijlstra
2010-04-09  8:50     ` Nick Piggin
2010-04-09  8:58       ` Peter Zijlstra
2010-04-09  8:58 ` Martin Schwidefsky
2010-04-09  9:53   ` Peter Zijlstra
2010-04-09  9:03 ` David Howells
2010-04-09  9:22   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1270845384.20295.3369.camel@laptop \
    --to=peterz@infradead.org \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=davem@davemloft.net \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=npiggin@suse.de \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).