From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Andrea Arcangeli <aarcange@redhat.com>,
Avi Kivity <avi@redhat.com>, Thomas Gleixner <tglx@linutronix.de>,
Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@elte.hu>,
akpm@linux-fou
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
David Miller <davem@davemloft.net>,
Hugh Dickins <hugh.dickins@tiscali.co.uk>,
Mel Gorman <mel@csn.ul.ie>, Nick Piggin <npiggin@suse.de>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Paul McKenney <paulmck@linux.vnet.ibm.com>,
Yanmin Zhang <yanmin_zhang@linux.intel.com>,
Stephen Rothwell <sfr@canb.auug.org.au>
Subject: [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code
Date: Sat, 28 Aug 2010 16:16:52 +0200 [thread overview]
Message-ID: <20100828142456.450686838@chello.nl> (raw)
In-Reply-To: 20100828141637.421594670@chello.nl
[-- Attachment #1: mm-preempt-tlb-gather-rcu.patch --]
[-- Type: text/plain, Size: 11801 bytes --]
In case other architectures require RCU freed page-tables to implement
gup_fast() and software filled hashes and similar things, provide the
means to do so by moving the logic into generic code.
Requested-by: David Miller <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/include/asm/pgalloc.h | 23 +++++++-
arch/powerpc/include/asm/tlb.h | 10 ---
arch/powerpc/mm/pgtable.c | 98 -------------------------------------
arch/powerpc/mm/tlb_hash32.c | 3 -
arch/powerpc/mm/tlb_hash64.c | 3 -
arch/powerpc/mm/tlb_nohash.c | 3 -
include/asm-generic/tlb.h | 57 +++++++++++++++++++--
mm/memory.c | 77 +++++++++++++++++++++++++++++
8 files changed, 149 insertions(+), 125 deletions(-)
Index: linux-2.6/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/pgalloc.h
+++ linux-2.6/arch/powerpc/include/asm/pgalloc.h
@@ -31,14 +31,31 @@ static inline void pte_free(struct mm_st
#endif
#ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(struct mmu_gather *tlb);
+#define HAVE_ARCH_RCU_TABLE_FREE
+
+struct mmu_gather;
+extern void tlb_remove_table(struct mmu_gather *, void *);
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(table, shift);
}
-static inline void pte_free_finish(struct mmu_gather *tlb) { }
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
Index: linux-2.6/arch/powerpc/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/tlb.h
+++ linux-2.6/arch/powerpc/include/asm/tlb.h
@@ -28,16 +28,6 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
-#define HAVE_ARCH_MMU_GATHER 1
-
-struct pte_freelist_batch;
-
-struct arch_mmu_gather {
- struct pte_freelist_batch *batch;
-};
-
-#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
-
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
Index: linux-2.6/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable.c
+++ linux-2.6/arch/powerpc/mm/pgtable.c
@@ -33,104 +33,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_SMP
-
-/*
- * Handle batching of page table freeing on SMP. Page tables are
- * queued up and send to be freed later by RCU in order to avoid
- * freeing a page table page that is being walked without locks
- */
-
-static unsigned long pte_freelist_forced_free;
-
-struct pte_freelist_batch
-{
- struct rcu_head rcu;
- unsigned int index;
- unsigned long tables[0];
-};
-
-#define PTE_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
- / sizeof(unsigned long))
-
-static void pte_free_smp_sync(void *arg)
-{
- /* Do nothing, just ensure we sync with all CPUs */
-}
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-static void pgtable_free_now(void *table, unsigned shift)
-{
- pte_freelist_forced_free++;
-
- smp_call_function(pte_free_smp_sync, NULL, 1);
-
- pgtable_free(table, shift);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
- struct pte_freelist_batch *batch =
- container_of(head, struct pte_freelist_batch, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++) {
- void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
- }
-
- free_page((unsigned long)batch);
-}
-
-static void pte_free_submit(struct pte_freelist_batch *batch)
-{
- call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
- unsigned long pgf;
-
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- pgtable_free(table, shift);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
- if (*batchp == NULL) {
- pgtable_free_now(table, shift);
- return;
- }
- (*batchp)->index = 0;
- }
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf = (unsigned long)table | shift;
- (*batchp)->tables[(*batchp)->index++] = pgf;
- if ((*batchp)->index == PTE_FREELIST_SIZE) {
- pte_free_submit(*batchp);
- *batchp = NULL;
- }
-}
-
-void pte_free_finish(struct mmu_gather *tlb)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
-
- if (*batchp == NULL)
- return;
- pte_free_submit(*batchp);
- *batchp = NULL;
-}
-
-#endif /* CONFIG_SMP */
-
static inline int is_exec_fault(void)
{
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
Index: linux-2.6/arch/powerpc/mm/tlb_hash32.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash32.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb)
*/
_tlbia();
}
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash64.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash64.c
@@ -165,9 +165,6 @@ void tlb_flush(struct mmu_gather *tlb)
__flush_tlb_pending(tlbbatch);
put_cpu_var(ppc64_tlb_batch);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/**
Index: linux-2.6/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_nohash.c
+++ linux-2.6/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range);
void tlb_flush(struct mmu_gather *tlb)
{
flush_tlb_mm(tlb->mm);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -27,6 +27,49 @@
#define tlb_fast_mode(tlb) 1
#endif
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+/*
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore needs some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, free the page. Since the disabling of
+ * IRQs delays the copmletion of the TLB flush we can never observe an already
+ * freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by some
+ * other means, this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, this
+ * allocation is deep inside the MM code and can thus easily fail on memory
+ * pressure. To guarantee progress we fall back to single table freeing, see
+ * the implementation of tlb_remove_table_one().
+ *
+ */
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
+
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#endif
+
/* struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
@@ -36,11 +79,12 @@ struct mmu_gather {
unsigned int max; /* nr < max */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
-#ifdef HAVE_ARCH_MMU_GATHER
- struct arch_mmu_gather arch;
-#endif
struct page **pages;
struct page *local[8];
+
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ struct mmu_table_batch *batch;
+#endif
};
static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
@@ -72,8 +116,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
tlb->fullmm = full_mm_flush;
-#ifdef HAVE_ARCH_MMU_GATHER
- tlb->arch = ARCH_MMU_GATHER_INIT;
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ tlb->batch = NULL;
#endif
}
@@ -84,6 +128,9 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
return;
tlb->need_flush = 0;
tlb_flush(tlb);
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -193,6 +193,83 @@ static void check_sync_rss_stat(struct t
#endif
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely on
+ * IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->need_flush = 1;
+
+ /*
+ * When there's less then two users of this mm there cannot be a
+ * concurrent page-table walk.
+ */
+ if (atomic_read(&tlb->mm->mm_users) < 2) {
+ __tlb_remove_table(table);
+ return;
+ }
+
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)__get_free_page(GFP_ATOMIC);
+ if (*batch == NULL) {
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_table_flush(tlb);
+}
+
+#endif
+
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Andrea Arcangeli <aarcange@redhat.com>,
Avi Kivity <avi@redhat.com>, Thomas Gleixner <tglx@linutronix.de>,
Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@elte.hu>,
akpm@linux-foundation.org,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
David Miller <davem@davemloft.net>,
Hugh Dickins <hugh.dickins@tiscali.co.uk>,
Mel Gorman <mel@csn.ul.ie>, Nick Piggin <npiggin@suse.de>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Paul McKenney <paulmck@linux.vnet.ibm.com>,
Yanmin Zhang <yanmin_zhang@linux.intel.com>,
Stephen Rothwell <sfr@canb.auug.org.au>
Subject: [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code
Date: Sat, 28 Aug 2010 16:16:52 +0200 [thread overview]
Message-ID: <20100828142456.450686838@chello.nl> (raw)
Message-ID: <20100828141652.Grd2XyUtFouBs9zVjfsHCy9LFtOs_dXZHG6ncEa2pgg@z> (raw)
In-Reply-To: 20100828141637.421594670@chello.nl
[-- Attachment #1: mm-preempt-tlb-gather-rcu.patch --]
[-- Type: text/plain, Size: 11803 bytes --]
In case other architectures require RCU freed page-tables to implement
gup_fast() and software filled hashes and similar things, provide the
means to do so by moving the logic into generic code.
Requested-by: David Miller <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/include/asm/pgalloc.h | 23 +++++++-
arch/powerpc/include/asm/tlb.h | 10 ---
arch/powerpc/mm/pgtable.c | 98 -------------------------------------
arch/powerpc/mm/tlb_hash32.c | 3 -
arch/powerpc/mm/tlb_hash64.c | 3 -
arch/powerpc/mm/tlb_nohash.c | 3 -
include/asm-generic/tlb.h | 57 +++++++++++++++++++--
mm/memory.c | 77 +++++++++++++++++++++++++++++
8 files changed, 149 insertions(+), 125 deletions(-)
Index: linux-2.6/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/pgalloc.h
+++ linux-2.6/arch/powerpc/include/asm/pgalloc.h
@@ -31,14 +31,31 @@ static inline void pte_free(struct mm_st
#endif
#ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(struct mmu_gather *tlb);
+#define HAVE_ARCH_RCU_TABLE_FREE
+
+struct mmu_gather;
+extern void tlb_remove_table(struct mmu_gather *, void *);
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(table, shift);
}
-static inline void pte_free_finish(struct mmu_gather *tlb) { }
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
Index: linux-2.6/arch/powerpc/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/tlb.h
+++ linux-2.6/arch/powerpc/include/asm/tlb.h
@@ -28,16 +28,6 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
-#define HAVE_ARCH_MMU_GATHER 1
-
-struct pte_freelist_batch;
-
-struct arch_mmu_gather {
- struct pte_freelist_batch *batch;
-};
-
-#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
-
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
Index: linux-2.6/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable.c
+++ linux-2.6/arch/powerpc/mm/pgtable.c
@@ -33,104 +33,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_SMP
-
-/*
- * Handle batching of page table freeing on SMP. Page tables are
- * queued up and send to be freed later by RCU in order to avoid
- * freeing a page table page that is being walked without locks
- */
-
-static unsigned long pte_freelist_forced_free;
-
-struct pte_freelist_batch
-{
- struct rcu_head rcu;
- unsigned int index;
- unsigned long tables[0];
-};
-
-#define PTE_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
- / sizeof(unsigned long))
-
-static void pte_free_smp_sync(void *arg)
-{
- /* Do nothing, just ensure we sync with all CPUs */
-}
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-static void pgtable_free_now(void *table, unsigned shift)
-{
- pte_freelist_forced_free++;
-
- smp_call_function(pte_free_smp_sync, NULL, 1);
-
- pgtable_free(table, shift);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
- struct pte_freelist_batch *batch =
- container_of(head, struct pte_freelist_batch, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++) {
- void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
- }
-
- free_page((unsigned long)batch);
-}
-
-static void pte_free_submit(struct pte_freelist_batch *batch)
-{
- call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
- unsigned long pgf;
-
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- pgtable_free(table, shift);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
- if (*batchp == NULL) {
- pgtable_free_now(table, shift);
- return;
- }
- (*batchp)->index = 0;
- }
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf = (unsigned long)table | shift;
- (*batchp)->tables[(*batchp)->index++] = pgf;
- if ((*batchp)->index == PTE_FREELIST_SIZE) {
- pte_free_submit(*batchp);
- *batchp = NULL;
- }
-}
-
-void pte_free_finish(struct mmu_gather *tlb)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
-
- if (*batchp == NULL)
- return;
- pte_free_submit(*batchp);
- *batchp = NULL;
-}
-
-#endif /* CONFIG_SMP */
-
static inline int is_exec_fault(void)
{
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
Index: linux-2.6/arch/powerpc/mm/tlb_hash32.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash32.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb)
*/
_tlbia();
}
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash64.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash64.c
@@ -165,9 +165,6 @@ void tlb_flush(struct mmu_gather *tlb)
__flush_tlb_pending(tlbbatch);
put_cpu_var(ppc64_tlb_batch);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/**
Index: linux-2.6/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_nohash.c
+++ linux-2.6/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range);
void tlb_flush(struct mmu_gather *tlb)
{
flush_tlb_mm(tlb->mm);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -27,6 +27,49 @@
#define tlb_fast_mode(tlb) 1
#endif
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+/*
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore needs some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, free the page. Since the disabling of
+ * IRQs delays the copmletion of the TLB flush we can never observe an already
+ * freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by some
+ * other means, this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, this
+ * allocation is deep inside the MM code and can thus easily fail on memory
+ * pressure. To guarantee progress we fall back to single table freeing, see
+ * the implementation of tlb_remove_table_one().
+ *
+ */
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
+
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#endif
+
/* struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
@@ -36,11 +79,12 @@ struct mmu_gather {
unsigned int max; /* nr < max */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
-#ifdef HAVE_ARCH_MMU_GATHER
- struct arch_mmu_gather arch;
-#endif
struct page **pages;
struct page *local[8];
+
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ struct mmu_table_batch *batch;
+#endif
};
static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
@@ -72,8 +116,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
tlb->fullmm = full_mm_flush;
-#ifdef HAVE_ARCH_MMU_GATHER
- tlb->arch = ARCH_MMU_GATHER_INIT;
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ tlb->batch = NULL;
#endif
}
@@ -84,6 +128,9 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
return;
tlb->need_flush = 0;
tlb_flush(tlb);
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -193,6 +193,83 @@ static void check_sync_rss_stat(struct t
#endif
+#ifdef HAVE_ARCH_RCU_TABLE_FREE
+
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely on
+ * IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->need_flush = 1;
+
+ /*
+ * When there's less then two users of this mm there cannot be a
+ * concurrent page-table walk.
+ */
+ if (atomic_read(&tlb->mm->mm_users) < 2) {
+ __tlb_remove_table(table);
+ return;
+ }
+
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)__get_free_page(GFP_ATOMIC);
+ if (*batch == NULL) {
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_table_flush(tlb);
+}
+
+#endif
+
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
next prev parent reply other threads:[~2010-08-28 14:27 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-28 14:16 [PATCH 00/20] mm: Preemptibility -v4 Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 01/20] powerpc: Use call_rcu_sched() for pagetables Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-31 6:10 ` Benjamin Herrenschmidt
2010-08-28 14:16 ` [PATCH 02/20] mm: Improve page_lock_anon_vma() comment Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 03/20] mm: Rename drop_anon_vma to put_anon_vma Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 15:08 ` Pekka Enberg
2010-08-28 14:16 ` [PATCH 04/20] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 05/20] mm: Simplify anon_vma refcounts Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 15:13 ` Pekka Enberg
2010-08-28 14:16 ` [PATCH 06/20] mm: Use refcounts for page_lock_anon_vma() Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 07/20] mm: Preemptible mmu_gather Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 08/20] powerpc: " Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-31 6:26 ` Benjamin Herrenschmidt
2010-08-31 6:31 ` Benjamin Herrenschmidt
2010-08-31 9:14 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 09/20] sparc: " Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 10/20] s390: preemptible mmu_gather Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 11/20] arm: Preemptible mmu_gather Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 12/20] sh: " Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 13/20] um: " Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 14/20] ia64: " Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-30 15:44 ` Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra [this message]
2010-08-28 14:16 ` [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
2010-08-28 14:16 ` [PATCH 16/20] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 17/20] mutex: Provide mutex_is_contended Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 19/20] mm: Extended batches for generic mmu_gather Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` [PATCH 20/20] mm: Optimize page_lock_anon_vma() fast-path Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:32 ` [PATCH 00/20] mm: Preemptibility -v4 Peter Zijlstra
2010-08-28 22:28 ` David Miller
2010-08-28 22:41 ` Peter Zijlstra
2010-08-28 14:56 ` Piotr Hosowicz
2010-08-28 15:10 ` Peter Zijlstra
2010-08-28 15:17 ` Piotr Hosowicz
2010-08-28 15:23 ` Peter Zijlstra
2010-08-28 16:01 ` Piotr Hosowicz
2010-08-29 12:46 ` Piotr Hosowicz
2010-08-29 13:37 ` Peter Zijlstra
2010-08-29 13:43 ` Piotr Hosowicz
2010-08-31 14:02 ` Piotr Hosowicz
2010-08-31 14:14 ` Piotr Hosowicz
2010-09-02 14:53 ` Piotr Hosowicz
2010-08-28 15:19 ` Pekka Enberg
2010-08-28 15:27 ` Peter Zijlstra
[not found] ` <AANLkTikSm2Mq8hGNac9rpFH-3pvryw2kW57EP45Ny6Vp@mail.gmail.com>
2010-09-14 5:36 ` Alex,Shi
2010-09-14 7:42 ` Peter Zijlstra
-- strict thread matches above, loose matches on Subject: below --
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
2010-10-18 11:24 ` [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100828142456.450686838@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=aarcange@redhat.com \
--cc=akpm@linux-fou \
--cc=avi@redhat.com \
--cc=benh@kernel.crashing.org \
--cc=davem@davemloft.net \
--cc=hugh.dickins@tiscali.co.uk \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mel@csn.ul.ie \
--cc=mingo@elte.hu \
--cc=npiggin@suse.de \
--cc=paulmck@linux.vnet.ibm.com \
--cc=riel@redhat.com \
--cc=sfr@canb.auug.org.au \
--cc=tglx@linutronix.de \
--cc=yanmin_zhang@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.