* [PATCH 07/20] mm: Preemptible mmu_gather
2010-08-28 14:16 [PATCH 00/20] mm: Preemptibility -v4 Peter Zijlstra
@ 2010-08-28 14:16 ` Peter Zijlstra
2010-08-28 14:16 ` Peter Zijlstra
0 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-08-28 14:16 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Martin Schwidefsky,
Russell King, Paul Mundt, Jeff Dike, Tony Luck
[-- Attachment #1: mm-preempt-tlb-gather.patch --]
[-- Type: text/plain, Size: 10027 bytes --]
Make mmu_gather preemptible by using a small on-stack list and an
optional allocation to speed things up.
Preemptible mmu_gather is desired in general and usable once
i_mmap_lock becomes a mutex. Doing it before the mutex conversion
saves us from having to rework the code to move the mmu_gather bits
inside the i_mmap_lock section.
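For reference, the calling convention changes like this (a minimal
before/after sketch distilled from the fs/exec.c hunk below; locals and
error handling elided):
	/* Old: borrow the per-CPU gather, preemption disabled throughout. */
	struct mmu_gather *tlb = tlb_gather_mmu(mm, 0);
	free_pgd_range(tlb, start, end, floor, ceiling);
	tlb_finish_mmu(tlb, start, end);
	/* New: the gather lives on the caller's stack, backed by the small
	 * tlb->local[] array plus an optional page allocation, so the
	 * unmap path may now be preempted. */
	struct mmu_gather tlb;
	tlb_gather_mmu(&tlb, mm, 0);
	free_pgd_range(&tlb, start, end, floor, ceiling);
	tlb_finish_mmu(&tlb, start, end);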
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
fs/exec.c | 10 ++++-----
include/asm-generic/tlb.h | 51 +++++++++++++++++++++++++++++-----------------
include/linux/mm.h | 2 -
mm/memory.c | 27 +++++-------------------
mm/mmap.c | 16 +++++++-------
5 files changed, 53 insertions(+), 53 deletions(-)
Index: linux-2.6/fs/exec.c
===================================================================
--- linux-2.6.orig/fs/exec.c
+++ linux-2.6/fs/exec.c
@@ -504,7 +504,7 @@ static int shift_arg_pages(struct vm_are
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -530,12 +530,12 @@ static int shift_arg_pages(struct vm_are
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
+ free_pgd_range(&tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
@@ -544,10 +544,10 @@ static int shift_arg_pages(struct vm_are
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
+ free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, new_end, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -22,14 +22,8 @@
* and page free order so much..
*/
#ifdef CONFIG_SMP
- #ifdef ARCH_FREE_PTR_NR
- #define FREE_PTR_NR ARCH_FREE_PTR_NR
- #else
- #define FREE_PTE_NR 506
- #endif
#define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
#else
- #define FREE_PTE_NR 1
#define tlb_fast_mode(tlb) 1
#endif
@@ -39,30 +33,48 @@
struct mmu_gather {
struct mm_struct *mm;
unsigned int nr; /* set to ~0U means fast mode */
+ unsigned int max; /* nr < max */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
- struct page * pages[FREE_PTE_NR];
+#ifdef HAVE_ARCH_MMU_GATHER
+ struct arch_mmu_gather arch;
+#endif
+ struct page **pages;
+ struct page *local[8];
};
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
+{
+ unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+
+ if (addr) {
+ tlb->pages = (void *)addr;
+ tlb->max = PAGE_SIZE / sizeof(struct page *);
+ }
+}
/* tlb_gather_mmu
* Return a pointer to an initialized struct mmu_gather.
*/
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
tlb->mm = mm;
- /* Use fast mode if only one CPU is online */
- tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
+ tlb->max = ARRAY_SIZE(tlb->local);
+ tlb->pages = tlb->local;
+
+ if (num_online_cpus() > 1) {
+ tlb->nr = 0;
+ __tlb_alloc_pages(tlb);
+ } else /* Use fast mode if only one CPU is online */
+ tlb->nr = ~0U;
tlb->fullmm = full_mm_flush;
- return tlb;
+#ifdef HAVE_ARCH_MMU_GATHER
+ tlb->arch = ARCH_MMU_GATHER_INIT;
+#endif
}
static inline void
@@ -75,6 +87,8 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
+ if (tlb->pages == tlb->local)
+ __tlb_alloc_pages(tlb);
}
}
@@ -90,7 +104,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
/* keep the page table cache within bounds */
check_pgt_cache();
- put_cpu_var(mmu_gathers);
+ if (tlb->pages != tlb->local)
+ free_pages((unsigned long)tlb->pages, 0);
}
/* tlb_remove_page
@@ -106,7 +121,7 @@ static inline void tlb_remove_page(struc
return;
}
tlb->pages[tlb->nr++] = page;
- if (tlb->nr >= FREE_PTE_NR)
+ if (tlb->nr >= tlb->max)
tlb_flush_mmu(tlb, 0, 0);
}
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -769,7 +769,7 @@ int zap_vma_ptes(struct vm_area_struct *
unsigned long size);
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1093,17 +1093,14 @@ static unsigned long unmap_page_range(st
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-unsigned long unmap_vmas(struct mmu_gather **tlbp,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
{
long zap_work = ZAP_BLOCK_SIZE;
- unsigned long tlb_start = 0; /* For tlb_finish_mmu */
- int tlb_start_valid = 0;
unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
- int fullmm = (*tlbp)->fullmm;
struct mm_struct *mm = vma->vm_mm;
mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1124,11 +1121,6 @@ unsigned long unmap_vmas(struct mmu_gath
untrack_pfn_vma(vma, 0, 0);
while (start != end) {
- if (!tlb_start_valid) {
- tlb_start = start;
- tlb_start_valid = 1;
- }
-
if (unlikely(is_vm_hugetlb_page(vma))) {
/*
* It is undesirable to test vma->vm_file as it
@@ -1149,7 +1141,7 @@ unsigned long unmap_vmas(struct mmu_gath
start = end;
} else
- start = unmap_page_range(*tlbp, vma,
+ start = unmap_page_range(tlb, vma,
start, end, &zap_work, details);
if (zap_work > 0) {
@@ -1157,19 +1149,13 @@ unsigned long unmap_vmas(struct mmu_gath
break;
}
- tlb_finish_mmu(*tlbp, tlb_start, start);
-
if (need_resched() ||
(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
- if (i_mmap_lock) {
- *tlbp = NULL;
+ if (i_mmap_lock)
goto out;
- }
cond_resched();
}
- *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
- tlb_start_valid = 0;
zap_work = ZAP_BLOCK_SIZE;
}
}
@@ -1189,16 +1175,15 @@ unsigned long zap_page_range(struct vm_a
unsigned long size, struct zap_details *details)
{
struct mm_struct *mm = vma->vm_mm;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
unsigned long end = address + size;
unsigned long nr_accounted = 0;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
update_hiwater_rss(mm);
end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
- if (tlb)
- tlb_finish_mmu(tlb, address, end);
+ tlb_finish_mmu(&tlb, address, end);
return end;
}
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -1896,17 +1896,17 @@ static void unmap_region(struct mm_struc
unsigned long start, unsigned long end)
{
struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
unsigned long nr_accounted = 0;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
update_hiwater_rss(mm);
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+ free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
- tlb_finish_mmu(tlb, start, end);
+ tlb_finish_mmu(&tlb, start, end);
}
/*
@@ -2247,7 +2247,7 @@ EXPORT_SYMBOL(do_brk);
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
unsigned long end;
@@ -2272,14 +2272,14 @@ void exit_mmap(struct mm_struct *mm)
lru_add_drain();
flush_cache_mm(mm);
- tlb = tlb_gather_mmu(mm, 1);
+ tlb_gather_mmu(&tlb, mm, 1);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
- tlb_finish_mmu(tlb, 0, end);
+ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ tlb_finish_mmu(&tlb, 0, end);
/*
* Walk the list again, actually closing and freeing it,
* [PATCH 00/20] mm: Preemptibility -v5
@ 2010-10-18 11:24 Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
` (21 more replies)
0 siblings, 22 replies; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
This patch-set makes part of the mm a lot more preemptible. It converts
i_mmap_lock and anon_vma->lock to mutexes and makes mmu_gather fully
preemptible.
The main motivation was making mm_take_all_locks() preemptible, since it
appears people are nesting hundreds of spinlocks there.
The side-effect is that we can finally make mmu_gather preemptible,
something which lots of people have wanted to do for a long time.
It also gets us anon_vma refcounting, which seems to result in a nice
cleanup of the anon_vma lifetime rules wrt KSM and compaction.
This patch-set is build- and boot-tested on x86_64 (a previous version was
also tested on Dave's Niagara2 machines, and I suppose s390 was too when
Martin provided the conversion patch for his arch).
There are no known architectures left unconverted.
Yanmin ran the -v3 posting through the comprehensive Intel test farm
and didn't find any regressions.
( Not included in this posting are the 4 Sparc64 patches that implement
gup_fast; those can be applied separately after this series gets
anywhere. )
The full series (including the Sparc64 gup_fast bits) is also available in
-git form from (against Linus' tree as of about an hour ago):
git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-mmu_preempt.git mmu_preempt
DaveM mentioned some sparc64 trouble with the -v4 posting; this turned out
to be a false positive, as unpatched kernels are also having trouble on his
machines.
Linus, Andrew, Stephen, can we add this to -next for .37?
* [PATCH 01/20] powerpc: Use call_rcu_sched() for pagetables
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 02/20] mm: Improve page_lock_anon_vma() comment Peter Zijlstra
` (19 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Nick Piggin
[-- Attachment #1: powerpc-pgtable-call_rcu_sched.patch --]
[-- Type: text/plain, Size: 901 bytes --]
PowerPC relies on IRQ-disable to guard against RCU quiescent states,
so use the appropriate RCU call variant.
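To illustrate the distinction (a hedged sketch, not part of the patch;
it assumes CONFIG_PREEMPT_RCU, where plain call_rcu() readers are only
the rcu_read_lock() sections):
	/*
	 * Reader: PowerPC walks page tables with interrupts disabled,
	 * which delimits an RCU-sched read-side critical section only.
	 */
	local_irq_save(flags);
	/* ... dereference page-table pages ... */
	local_irq_restore(flags);
	/*
	 * Updater: freeing a page-table page must therefore wait for an
	 * RCU-sched grace period; a plain call_rcu() grace period is not
	 * guaranteed to be held off by the IRQs-off walker above.
	 */
	call_rcu_sched(&batch->rcu, pte_free_rcu_callback);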
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
arch/powerpc/mm/pgtable.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-2.6/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable.c
+++ linux-2.6/arch/powerpc/mm/pgtable.c
@@ -92,7 +92,7 @@ static void pte_free_rcu_callback(struct
static void pte_free_submit(struct pte_freelist_batch *batch)
{
- call_rcu(&batch->rcu, pte_free_rcu_callback);
+ call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
}
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
* [PATCH 02/20] mm: Improve page_lock_anon_vma() comment
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 01/20] powerpc: Use call_rcu_sched() for pagetables Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 03/20] mm: Rename drop_anon_vma to put_anon_vma Peter Zijlstra
` (18 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-page_lock_anon_vma-comment.patch --]
[-- Type: text/plain, Size: 1628 bytes --]
A slightly more verbose comment to go along with the trickery in
page_lock_anon_vma().
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mel Gorman <mel@csn.ul.ie>
LKML-Reference: <1271158226.4807.1107.camel@twins>
---
mm/rmap.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -311,8 +311,22 @@ void __init anon_vma_init(void)
}
/*
- * Getting a lock on a stable anon_vma from a page off the LRU is
- * tricky: page_lock_anon_vma rely on RCU to guard against the races.
+ * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
+ *
+ * Since there is no serialization what so ever against page_remove_rmap()
+ * the best this function can do is return a locked anon_vma that might
+ * have been relevant to this page.
+ *
+ * The page might have been remapped to a different anon_vma or the anon_vma
+ * returned may already be freed (and even reused).
+ *
+ * All users of this function must be very careful when walking the anon_vma
+ * chain and verify that the page in question is indeed mapped in it
+ * [ something equivalent to page_mapped_in_vma() ].
+ *
+ * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
+ * that the anon_vma pointer from page->mapping is valid if there is a
+ * mapcount, we can dereference the anon_vma after observing those.
*/
struct anon_vma *page_lock_anon_vma(struct page *page)
{
* [PATCH 03/20] mm: Rename drop_anon_vma to put_anon_vma
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (2 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 02/20] mm: Improve page_lock_anon_vma() comment Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 04/20] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
` (17 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: put_anon_vma.patch --]
[-- Type: text/plain, Size: 3441 bytes --]
The normal code pattern used in the kernel is: get/put.
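That is (a trivial, hypothetical call site just to show the pairing):
	/* Before: the names don't pair up. */
	get_anon_vma(anon_vma);
	/* ... */
	drop_anon_vma(anon_vma);
	/* After: the conventional kernel get/put pairing. */
	get_anon_vma(anon_vma);
	/* ... */
	put_anon_vma(anon_vma);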
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/rmap.h | 4 ++--
mm/ksm.c | 10 +++++-----
mm/migrate.c | 2 +-
mm/rmap.c | 4 ++--
4 files changed, 10 insertions(+), 10 deletions(-)
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -87,7 +87,7 @@ static inline void get_anon_vma(struct a
atomic_inc(&anon_vma->external_refcount);
}
-void drop_anon_vma(struct anon_vma *);
+void put_anon_vma(struct anon_vma *);
#else
static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
{
@@ -102,7 +102,7 @@ static inline void get_anon_vma(struct a
{
}
-static inline void drop_anon_vma(struct anon_vma *anon_vma)
+static inline void put_anon_vma(struct anon_vma *anon_vma)
{
}
#endif /* CONFIG_KSM */
Index: linux-2.6/mm/ksm.c
===================================================================
--- linux-2.6.orig/mm/ksm.c
+++ linux-2.6/mm/ksm.c
@@ -307,11 +307,11 @@ static void hold_anon_vma(struct rmap_it
get_anon_vma(anon_vma);
}
-static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
+static void ksm_put_anon_vma(struct rmap_item *rmap_item)
{
struct anon_vma *anon_vma = rmap_item->anon_vma;
- drop_anon_vma(anon_vma);
+ put_anon_vma(anon_vma);
}
/*
@@ -396,7 +396,7 @@ static void break_cow(struct rmap_item *
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- ksm_drop_anon_vma(rmap_item);
+ ksm_put_anon_vma(rmap_item);
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
@@ -451,7 +451,7 @@ static void remove_node_from_stable_tree
ksm_pages_sharing--;
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ ksm_put_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -539,7 +539,7 @@ static void remove_rmap_item_from_tree(s
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ ksm_put_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
Index: linux-2.6/mm/migrate.c
===================================================================
--- linux-2.6.orig/mm/migrate.c
+++ linux-2.6/mm/migrate.c
@@ -683,7 +683,7 @@ rcu_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma)
- drop_anon_vma(anon_vma);
+ put_anon_vma(anon_vma);
if (rcu_locked)
rcu_read_unlock();
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -274,7 +274,7 @@ static void anon_vma_unlink(struct anon_
if (empty) {
/* We no longer need the root anon_vma */
if (anon_vma->root != anon_vma)
- drop_anon_vma(anon_vma->root);
+ put_anon_vma(anon_vma->root);
anon_vma_free(anon_vma);
}
}
@@ -1448,7 +1448,7 @@ int try_to_munlock(struct page *page)
* we know we are the last user, nobody else can get a reference and we
* can do the freeing without the lock.
*/
-void drop_anon_vma(struct anon_vma *anon_vma)
+void put_anon_vma(struct anon_vma *anon_vma)
{
BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
* [PATCH 04/20] mm: Move anon_vma ref out from under CONFIG_KSM
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (3 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 03/20] mm: Rename drop_anon_vma to put_anon_vma Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 05/20] mm: Simplify anon_vma refcounts Peter Zijlstra
` (16 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-anon_vma-ref.patch --]
[-- Type: text/plain, Size: 4453 bytes --]
We need an anon_vma refcount for preemptible anon_vma->lock.
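Roughly why (a hypothetical sketch of the pattern this enables; cf. the
later page_lock_anon_vma() patch in this series): once the lock can
sleep, a lookup has to pin the anon_vma with a reference before
blocking on it, because RCU only keeps the memory valid while inside
the read-side section:
	rcu_read_lock();
	anon_vma = page_anon_vma(page);
	if (anon_vma && !atomic_inc_not_zero(&anon_vma->refcount))
		anon_vma = NULL;		/* already being freed */
	rcu_read_unlock();
	if (anon_vma) {
		anon_vma_lock(anon_vma);	/* may sleep once a mutex */
		/* ... */
		anon_vma_unlock(anon_vma);
		put_anon_vma(anon_vma);
	}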
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mel Gorman <mel@csn.ul.ie>
---
include/linux/rmap.h | 40 ++++------------------------------------
mm/rmap.c | 14 ++++++--------
2 files changed, 10 insertions(+), 44 deletions(-)
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -27,18 +27,15 @@
struct anon_vma {
spinlock_t lock; /* Serialize access to vma list */
struct anon_vma *root; /* Root of this anon_vma tree */
-#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
-
/*
- * The external_refcount is taken by either KSM or page migration
- * to take a reference to an anon_vma when there is no
+ * The refcount is taken on an anon_vma when there is no
* guarantee that the vma of page tables will exist for
* the duration of the operation. A caller that takes
* the reference is responsible for clearing up the
* anon_vma if they are the last user on release
*/
- atomic_t external_refcount;
-#endif
+ atomic_t refcount;
+
/*
* NOTE: the LSB of the head.next is set by
* mm_take_all_locks() _after_ taking the above lock. So the
@@ -71,41 +68,12 @@ struct anon_vma_chain {
};
#ifdef CONFIG_MMU
-#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
-static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
-{
- atomic_set(&anon_vma->external_refcount, 0);
-}
-
-static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
-{
- return atomic_read(&anon_vma->external_refcount);
-}
-
static inline void get_anon_vma(struct anon_vma *anon_vma)
{
- atomic_inc(&anon_vma->external_refcount);
+ atomic_inc(&anon_vma->refcount);
}
void put_anon_vma(struct anon_vma *);
-#else
-static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
-{
-}
-
-static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
-{
- return 0;
-}
-
-static inline void get_anon_vma(struct anon_vma *anon_vma)
-{
-}
-
-static inline void put_anon_vma(struct anon_vma *anon_vma)
-{
-}
-#endif /* CONFIG_KSM */
static inline struct anon_vma *page_anon_vma(struct page *page)
{
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -268,7 +268,7 @@ static void anon_vma_unlink(struct anon_
list_del(&anon_vma_chain->same_anon_vma);
/* We must garbage collect the anon_vma if it's empty */
- empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
+ empty = list_empty(&anon_vma->head) && !atomic_read(&anon_vma->refcount);
anon_vma_unlock(anon_vma);
if (empty) {
@@ -299,7 +299,7 @@ static void anon_vma_ctor(void *data)
struct anon_vma *anon_vma = data;
spin_lock_init(&anon_vma->lock);
- anonvma_external_refcount_init(anon_vma);
+ atomic_set(&anon_vma->refcount, 0);
INIT_LIST_HEAD(&anon_vma->head);
}
@@ -1441,7 +1441,6 @@ int try_to_munlock(struct page *page)
return try_to_unmap_file(page, TTU_MUNLOCK);
}
-#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
/*
* Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
* if necessary. Be careful to do all the tests under the lock. Once
@@ -1450,8 +1449,8 @@ int try_to_munlock(struct page *page)
*/
void put_anon_vma(struct anon_vma *anon_vma)
{
- BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
- if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
+ BUG_ON(atomic_read(&anon_vma->refcount) <= 0);
+ if (atomic_dec_and_lock(&anon_vma->refcount, &anon_vma->root->lock)) {
struct anon_vma *root = anon_vma->root;
int empty = list_empty(&anon_vma->head);
int last_root_user = 0;
@@ -1462,8 +1461,8 @@ void put_anon_vma(struct anon_vma *anon_
* the refcount on the root and check if we need to free it.
*/
if (empty && anon_vma != root) {
- BUG_ON(atomic_read(&root->external_refcount) <= 0);
- last_root_user = atomic_dec_and_test(&root->external_refcount);
+ BUG_ON(atomic_read(&root->refcount) <= 0);
+ last_root_user = atomic_dec_and_test(&root->refcount);
root_empty = list_empty(&root->head);
}
anon_vma_unlock(anon_vma);
@@ -1475,7 +1474,6 @@ void put_anon_vma(struct anon_vma *anon_
}
}
}
-#endif
#ifdef CONFIG_MIGRATION
/*
* [PATCH 05/20] mm: Simplify anon_vma refcounts
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (4 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 04/20] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 06/20] mm: Use refcounts for page_lock_anon_vma() Peter Zijlstra
` (15 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-use-anon_vma-ref.patch --]
[-- Type: text/plain, Size: 6407 bytes --]
This patch changes the anon_vma refcount to be 0 when the object is
free. It does this by holding one reference while the anon_vma is in
use (i.e. the anon_vma->head list is not empty).
This allows a simpler release scheme that doesn't have to check both
the refcount and the list, and avoids taking a ref for each entry on
the list.
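The resulting lifetime rules, condensed into a sketch (names as in the
patch below):
	anon_vma = anon_vma_alloc();	/* refcount == 1: "in use" */
	get_anon_vma(anon_vma);		/* extra users: migration, KSM, ... */
	put_anon_vma(anon_vma);		/* frees on 0 via __put_anon_vma() */
	/* Unlinking the last vma drops the "in use" reference: */
	if (list_empty(&anon_vma->head))
		put_anon_vma(anon_vma);
	/*
	 * __put_anon_vma() also drops the implicit reference a non-root
	 * anon_vma holds on its root.
	 */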
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/rmap.h | 11 +++++--
mm/ksm.c | 4 --
mm/rmap.c | 79 ++++++++++++++++++---------------------------------
3 files changed, 38 insertions(+), 56 deletions(-)
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -73,7 +73,13 @@ static inline void get_anon_vma(struct a
atomic_inc(&anon_vma->refcount);
}
-void put_anon_vma(struct anon_vma *);
+void __put_anon_vma(struct anon_vma *anon_vma);
+
+static inline void put_anon_vma(struct anon_vma *anon_vma)
+{
+ if (atomic_dec_and_test(&anon_vma->refcount))
+ __put_anon_vma(anon_vma);
+}
static inline struct anon_vma *page_anon_vma(struct page *page)
{
@@ -116,7 +122,6 @@ void unlink_anon_vmas(struct vm_area_str
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
void __anon_vma_link(struct vm_area_struct *);
-void anon_vma_free(struct anon_vma *);
static inline void anon_vma_merge(struct vm_area_struct *vma,
struct vm_area_struct *next)
@@ -125,6 +130,8 @@ static inline void anon_vma_merge(struct
unlink_anon_vmas(next);
}
+struct anon_vma *page_get_anon_vma(struct page *page);
+
/*
* rmap interfaces called when adding or removing pte of page
*/
Index: linux-2.6/mm/ksm.c
===================================================================
--- linux-2.6.orig/mm/ksm.c
+++ linux-2.6/mm/ksm.c
@@ -309,9 +309,7 @@ static void hold_anon_vma(struct rmap_it
static void ksm_put_anon_vma(struct rmap_item *rmap_item)
{
- struct anon_vma *anon_vma = rmap_item->anon_vma;
-
- put_anon_vma(anon_vma);
+ put_anon_vma(rmap_item->anon_vma);
}
/*
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -67,11 +67,24 @@ static struct kmem_cache *anon_vma_chain
static inline struct anon_vma *anon_vma_alloc(void)
{
- return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+ struct anon_vma *anon_vma;
+
+ anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+ if (anon_vma) {
+ atomic_set(&anon_vma->refcount, 1);
+ /*
+ * Initialise the anon_vma root to point to itself. If called from
+ * fork, the root will be reset to the parents anon_vma.
+ */
+ anon_vma->root = anon_vma;
+ }
+
+ return anon_vma;
}
-void anon_vma_free(struct anon_vma *anon_vma)
+static inline void anon_vma_free(struct anon_vma *anon_vma)
{
+ VM_BUG_ON(atomic_read(&anon_vma->refcount));
kmem_cache_free(anon_vma_cachep, anon_vma);
}
@@ -133,11 +146,6 @@ int anon_vma_prepare(struct vm_area_stru
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
allocated = anon_vma;
- /*
- * This VMA had no anon_vma yet. This anon_vma is
- * the root of any anon_vma tree that might form.
- */
- anon_vma->root = anon_vma;
}
anon_vma_lock(anon_vma);
@@ -156,7 +164,7 @@ int anon_vma_prepare(struct vm_area_stru
anon_vma_unlock(anon_vma);
if (unlikely(allocated))
- anon_vma_free(allocated);
+ put_anon_vma(allocated);
if (unlikely(avc))
anon_vma_chain_free(avc);
}
@@ -237,9 +245,9 @@ int anon_vma_fork(struct vm_area_struct
*/
anon_vma->root = pvma->anon_vma->root;
/*
- * With KSM refcounts, an anon_vma can stay around longer than the
- * process it belongs to. The root anon_vma needs to be pinned
- * until this anon_vma is freed, because the lock lives in the root.
+ * With refcounts, an anon_vma can stay around longer than the
+ * process it belongs to. The root anon_vma needs to be pinned until
+ * this anon_vma is freed, because the lock lives in the root.
*/
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
@@ -249,7 +257,7 @@ int anon_vma_fork(struct vm_area_struct
return 0;
out_error_free_anon_vma:
- anon_vma_free(anon_vma);
+ put_anon_vma(anon_vma);
out_error:
unlink_anon_vmas(vma);
return -ENOMEM;
@@ -268,15 +276,11 @@ static void anon_vma_unlink(struct anon_
list_del(&anon_vma_chain->same_anon_vma);
/* We must garbage collect the anon_vma if it's empty */
- empty = list_empty(&anon_vma->head) && !atomic_read(&anon_vma->refcount);
+ empty = list_empty(&anon_vma->head);
anon_vma_unlock(anon_vma);
- if (empty) {
- /* We no longer need the root anon_vma */
- if (anon_vma->root != anon_vma)
- put_anon_vma(anon_vma->root);
- anon_vma_free(anon_vma);
- }
+ if (empty)
+ put_anon_vma(anon_vma);
}
void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -1441,38 +1445,11 @@ int try_to_munlock(struct page *page)
return try_to_unmap_file(page, TTU_MUNLOCK);
}
-/*
- * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
- * if necessary. Be careful to do all the tests under the lock. Once
- * we know we are the last user, nobody else can get a reference and we
- * can do the freeing without the lock.
- */
-void put_anon_vma(struct anon_vma *anon_vma)
-{
- BUG_ON(atomic_read(&anon_vma->refcount) <= 0);
- if (atomic_dec_and_lock(&anon_vma->refcount, &anon_vma->root->lock)) {
- struct anon_vma *root = anon_vma->root;
- int empty = list_empty(&anon_vma->head);
- int last_root_user = 0;
- int root_empty = 0;
-
- /*
- * The refcount on a non-root anon_vma got dropped. Drop
- * the refcount on the root and check if we need to free it.
- */
- if (empty && anon_vma != root) {
- BUG_ON(atomic_read(&root->refcount) <= 0);
- last_root_user = atomic_dec_and_test(&root->refcount);
- root_empty = list_empty(&root->head);
- }
- anon_vma_unlock(anon_vma);
-
- if (empty) {
- anon_vma_free(anon_vma);
- if (root_empty && last_root_user)
- anon_vma_free(root);
- }
- }
+void __put_anon_vma(struct anon_vma *anon_vma)
+{
+ if (anon_vma->root != anon_vma)
+ put_anon_vma(anon_vma->root);
+ anon_vma_free(anon_vma);
}
#ifdef CONFIG_MIGRATION
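As an illustration of the release path above (not part of the patch): a
minimal user-space sketch using C11 atomics, where the toy_* names merely
stand in for the kernel primitives. Dropping the last reference frees the
object, and a non-root anon_vma releases its pin on the root in turn.

#include <stdatomic.h>
#include <stdlib.h>

struct toy_anon_vma {
	atomic_int refcount;		/* 0 only once the object is dead */
	struct toy_anon_vma *root;	/* == self for a root */
};

static void toy_put(struct toy_anon_vma *av);

static void toy_put_slow(struct toy_anon_vma *av)
{
	if (av->root != av)
		toy_put(av->root);	/* drop the pin on the root */
	free(av);
}

static void toy_put(struct toy_anon_vma *av)
{
	/* fetch_sub returns the old value; 1 means we were the last user */
	if (atomic_fetch_sub(&av->refcount, 1) == 1)
		toy_put_slow(av);
}

int main(void)
{
	struct toy_anon_vma *root = malloc(sizeof(*root));
	struct toy_anon_vma *child = malloc(sizeof(*child));

	atomic_init(&root->refcount, 1);	/* "in use" counts as one ref */
	root->root = root;

	atomic_init(&child->refcount, 1);
	child->root = root;
	atomic_fetch_add(&root->refcount, 1);	/* child pins its root */

	toy_put(child);				/* frees child, unpins root */
	toy_put(root);				/* frees root */
	return 0;
}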
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 06/20] mm: Use refcounts for page_lock_anon_vma()
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (5 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 05/20] mm: Simplify anon_vma refcounts Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 07/20] mm: Preemptible mmu_gather Peter Zijlstra
` (14 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-ref-page_lock_anon_vma.patch --]
[-- Type: text/plain, Size: 2056 bytes --]
Convert page_lock_anon_vma() over to use refcounts. This is
done to prepare for the conversion of anon_vma from spinlock to mutex.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
mm/rmap.c | 34 ++++++++++++++++++++++++----------
1 file changed, 24 insertions(+), 10 deletions(-)
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -332,9 +332,9 @@ void __init anon_vma_init(void)
* that the anon_vma pointer from page->mapping is valid if there is a
* mapcount, we can dereference the anon_vma after observing those.
*/
-struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_get_anon_vma(struct page *page)
{
- struct anon_vma *anon_vma, *root_anon_vma;
+ struct anon_vma *anon_vma = NULL;
unsigned long anon_mapping;
rcu_read_lock();
@@ -345,8 +345,10 @@ struct anon_vma *page_lock_anon_vma(stru
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
- root_anon_vma = ACCESS_ONCE(anon_vma->root);
- spin_lock(&root_anon_vma->lock);
+ if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+ anon_vma = NULL;
+ goto out;
+ }
/*
* If this page is still mapped, then its anon_vma cannot have been
@@ -356,19 +358,31 @@ struct anon_vma *page_lock_anon_vma(stru
* corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting
* anon_vma->root before page_unlock_anon_vma() is called to unlock.
*/
- if (page_mapped(page))
- return anon_vma;
-
- spin_unlock(&root_anon_vma->lock);
+ if (!page_mapped(page)) {
+ put_anon_vma(anon_vma);
+ anon_vma = NULL;
+ goto out;
+ }
out:
rcu_read_unlock();
- return NULL;
+
+ return anon_vma;
+}
+
+struct anon_vma *page_lock_anon_vma(struct page *page)
+{
+ struct anon_vma *anon_vma = page_get_anon_vma(page);
+
+ if (anon_vma)
+ anon_vma_lock(anon_vma);
+
+ return anon_vma;
}
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
anon_vma_unlock(anon_vma);
- rcu_read_unlock();
+ put_anon_vma(anon_vma);
}
/*
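The key primitive here is atomic_inc_not_zero(): under rcu_read_lock() the
anon_vma memory cannot be reused, so failing to take a reference means the
object is already dying. A rough user-space model (C11 atomics; the toy_
name is illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the object still holds at least one. */
static bool toy_inc_not_zero(atomic_int *ref)
{
	int old = atomic_load(ref);

	do {
		if (old == 0)
			return false;	/* already headed for the free path */
	} while (!atomic_compare_exchange_weak(ref, &old, old + 1));

	return true;
}

int main(void)
{
	atomic_int ref;

	atomic_init(&ref, 1);
	if (toy_inc_not_zero(&ref))	/* succeeds: 1 -> 2 */
		atomic_fetch_sub(&ref, 1);

	atomic_store(&ref, 0);
	return toy_inc_not_zero(&ref) ? 1 : 0;	/* fails: refcount hit 0 */
}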
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 07/20] mm: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (6 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 06/20] mm: Use refcounts for page_lock_anon_vma() Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 08/20] powerpc: " Peter Zijlstra
` (13 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Martin Schwidefsky,
Russell King, Paul Mundt, Jeff Dike, Tony Luck
[-- Attachment #1: mm-preempt-tlb-gather.patch --]
[-- Type: text/plain, Size: 10027 bytes --]
Make mmu_gather preemptible by using a small on-stack list and an
optional allocation to speed things up.
Preemptible mmu_gather is desired in general and usable once
i_mmap_lock becomes a mutex. Doing it before the mutex conversion
saves us from having to rework the code by moving the mmu_gather
bits inside the i_mmap_lock.
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
fs/exec.c | 10 ++++-----
include/asm-generic/tlb.h | 51 +++++++++++++++++++++++++++++-----------------
include/linux/mm.h | 2 -
mm/memory.c | 27 +++++-------------------
mm/mmap.c | 16 +++++++-------
5 files changed, 53 insertions(+), 53 deletions(-)
Index: linux-2.6/fs/exec.c
===================================================================
--- linux-2.6.orig/fs/exec.c
+++ linux-2.6/fs/exec.c
@@ -504,7 +504,7 @@ static int shift_arg_pages(struct vm_are
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -530,12 +530,12 @@ static int shift_arg_pages(struct vm_are
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
+ free_pgd_range(&tlb, new_end, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
} else {
/*
@@ -544,10 +544,10 @@ static int shift_arg_pages(struct vm_are
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
+ free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : 0);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, new_end, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -22,14 +22,8 @@
* and page free order so much..
*/
#ifdef CONFIG_SMP
- #ifdef ARCH_FREE_PTR_NR
- #define FREE_PTR_NR ARCH_FREE_PTR_NR
- #else
- #define FREE_PTE_NR 506
- #endif
#define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
#else
- #define FREE_PTE_NR 1
#define tlb_fast_mode(tlb) 1
#endif
@@ -39,30 +33,48 @@
struct mmu_gather {
struct mm_struct *mm;
unsigned int nr; /* set to ~0U means fast mode */
+ unsigned int max; /* nr < max */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
- struct page * pages[FREE_PTE_NR];
+#ifdef HAVE_ARCH_MMU_GATHER
+ struct arch_mmu_gather arch;
+#endif
+ struct page **pages;
+ struct page *local[8];
};
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
+{
+ unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+
+ if (addr) {
+ tlb->pages = (void *)addr;
+ tlb->max = PAGE_SIZE / sizeof(struct page *);
+ }
+}
/* tlb_gather_mmu
* Return a pointer to an initialized struct mmu_gather.
*/
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
tlb->mm = mm;
- /* Use fast mode if only one CPU is online */
- tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
+ tlb->max = ARRAY_SIZE(tlb->local);
+ tlb->pages = tlb->local;
+
+ if (num_online_cpus() > 1) {
+ tlb->nr = 0;
+ __tlb_alloc_pages(tlb);
+ } else /* Use fast mode if only one CPU is online */
+ tlb->nr = ~0U;
tlb->fullmm = full_mm_flush;
- return tlb;
+#ifdef HAVE_ARCH_MMU_GATHER
+ tlb->arch = ARCH_MMU_GATHER_INIT;
+#endif
}
static inline void
@@ -75,6 +87,8 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
+ if (tlb->pages == tlb->local)
+ __tlb_alloc_pages(tlb);
}
}
@@ -90,7 +104,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
/* keep the page table cache within bounds */
check_pgt_cache();
- put_cpu_var(mmu_gathers);
+ if (tlb->pages != tlb->local)
+ free_pages((unsigned long)tlb->pages, 0);
}
/* tlb_remove_page
@@ -106,7 +121,7 @@ static inline void tlb_remove_page(struc
return;
}
tlb->pages[tlb->nr++] = page;
- if (tlb->nr >= FREE_PTE_NR)
+ if (tlb->nr >= tlb->max)
tlb_flush_mmu(tlb, 0, 0);
}
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -769,7 +769,7 @@ int zap_vma_ptes(struct vm_area_struct *
unsigned long size);
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1093,17 +1093,14 @@ static unsigned long unmap_page_range(st
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-unsigned long unmap_vmas(struct mmu_gather **tlbp,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
{
long zap_work = ZAP_BLOCK_SIZE;
- unsigned long tlb_start = 0; /* For tlb_finish_mmu */
- int tlb_start_valid = 0;
unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
- int fullmm = (*tlbp)->fullmm;
struct mm_struct *mm = vma->vm_mm;
mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1124,11 +1121,6 @@ unsigned long unmap_vmas(struct mmu_gath
untrack_pfn_vma(vma, 0, 0);
while (start != end) {
- if (!tlb_start_valid) {
- tlb_start = start;
- tlb_start_valid = 1;
- }
-
if (unlikely(is_vm_hugetlb_page(vma))) {
/*
* It is undesirable to test vma->vm_file as it
@@ -1149,7 +1141,7 @@ unsigned long unmap_vmas(struct mmu_gath
start = end;
} else
- start = unmap_page_range(*tlbp, vma,
+ start = unmap_page_range(tlb, vma,
start, end, &zap_work, details);
if (zap_work > 0) {
@@ -1157,19 +1149,13 @@ unsigned long unmap_vmas(struct mmu_gath
break;
}
- tlb_finish_mmu(*tlbp, tlb_start, start);
-
if (need_resched() ||
(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
- if (i_mmap_lock) {
- *tlbp = NULL;
+ if (i_mmap_lock)
goto out;
- }
cond_resched();
}
- *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
- tlb_start_valid = 0;
zap_work = ZAP_BLOCK_SIZE;
}
}
@@ -1189,16 +1175,15 @@ unsigned long zap_page_range(struct vm_a
unsigned long size, struct zap_details *details)
{
struct mm_struct *mm = vma->vm_mm;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
unsigned long end = address + size;
unsigned long nr_accounted = 0;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
update_hiwater_rss(mm);
end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
- if (tlb)
- tlb_finish_mmu(tlb, address, end);
+ tlb_finish_mmu(&tlb, address, end);
return end;
}
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -1896,17 +1896,17 @@ static void unmap_region(struct mm_struc
unsigned long start, unsigned long end)
{
struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
unsigned long nr_accounted = 0;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, 0);
update_hiwater_rss(mm);
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+ free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
- tlb_finish_mmu(tlb, start, end);
+ tlb_finish_mmu(&tlb, start, end);
}
/*
@@ -2247,7 +2247,7 @@ EXPORT_SYMBOL(do_brk);
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
unsigned long end;
@@ -2272,14 +2272,14 @@ void exit_mmap(struct mm_struct *mm)
lru_add_drain();
flush_cache_mm(mm);
- tlb = tlb_gather_mmu(mm, 1);
+ tlb_gather_mmu(&tlb, mm, 1);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
- tlb_finish_mmu(tlb, 0, end);
+ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ tlb_finish_mmu(&tlb, 0, end);
/*
* Walk the list again, actually closing and freeing it,
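The gist of the new tlb_gather_mmu()/tlb_remove_page() pair can be modelled
in user space (a sketch only; malloc() stands in for __get_free_pages() and
free() for the actual page freeing): start on the small on-stack array,
opportunistically upgrade to a page-sized buffer, and flush whenever the
buffer fills so nothing ever overflows.

#include <stdlib.h>

#define LOCAL_NR	8
#define PAGE_BYTES	4096

struct toy_gather {
	void **slots;
	unsigned int nr, max;
	void *local[LOCAL_NR];
};

static void toy_gather_init(struct toy_gather *g)
{
	void **big;

	g->slots = g->local;		/* always works, even under OOM */
	g->max = LOCAL_NR;
	g->nr = 0;

	big = malloc(PAGE_BYTES);	/* optional speed-up only */
	if (big) {
		g->slots = big;
		g->max = PAGE_BYTES / sizeof(void *);
	}
}

static void toy_flush(struct toy_gather *g)
{
	for (unsigned int i = 0; i < g->nr; i++)
		free(g->slots[i]);	/* stand-in for freeing pages */
	g->nr = 0;
}

static void toy_queue(struct toy_gather *g, void *page)
{
	g->slots[g->nr++] = page;
	if (g->nr == g->max)
		toy_flush(g);		/* never overflow: flush when full */
}

static void toy_gather_finish(struct toy_gather *g)
{
	toy_flush(g);
	if (g->slots != g->local)
		free(g->slots);
}

int main(void)
{
	struct toy_gather g;

	toy_gather_init(&g);
	for (int i = 0; i < 100; i++)
		toy_queue(&g, malloc(64));
	toy_gather_finish(&g);
	return 0;
}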
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 08/20] powerpc: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (7 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 07/20] mm: Preemptible mmu_gather Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 09/20] sparc: " Peter Zijlstra
` (12 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-preempt-tlb-gather-power.patch --]
[-- Type: text/plain, Size: 8703 bytes --]
Fix up powerpc to the new mmu_gather stuff.
PPC has an extra batching queue to RCU-free the actual pagetable
allocations; use the arch extensions for that for now.
For the ppc64_tlb_batch, which tracks the vaddrs to unhash from the
hardware hash-table, keep using per-cpu arrays but flush on context
switch and use a TLF bit to track the lazy_mmu state.
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/powerpc/include/asm/pgalloc.h | 4 ++--
arch/powerpc/include/asm/thread_info.h | 2 ++
arch/powerpc/include/asm/tlb.h | 10 ++++++++++
arch/powerpc/kernel/process.c | 21 ++++++++++++++++++++-
arch/powerpc/mm/pgtable.c | 14 ++++----------
arch/powerpc/mm/tlb_hash32.c | 2 +-
arch/powerpc/mm/tlb_hash64.c | 12 +++++++-----
arch/powerpc/mm/tlb_nohash.c | 2 +-
8 files changed, 47 insertions(+), 20 deletions(-)
Index: linux-2.6/arch/powerpc/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/tlb.h
+++ linux-2.6/arch/powerpc/include/asm/tlb.h
@@ -28,6 +28,16 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
+#define HAVE_ARCH_MMU_GATHER 1
+
+struct pte_freelist_batch;
+
+struct arch_mmu_gather {
+ struct pte_freelist_batch *batch;
+};
+
+#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
+
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
Index: linux-2.6/arch/powerpc/kernel/process.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/process.c
+++ linux-2.6/arch/powerpc/kernel/process.c
@@ -393,6 +393,9 @@ struct task_struct *__switch_to(struct t
struct thread_struct *new_thread, *old_thread;
unsigned long flags;
struct task_struct *last;
+#ifdef CONFIG_PPC64
+ struct ppc64_tlb_batch *batch;
+#endif
#ifdef CONFIG_SMP
/* avoid complexity of lazy save/restore of fpu
@@ -511,7 +514,15 @@ struct task_struct *__switch_to(struct t
old_thread->accum_tb += (current_tb - start_tb);
new_thread->start_tb = current_tb;
}
-#endif
+
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ if (batch->active) {
+ current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+ }
+#endif /* CONFIG_PPC64 */
local_irq_save(flags);
@@ -527,6 +538,14 @@ struct task_struct *__switch_to(struct t
hard_irq_disable();
last = _switch(old_thread, new_thread);
+#ifdef CONFIG_PPC64
+ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
+ current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch->active = 1;
+ }
+#endif /* CONFIG_PPC64 */
+
local_irq_restore(flags);
return last;
Index: linux-2.6/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable.c
+++ linux-2.6/arch/powerpc/mm/pgtable.c
@@ -33,8 +33,6 @@
#include "mmu_decl.h"
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
#ifdef CONFIG_SMP
/*
@@ -43,7 +41,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_ga
* freeing a page table page that is being walked without locks
*/
-static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
static unsigned long pte_freelist_forced_free;
struct pte_freelist_batch
@@ -97,12 +94,10 @@ static void pte_free_submit(struct pte_f
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
- /* This is safe since tlb_gather_mmu has disabled preemption */
- struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+ struct pte_freelist_batch **batchp = &tlb->arch.batch;
unsigned long pgf;
- if (atomic_read(&tlb->mm->mm_users) < 2 ||
- cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
+ if (atomic_read(&tlb->mm->mm_users) < 2) {
pgtable_free(table, shift);
return;
}
@@ -124,10 +119,9 @@ void pgtable_free_tlb(struct mmu_gather
}
}
-void pte_free_finish(void)
+void pte_free_finish(struct mmu_gather *tlb)
{
- /* This is safe since tlb_gather_mmu has disabled preemption */
- struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+ struct pte_freelist_batch **batchp = &tlb->arch.batch;
if (*batchp == NULL)
return;
Index: linux-2.6/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash64.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash64.c
@@ -38,13 +38,11 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, p
* neesd to be flushed. This function will either perform the flush
* immediately or will batch it up if the current CPU has an active
* batch on it.
- *
- * Must be called from within some kind of spinlock/non-preempt region...
*/
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
unsigned long vsid, vaddr;
unsigned int psize;
int ssize;
@@ -99,6 +97,7 @@ void hpte_need_flush(struct mm_struct *m
*/
if (!batch->active) {
flush_hash_page(vaddr, rpte, psize, ssize, 0);
+ put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -127,6 +126,7 @@ void hpte_need_flush(struct mm_struct *m
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
__flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
}
/*
@@ -155,7 +155,7 @@ void __flush_tlb_pending(struct ppc64_tl
void tlb_flush(struct mmu_gather *tlb)
{
- struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
/* If there's a TLB batch pending, then we must flush it because the
* pages are going to be freed and we really don't want to have a CPU
@@ -164,8 +164,10 @@ void tlb_flush(struct mmu_gather *tlb)
if (tlbbatch->index)
__flush_tlb_pending(tlbbatch);
+ put_cpu_var(ppc64_tlb_batch);
+
/* Push out batch of freed page tables */
- pte_free_finish();
+ pte_free_finish(tlb);
}
/**
Index: linux-2.6/arch/powerpc/include/asm/thread_info.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/thread_info.h
+++ linux-2.6/arch/powerpc/include/asm/thread_info.h
@@ -139,10 +139,12 @@ static inline struct thread_info *curren
#define TLF_NAPPING 0 /* idle thread enabled NAP mode */
#define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */
#define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */
+#define TLF_LAZY_MMU 3 /* tlb_batch is active */
#define _TLF_NAPPING (1 << TLF_NAPPING)
#define _TLF_SLEEPING (1 << TLF_SLEEPING)
#define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK)
+#define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU)
#ifndef __ASSEMBLY__
#define HAVE_SET_RESTORE_SIGMASK 1
Index: linux-2.6/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/pgalloc.h
+++ linux-2.6/arch/powerpc/include/asm/pgalloc.h
@@ -32,13 +32,13 @@ static inline void pte_free(struct mm_st
#ifdef CONFIG_SMP
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(void);
+extern void pte_free_finish(struct mmu_gather *tlb);
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(table, shift);
}
-static inline void pte_free_finish(void) { }
+static inline void pte_free_finish(struct mmu_gather *tlb) { }
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
Index: linux-2.6/arch/powerpc/mm/tlb_hash32.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash32.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash32.c
@@ -73,7 +73,7 @@ void tlb_flush(struct mmu_gather *tlb)
}
/* Push out batch of freed page tables */
- pte_free_finish();
+ pte_free_finish(tlb);
}
/*
Index: linux-2.6/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_nohash.c
+++ linux-2.6/arch/powerpc/mm/tlb_nohash.c
@@ -301,7 +301,7 @@ void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm(tlb->mm);
/* Push out batch of freed page tables */
- pte_free_finish();
+ pte_free_finish(tlb);
}
/*
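The pgtable_free_tlb() change is the interesting part: the freelist batch
moves from per-cpu data into the gather itself, so nothing depends on
preemption staying disabled between queueing a table and the final flush.
A user-space sketch of that shape (toy_* names are illustrative; free()
stands in for the RCU-deferred free of queued page tables):

#include <stdlib.h>

#define BATCH_NR 16

struct toy_batch {
	unsigned int nr;
	void *tables[BATCH_NR];
};

struct toy_gather {
	struct toy_batch *batch;	/* allocated on first use */
};

static void toy_batch_submit(struct toy_batch *b)
{
	for (unsigned int i = 0; i < b->nr; i++)
		free(b->tables[i]);
	free(b);
}

static void toy_table_free(struct toy_gather *g, void *table)
{
	struct toy_batch *b = g->batch;

	if (!b) {
		b = calloc(1, sizeof(*b));
		if (!b) {		/* no memory: free synchronously */
			free(table);
			return;
		}
		g->batch = b;
	}
	b->tables[b->nr++] = table;
	if (b->nr == BATCH_NR) {
		toy_batch_submit(b);
		g->batch = NULL;
	}
}

static void toy_free_finish(struct toy_gather *g)
{
	if (g->batch) {
		toy_batch_submit(g->batch);
		g->batch = NULL;
	}
}

int main(void)
{
	struct toy_gather g = { 0 };

	for (int i = 0; i < 40; i++)
		toy_table_free(&g, malloc(128));
	toy_free_finish(&g);
	return 0;
}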
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 09/20] sparc: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (8 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 08/20] powerpc: " Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 10/20] s390: preemptible mmu_gather Peter Zijlstra
` (11 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-preempt-tlb-gather-sparc.patch --]
[-- Type: text/plain, Size: 8844 bytes --]
Rework the sparc mmu_gather usage to conform to the new world order :-)
Sparc mmu_gather does two things:
- tracks vaddrs to unhash
- tracks pages to free
Split these two things as powerpc has done: keep the vaddrs in
per-cpu data structures and flush them on context switch.
The remaining bits can then use the generic mmu_gather.
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/sparc/include/asm/pgalloc_64.h | 3 +
arch/sparc/include/asm/tlb_64.h | 91 ++---------------------------------
arch/sparc/include/asm/tlbflush_64.h | 12 +++-
arch/sparc/mm/tlb.c | 42 +++++++++-------
arch/sparc/mm/tsb.c | 15 +++--
5 files changed, 51 insertions(+), 112 deletions(-)
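The shape of that split as a user-space sketch (toy_* names are
illustrative; _Thread_local stands in for per-cpu data): the vaddr batch
stays per cpu and is flushed explicitly, on overflow or at context switch,
while page freeing is left entirely to the generic mmu_gather.

#include <stdio.h>

#define TLB_BATCH_NR 192

struct toy_tlb_batch {
	unsigned long nr;
	unsigned long vaddrs[TLB_BATCH_NR];
};

/* stand-in for the per-cpu tlb_batch */
static _Thread_local struct toy_tlb_batch tlb_batch;

static void toy_flush_pending(void)
{
	struct toy_tlb_batch *tb = &tlb_batch;

	for (unsigned long i = 0; i < tb->nr; i++)
		printf("unhash vaddr %#lx\n", tb->vaddrs[i]);
	tb->nr = 0;
}

static void toy_batch_add(unsigned long vaddr)
{
	struct toy_tlb_batch *tb = &tlb_batch;

	tb->vaddrs[tb->nr++] = vaddr & ~0xfffUL;	/* page-align */
	if (tb->nr == TLB_BATCH_NR)
		toy_flush_pending();	/* would also run at context switch */
}

int main(void)
{
	toy_batch_add(0x123456);
	toy_batch_add(0xabcdef);
	toy_flush_pending();
	return 0;
}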
Index: linux-2.6/arch/sparc/include/asm/pgalloc_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/pgalloc_64.h
+++ linux-2.6/arch/sparc/include/asm/pgalloc_64.h
@@ -78,4 +78,7 @@ static inline void check_pgt_cache(void)
quicklist_trim(0, NULL, 25, 16);
}
+#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
+#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
+
#endif /* _SPARC64_PGALLOC_H */
Index: linux-2.6/arch/sparc/include/asm/tlb_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/tlb_64.h
+++ linux-2.6/arch/sparc/include/asm/tlb_64.h
@@ -7,66 +7,11 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#define TLB_BATCH_NR 192
-
-/*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
- */
-#ifdef CONFIG_SMP
- #define FREE_PTE_NR 506
- #define tlb_fast_mode(bp) ((bp)->pages_nr == ~0U)
-#else
- #define FREE_PTE_NR 1
- #define tlb_fast_mode(bp) 1
-#endif
-
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int pages_nr;
- unsigned int need_flush;
- unsigned int fullmm;
- unsigned int tlb_nr;
- unsigned long vaddrs[TLB_BATCH_NR];
- struct page *pages[FREE_PTE_NR];
-};
-
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
#ifdef CONFIG_SMP
extern void smp_flush_tlb_pending(struct mm_struct *,
unsigned long, unsigned long *);
#endif
-extern void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
-extern void flush_tlb_pending(void);
-
-static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
-{
- struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
-
- BUG_ON(mp->tlb_nr);
-
- mp->mm = mm;
- mp->pages_nr = num_online_cpus() > 1 ? 0U : ~0U;
- mp->fullmm = full_mm_flush;
-
- return mp;
-}
-
-
-static inline void tlb_flush_mmu(struct mmu_gather *mp)
-{
- if (!mp->fullmm)
- flush_tlb_pending();
- if (mp->need_flush) {
- free_pages_and_swap_cache(mp->pages, mp->pages_nr);
- mp->pages_nr = 0;
- mp->need_flush = 0;
- }
-
-}
-
#ifdef CONFIG_SMP
extern void smp_flush_tlb_mm(struct mm_struct *mm);
#define do_flush_tlb_mm(mm) smp_flush_tlb_mm(mm)
@@ -74,38 +19,14 @@ extern void smp_flush_tlb_mm(struct mm_s
#define do_flush_tlb_mm(mm) __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT)
#endif
-static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, unsigned long end)
-{
- tlb_flush_mmu(mp);
-
- if (mp->fullmm)
- mp->fullmm = 0;
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- put_cpu_var(mmu_gathers);
-}
-
-static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page)
-{
- if (tlb_fast_mode(mp)) {
- free_page_and_swap_cache(page);
- return;
- }
- mp->need_flush = 1;
- mp->pages[mp->pages_nr++] = page;
- if (mp->pages_nr >= FREE_PTE_NR)
- tlb_flush_mmu(mp);
-}
-
-#define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0)
-#define pte_free_tlb(mp, ptepage, addr) pte_free((mp)->mm, ptepage)
-#define pmd_free_tlb(mp, pmdp, addr) pmd_free((mp)->mm, pmdp)
-#define pud_free_tlb(tlb,pudp, addr) __pud_free_tlb(tlb,pudp,addr)
+extern void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
+extern void flush_tlb_pending(void);
-#define tlb_migrate_finish(mm) do { } while (0)
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush(tlb) flush_tlb_pending()
+
+#include <asm-generic/tlb.h>
#endif /* _SPARC64_TLB_H */
Index: linux-2.6/arch/sparc/include/asm/tlbflush_64.h
===================================================================
--- linux-2.6.orig/arch/sparc/include/asm/tlbflush_64.h
+++ linux-2.6/arch/sparc/include/asm/tlbflush_64.h
@@ -5,9 +5,17 @@
#include <asm/mmu_context.h>
/* TSB flush operations. */
-struct mmu_gather;
+
+#define TLB_BATCH_NR 192
+
+struct tlb_batch {
+ struct mm_struct *mm;
+ unsigned long tlb_nr;
+ unsigned long vaddrs[TLB_BATCH_NR];
+};
+
extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
-extern void flush_tsb_user(struct mmu_gather *mp);
+extern void flush_tsb_user(struct tlb_batch *tb);
/* TLB flush operations. */
Index: linux-2.6/arch/sparc/mm/tlb.c
===================================================================
--- linux-2.6.orig/arch/sparc/mm/tlb.c
+++ linux-2.6/arch/sparc/mm/tlb.c
@@ -19,33 +19,33 @@
/* Heavily inspired by the ppc64 code. */
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+static DEFINE_PER_CPU(struct tlb_batch, tlb_batch);
void flush_tlb_pending(void)
{
- struct mmu_gather *mp = &get_cpu_var(mmu_gathers);
+ struct tlb_batch *tb = &get_cpu_var(tlb_batch);
- if (mp->tlb_nr) {
- flush_tsb_user(mp);
+ if (tb->tlb_nr) {
+ flush_tsb_user(tb);
- if (CTX_VALID(mp->mm->context)) {
+ if (CTX_VALID(tb->mm->context)) {
#ifdef CONFIG_SMP
- smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
- &mp->vaddrs[0]);
+ smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
+ &tb->vaddrs[0]);
#else
- __flush_tlb_pending(CTX_HWBITS(mp->mm->context),
- mp->tlb_nr, &mp->vaddrs[0]);
+ __flush_tlb_pending(CTX_HWBITS(tb->mm->context),
+ tb->tlb_nr, &tb->vaddrs[0]);
#endif
}
- mp->tlb_nr = 0;
+ tb->tlb_nr = 0;
}
- put_cpu_var(mmu_gathers);
+ put_cpu_var(tlb_batch);
}
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig)
{
- struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
+ struct tlb_batch *tb = &get_cpu_var(tlb_batch);
unsigned long nr;
vaddr &= PAGE_MASK;
@@ -77,21 +77,27 @@ void tlb_batch_add(struct mm_struct *mm,
no_cache_flush:
- if (mp->fullmm)
+ /*
+ if (tb->fullmm) {
+ put_cpu_var(tlb_batch);
return;
+ }
+ */
- nr = mp->tlb_nr;
+ nr = tb->tlb_nr;
- if (unlikely(nr != 0 && mm != mp->mm)) {
+ if (unlikely(nr != 0 && mm != tb->mm)) {
flush_tlb_pending();
nr = 0;
}
if (nr == 0)
- mp->mm = mm;
+ tb->mm = mm;
- mp->vaddrs[nr] = vaddr;
- mp->tlb_nr = ++nr;
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
+
+ put_cpu_var(tlb_batch);
}
Index: linux-2.6/arch/sparc/mm/tsb.c
===================================================================
--- linux-2.6.orig/arch/sparc/mm/tsb.c
+++ linux-2.6/arch/sparc/mm/tsb.c
@@ -47,12 +47,13 @@ void flush_tsb_kernel_range(unsigned lon
}
}
-static void __flush_tsb_one(struct mmu_gather *mp, unsigned long hash_shift, unsigned long tsb, unsigned long nentries)
+static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+ unsigned long tsb, unsigned long nentries)
{
unsigned long i;
- for (i = 0; i < mp->tlb_nr; i++) {
- unsigned long v = mp->vaddrs[i];
+ for (i = 0; i < tb->tlb_nr; i++) {
+ unsigned long v = tb->vaddrs[i];
unsigned long tag, ent, hash;
v &= ~0x1UL;
@@ -65,9 +66,9 @@ static void __flush_tsb_one(struct mmu_g
}
}
-void flush_tsb_user(struct mmu_gather *mp)
+void flush_tsb_user(struct tlb_batch *tb)
{
- struct mm_struct *mm = mp->mm;
+ struct mm_struct *mm = tb->mm;
unsigned long nentries, base, flags;
spin_lock_irqsave(&mm->context.lock, flags);
@@ -76,7 +77,7 @@ void flush_tsb_user(struct mmu_gather *m
nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one(mp, PAGE_SHIFT, base, nentries);
+ __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
#ifdef CONFIG_HUGETLB_PAGE
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
@@ -84,7 +85,7 @@ void flush_tsb_user(struct mmu_gather *m
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
if (tlb_type == cheetah_plus || tlb_type == hypervisor)
base = __pa(base);
- __flush_tsb_one(mp, HPAGE_SHIFT, base, nentries);
+ __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
}
#endif
spin_unlock_irqrestore(&mm->context.lock, flags);
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 10/20] s390: preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (9 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 09/20] sparc: " Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 11/20] arm: Preemptible mmu_gather Peter Zijlstra
` (10 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Martin Schwidefsky
[-- Attachment #1: martin-mm-preempt-tlb-gather-s390.patch --]
[-- Type: text/plain, Size: 2607 bytes --]
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Adapt the stand-alone s390 mmu_gather implementation to the new
preemptible mmu_gather interface.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
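The allocation pattern this introduces — a small embedded array with an
opportunistic page-sized upgrade — shown in isolation (a sketch with
illustrative names; the real fields are max/array/local[8] in the diff
below):

#include <linux/gfp.h>
#include <linux/kernel.h>

struct sketch_gather {
	unsigned int	max;
	void		**array;
	void		*local[8];
};

static inline void sketch_gather_init(struct sketch_gather *g)
{
	unsigned long addr;

	/* Start with the embedded buffer so initialisation cannot fail. */
	g->array = g->local;
	g->max = ARRAY_SIZE(g->local);

	/* Opportunistically upgrade to a full page of pointers. */
	addr = __get_free_pages(GFP_ATOMIC, 0);
	if (addr) {
		g->array = (void *)addr;
		g->max = PAGE_SIZE / sizeof(void *);
	}
}

static inline void sketch_gather_fini(struct sketch_gather *g)
{
	if (g->array != g->local)
		free_pages((unsigned long)g->array, 0);
}

If the GFP_ATOMIC allocation fails the gather still works correctly, it
merely flushes every 8 entries instead of every ~512 (with 4 KiB pages).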
---
arch/s390/include/asm/tlb.h | 43 +++++++++++++++++++++++++------------------
1 file changed, 25 insertions(+), 18 deletions(-)
Index: linux-2.6/arch/s390/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/s390/include/asm/tlb.h
+++ linux-2.6/arch/s390/include/asm/tlb.h
@@ -28,44 +28,50 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
-#ifndef CONFIG_SMP
-#define TLB_NR_PTRS 1
-#else
-#define TLB_NR_PTRS 508
-#endif
-
struct mmu_gather {
struct mm_struct *mm;
unsigned int fullmm;
unsigned int nr_ptes;
unsigned int nr_pxds;
- void *array[TLB_NR_PTRS];
+ unsigned int max;
+ void **array;
+ void *local[8];
};
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
- unsigned int full_mm_flush)
+static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+ unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+
+ if (addr) {
+ tlb->array = (void *) addr;
+ tlb->max = PAGE_SIZE / sizeof(void *);
+ }
+}
+static inline void tlb_gather_mmu(struct mmu_gather *tlb,
+ struct mm_struct *mm,
+ unsigned int full_mm_flush)
+{
tlb->mm = mm;
+ tlb->max = ARRAY_SIZE(tlb->local);
+ tlb->array = tlb->local;
tlb->fullmm = full_mm_flush;
- tlb->nr_ptes = 0;
- tlb->nr_pxds = TLB_NR_PTRS;
if (tlb->fullmm)
__tlb_flush_mm(mm);
- return tlb;
+ else
+ __tlb_alloc_pages(tlb);
+ tlb->nr_ptes = 0;
+ tlb->nr_pxds = tlb->max;
}
static inline void tlb_flush_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
- if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS))
+ if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
__tlb_flush_mm(tlb->mm);
while (tlb->nr_ptes > 0)
pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]);
- while (tlb->nr_pxds < TLB_NR_PTRS)
+ while (tlb->nr_pxds < tlb->max)
/* pgd_free frees the pointer as region or segment table */
pgd_free(tlb->mm, tlb->array[tlb->nr_pxds++]);
}
@@ -78,7 +84,8 @@ static inline void tlb_finish_mmu(struct
/* keep the page table cache within bounds */
check_pgt_cache();
- put_cpu_var(mmu_gathers);
+ if (tlb->array != tlb->local)
+ free_pages((unsigned long) tlb->array, 0);
}
/*
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 11/20] arm: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (10 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 10/20] s390: preemptible mmu_gather Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 12/20] sh: " Peter Zijlstra
` (9 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Russell King
[-- Attachment #1: mm-preempt-tlb-gather-arm.patch --]
[-- Type: text/plain, Size: 1109 bytes --]
Fix up the arm mmu_gather code to conform to the new API.
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
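The conversion is purely mechanical: with the generic code taking a
caller-provided mmu_gather, call sites look like this minimal sketch
(illustrative sequence, not a hunk from this patch):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, 0);
	/* ... unmap pages and free page tables against &tlb ... */
	tlb_finish_mmu(&tlb, start, end);

Nothing in that sequence takes a per-cpu reference any more, which is why
the DECLARE_PER_CPU/get_cpu_var pair can simply be deleted here.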
---
arch/arm/include/asm/tlb.h | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
Index: linux-2.6/arch/arm/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/arm/include/asm/tlb.h
+++ linux-2.6/arch/arm/include/asm/tlb.h
@@ -40,17 +40,11 @@ struct mmu_gather {
unsigned long range_end;
};
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
tlb->mm = mm;
tlb->fullmm = full_mm_flush;
-
- return tlb;
}
static inline void
@@ -61,8 +55,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
/* keep the page table cache within bounds */
check_pgt_cache();
-
- put_cpu_var(mmu_gathers);
}
/*
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 12/20] sh: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (11 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 11/20] arm: Preemptible mmu_gather Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 13/20] um: " Peter Zijlstra
` (8 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Paul Mundt
[-- Attachment #1: mm-preempt-tlb-gather-sh.patch --]
[-- Type: text/plain, Size: 1301 bytes --]
Fix up the sh mmu_gather code to conform to the new API.
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/sh/include/asm/tlb.h | 12 ++----------
1 file changed, 2 insertions(+), 10 deletions(-)
Index: linux-2.6/arch/sh/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/sh/include/asm/tlb.h
+++ linux-2.6/arch/sh/include/asm/tlb.h
@@ -23,8 +23,6 @@ struct mmu_gather {
unsigned long start, end;
};
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
static inline void init_tlb_gather(struct mmu_gather *tlb)
{
tlb->start = TASK_SIZE;
@@ -36,17 +34,13 @@ static inline void init_tlb_gather(struc
}
}
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
tlb->mm = mm;
tlb->fullmm = full_mm_flush;
init_tlb_gather(tlb);
-
- return tlb;
}
static inline void
@@ -57,8 +51,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
/* keep the page table cache within bounds */
check_pgt_cache();
-
- put_cpu_var(mmu_gathers);
}
static inline void
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 13/20] um: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (12 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 12/20] sh: " Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 14/20] ia64: " Peter Zijlstra
` (7 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Jeff Dike
[-- Attachment #1: mm-preempt-tlb-gather-um.patch --]
[-- Type: text/plain, Size: 1574 bytes --]
Fix up the um mmu_gather code to conform to the new API.
Cc: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/um/include/asm/tlb.h | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
Index: linux-2.6/arch/um/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/um/include/asm/tlb.h
+++ linux-2.6/arch/um/include/asm/tlb.h
@@ -22,9 +22,6 @@ struct mmu_gather {
unsigned int fullmm; /* non-zero means full mm flush */
};
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
unsigned long address)
{
@@ -47,20 +44,13 @@ static inline void init_tlb_gather(struc
}
}
-/* tlb_gather_mmu
- * Return a pointer to an initialized struct mmu_gather.
- */
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
tlb->mm = mm;
tlb->fullmm = full_mm_flush;
init_tlb_gather(tlb);
-
- return tlb;
}
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -87,8 +77,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
/* keep the page table cache within bounds */
check_pgt_cache();
-
- put_cpu_var(mmu_gathers);
}
/* tlb_remove_page
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 14/20] ia64: Preemptible mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (13 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 13/20] um: " Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
` (6 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell, Tony Luck
[-- Attachment #1: mm-preempt-tlb-gather-ia64.patch --]
[-- Type: text/plain, Size: 3079 bytes --]
Fix up the ia64 mmu_gather code to conform to the new API.
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/ia64/include/asm/tlb.h | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
Index: linux-2.6/arch/ia64/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/ia64/include/asm/tlb.h
+++ linux-2.6/arch/ia64/include/asm/tlb.h
@@ -47,21 +47,21 @@
#include <asm/machvec.h>
#ifdef CONFIG_SMP
-# define FREE_PTE_NR 2048
# define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
#else
-# define FREE_PTE_NR 0
# define tlb_fast_mode(tlb) (1)
#endif
struct mmu_gather {
struct mm_struct *mm;
unsigned int nr; /* == ~0U => fast mode */
+ unsigned int max;
unsigned char fullmm; /* non-zero means full mm flush */
unsigned char need_flush; /* really unmapped some PTEs? */
unsigned long start_addr;
unsigned long end_addr;
- struct page *pages[FREE_PTE_NR];
+ struct page **pages;
+ struct page *local[8];
};
struct ia64_tr_entry {
@@ -90,9 +90,6 @@ extern struct ia64_tr_entry *ia64_idtrs[
#define RR_RID_MASK 0x00000000ffffff00L
#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
/*
* Flush the TLB for address range START to END and, if not in fast mode, release the
* freed pages that were gathered up to this point.
@@ -147,15 +144,23 @@ ia64_tlb_flush_mmu (struct mmu_gather *t
}
}
-/*
- * Return a pointer to an initialized struct mmu_gather.
- */
-static inline struct mmu_gather *
-tlb_gather_mmu (struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
{
- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+ unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+
+ if (addr) {
+ tlb->pages = (void *)addr;
+ tlb->max = PAGE_SIZE / sizeof(void *);
+ }
+}
+
+static inline void
+tlb_gather_mmu (struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+{
tlb->mm = mm;
+ tlb->max = ARRAY_SIZE(tlb->local);
+ tlb->pages = tlb->local;
/*
* Use fast mode if only 1 CPU is online.
*
@@ -172,7 +177,6 @@ tlb_gather_mmu (struct mm_struct *mm, un
tlb->nr = (num_online_cpus() == 1) ? ~0U : 0;
tlb->fullmm = full_mm_flush;
tlb->start_addr = ~0UL;
- return tlb;
}
/*
@@ -191,7 +195,8 @@ tlb_finish_mmu (struct mmu_gather *tlb,
/* keep the page table cache within bounds */
check_pgt_cache();
- put_cpu_var(mmu_gathers);
+ if (tlb->pages != tlb->local)
+ free_pages((unsigned long)tlb->pages, 0);
}
/*
@@ -208,8 +213,12 @@ tlb_remove_page (struct mmu_gather *tlb,
free_page_and_swap_cache(page);
return;
}
+
+ if (!tlb->nr && tlb->pages == tlb->local)
+ __tlb_alloc_pages(tlb);
+
tlb->pages[tlb->nr++] = page;
- if (tlb->nr >= FREE_PTE_NR)
+ if (tlb->nr >= tlb->max)
ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
}
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (14 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 14/20] ia64: " Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 16/20] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
` (5 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-preempt-tlb-gather-rcu.patch --]
[-- Type: text/plain, Size: 12668 bytes --]
In case other architectures require RCU-freed page tables to implement
gup_fast(), software-filled hashes and similar things, provide the
means to do so by moving the logic into generic code.
Requested-by: David Miller <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
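An architecture opts in by selecting the new Kconfig symbol and exposing a
__tlb_remove_table() that frees one table page; the generic
tlb_remove_table() then handles batching, the RCU-sched callback and the
IPI fallback. A minimal sketch of the arch side (illustrative only; the
powerpc version below additionally encodes the table size in the pointer's
low bits):

	config MYARCH
		select HAVE_RCU_TABLE_FREE

/* In the arch's pgalloc.h, so it is visible to mm/memory.c: */
static inline void __tlb_remove_table(void *table)
{
	/*
	 * Invoked from the RCU callback, or synchronously after an IPI
	 * broadcast when the batch page could not be allocated.
	 */
	free_page((unsigned long)table);
}

The arch's p?d_free_tlb() paths then hand tables to
tlb_remove_table(tlb, table) instead of freeing them directly.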
---
arch/Kconfig | 3 +
arch/powerpc/Kconfig | 1
arch/powerpc/include/asm/pgalloc.h | 21 ++++++-
arch/powerpc/include/asm/tlb.h | 10 ---
arch/powerpc/mm/pgtable.c | 98 -------------------------------------
arch/powerpc/mm/tlb_hash32.c | 3 -
arch/powerpc/mm/tlb_hash64.c | 3 -
arch/powerpc/mm/tlb_nohash.c | 3 -
include/asm-generic/tlb.h | 57 +++++++++++++++++++--
mm/memory.c | 77 +++++++++++++++++++++++++++++
10 files changed, 151 insertions(+), 125 deletions(-)
Index: linux-2.6/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/pgalloc.h
+++ linux-2.6/arch/powerpc/include/asm/pgalloc.h
@@ -31,14 +31,29 @@ static inline void pte_free(struct mm_st
#endif
#ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(struct mmu_gather *tlb);
+struct mmu_gather;
+extern void tlb_remove_table(struct mmu_gather *, void *);
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(table, shift);
}
-static inline void pte_free_finish(struct mmu_gather *tlb) { }
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
Index: linux-2.6/arch/powerpc/include/asm/tlb.h
===================================================================
--- linux-2.6.orig/arch/powerpc/include/asm/tlb.h
+++ linux-2.6/arch/powerpc/include/asm/tlb.h
@@ -28,16 +28,6 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
-#define HAVE_ARCH_MMU_GATHER 1
-
-struct pte_freelist_batch;
-
-struct arch_mmu_gather {
- struct pte_freelist_batch *batch;
-};
-
-#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
-
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
Index: linux-2.6/arch/powerpc/mm/pgtable.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/pgtable.c
+++ linux-2.6/arch/powerpc/mm/pgtable.c
@@ -33,104 +33,6 @@
#include "mmu_decl.h"
-#ifdef CONFIG_SMP
-
-/*
- * Handle batching of page table freeing on SMP. Page tables are
- * queued up and send to be freed later by RCU in order to avoid
- * freeing a page table page that is being walked without locks
- */
-
-static unsigned long pte_freelist_forced_free;
-
-struct pte_freelist_batch
-{
- struct rcu_head rcu;
- unsigned int index;
- unsigned long tables[0];
-};
-
-#define PTE_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
- / sizeof(unsigned long))
-
-static void pte_free_smp_sync(void *arg)
-{
- /* Do nothing, just ensure we sync with all CPUs */
-}
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-static void pgtable_free_now(void *table, unsigned shift)
-{
- pte_freelist_forced_free++;
-
- smp_call_function(pte_free_smp_sync, NULL, 1);
-
- pgtable_free(table, shift);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
- struct pte_freelist_batch *batch =
- container_of(head, struct pte_freelist_batch, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++) {
- void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
- }
-
- free_page((unsigned long)batch);
-}
-
-static void pte_free_submit(struct pte_freelist_batch *batch)
-{
- call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
- unsigned long pgf;
-
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- pgtable_free(table, shift);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
- if (*batchp == NULL) {
- pgtable_free_now(table, shift);
- return;
- }
- (*batchp)->index = 0;
- }
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf = (unsigned long)table | shift;
- (*batchp)->tables[(*batchp)->index++] = pgf;
- if ((*batchp)->index == PTE_FREELIST_SIZE) {
- pte_free_submit(*batchp);
- *batchp = NULL;
- }
-}
-
-void pte_free_finish(struct mmu_gather *tlb)
-{
- struct pte_freelist_batch **batchp = &tlb->arch.batch;
-
- if (*batchp == NULL)
- return;
- pte_free_submit(*batchp);
- *batchp = NULL;
-}
-
-#endif /* CONFIG_SMP */
-
static inline int is_exec_fault(void)
{
return current->thread.regs && TRAP(current->thread.regs) == 0x400;
Index: linux-2.6/arch/powerpc/mm/tlb_hash32.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash32.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash32.c
@@ -71,9 +71,6 @@ void tlb_flush(struct mmu_gather *tlb)
*/
_tlbia();
}
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_hash64.c
+++ linux-2.6/arch/powerpc/mm/tlb_hash64.c
@@ -165,9 +165,6 @@ void tlb_flush(struct mmu_gather *tlb)
__flush_tlb_pending(tlbbatch);
put_cpu_var(ppc64_tlb_batch);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/**
Index: linux-2.6/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-2.6.orig/arch/powerpc/mm/tlb_nohash.c
+++ linux-2.6/arch/powerpc/mm/tlb_nohash.c
@@ -299,9 +299,6 @@ EXPORT_SYMBOL(flush_tlb_range);
void tlb_flush(struct mmu_gather *tlb)
{
flush_tlb_mm(tlb->mm);
-
- /* Push out batch of freed page tables */
- pte_free_finish(tlb);
}
/*
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -27,6 +27,49 @@
#define tlb_fast_mode(tlb) 1
#endif
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+/*
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore need some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, free the page. Since the disabling of
+ * IRQs delays the completion of the TLB flush we can never observe an already
+ * freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by some
+ * other means, this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, this
+ * allocation is deep inside the MM code and can thus easily fail on memory
+ * pressure. To guarantee progress we fall back to single table freeing, see
+ * the implementation of tlb_remove_table_one().
+ *
+ */
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
+
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#endif
+
/* struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
@@ -36,11 +79,12 @@ struct mmu_gather {
unsigned int max; /* nr < max */
unsigned int need_flush;/* Really unmapped some ptes? */
unsigned int fullmm; /* non-zero means full mm flush */
-#ifdef HAVE_ARCH_MMU_GATHER
- struct arch_mmu_gather arch;
-#endif
struct page **pages;
struct page *local[8];
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ struct mmu_table_batch *batch;
+#endif
};
static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
@@ -72,8 +116,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
tlb->fullmm = full_mm_flush;
-#ifdef HAVE_ARCH_MMU_GATHER
- tlb->arch = ARCH_MMU_GATHER_INIT;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb->batch = NULL;
#endif
}
@@ -84,6 +128,9 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
return;
tlb->need_flush = 0;
tlb_flush(tlb);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
if (!tlb_fast_mode(tlb)) {
free_pages_and_swap_cache(tlb->pages, tlb->nr);
tlb->nr = 0;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -193,6 +193,83 @@ static void check_sync_rss_stat(struct t
#endif
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely on
+ * IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->need_flush = 1;
+
+ /*
+ * When there are fewer than two users of this mm there cannot be a
+ * concurrent page-table walk.
+ */
+ if (atomic_read(&tlb->mm->mm_users) < 2) {
+ __tlb_remove_table(table);
+ return;
+ }
+
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)__get_free_page(GFP_ATOMIC);
+ if (*batch == NULL) {
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_table_flush(tlb);
+}
+
+#endif
+
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
Index: linux-2.6/arch/Kconfig
===================================================================
--- linux-2.6.orig/arch/Kconfig
+++ linux-2.6/arch/Kconfig
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
subsystem. Also has support for calculating CPU cycle events
to determine how many clock cycles in a given period.
+config HAVE_RCU_TABLE_FREE
+ bool
+
source "kernel/gcov/Kconfig"
Index: linux-2.6/arch/powerpc/Kconfig
===================================================================
--- linux-2.6.orig/arch/powerpc/Kconfig
+++ linux-2.6/arch/powerpc/Kconfig
@@ -141,6 +141,7 @@ config PPC
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
+ select HAVE_RCU_TABLE_FREE if PPC64
config EARLY_PRINTK
bool
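To make the read side of this scheme concrete: a lockless page-table walker
of the kind being protected looks roughly like the sketch below
(illustrative only; the real gup_fast() is considerably more involved).
The local_irq_save() region is exactly what the IPI broadcast in
tlb_remove_table_one(), or the RCU-sched grace period in the common case,
waits out before a table page is really freed:

#include <linux/mm.h>
#include <linux/irqflags.h>

static int sketch_lockless_walk(struct mm_struct *mm, unsigned long addr)
{
	unsigned long flags;
	pgd_t *pgd;
	int present = 0;

	local_irq_save(flags);	/* holds off both the IPI and RCU-sched */
	pgd = pgd_offset(mm, addr);
	if (!pgd_none(*pgd)) {
		/*
		 * Descend through pud/pmd/pte here; none of those pages
		 * can be freed while we sit in this IRQ-disabled region.
		 */
		present = 1;
	}
	local_irq_restore(flags);

	return present;
}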
^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 16/20] lockdep, mutex: Provide mutex_lock_nest_lock
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (15 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 15/20] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 17/20] mutex: Provide mutex_is_contended Peter Zijlstra
` (4 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mutex_lock_nest_lock.patch --]
[-- Type: text/plain, Size: 5440 bytes --]
Provide the mutex_lock_nest_lock() annotation.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/lockdep.h | 3 +++
include/linux/mutex.h | 9 +++++++++
kernel/mutex.c | 25 +++++++++++++++++--------
3 files changed, 29 insertions(+), 8 deletions(-)
Index: linux-2.6/include/linux/lockdep.h
===================================================================
--- linux-2.6.orig/include/linux/lockdep.h
+++ linux-2.6/include/linux/lockdep.h
@@ -492,12 +492,15 @@ static inline void print_irqtrace_events
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# ifdef CONFIG_PROVE_LOCKING
# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
+# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
# else
# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
+# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
# endif
# define mutex_release(l, n, i) lock_release(l, n, i)
#else
# define mutex_acquire(l, s, t, i) do { } while (0)
+# define mutex_acquire_nest(l, s, t, n, i) do { } while (0)
# define mutex_release(l, n, i) do { } while (0)
#endif
Index: linux-2.6/include/linux/mutex.h
===================================================================
--- linux-2.6.orig/include/linux/mutex.h
+++ linux-2.6/include/linux/mutex.h
@@ -124,6 +124,7 @@ static inline int mutex_is_locked(struct
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
unsigned int subclass);
extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
@@ -132,6 +133,13 @@ extern int __must_check mutex_lock_killa
#define mutex_lock(lock) mutex_lock_nested(lock, 0)
#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
+
+#define mutex_lock_nest_lock(lock, nest_lock) \
+do { \
+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
+ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
+} while (0)
+
#else
extern void mutex_lock(struct mutex *lock);
extern int __must_check mutex_lock_interruptible(struct mutex *lock);
@@ -140,6 +148,7 @@ extern int __must_check mutex_lock_killa
# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
# define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
#endif
/*
Index: linux-2.6/kernel/mutex.c
===================================================================
--- linux-2.6.orig/kernel/mutex.c
+++ linux-2.6/kernel/mutex.c
@@ -140,14 +140,14 @@ EXPORT_SYMBOL(mutex_unlock);
*/
static inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
- unsigned long ip)
+ struct lockdep_map *nest_lock, unsigned long ip)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
preempt_disable();
- mutex_acquire(&lock->dep_map, subclass, 0, ip);
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
@@ -285,16 +285,25 @@ void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
+void __sched
+_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
+{
+ might_sleep();
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
+}
+
+EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
+
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_);
+ return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
@@ -303,7 +312,7 @@ mutex_lock_interruptible_nested(struct m
{
might_sleep();
return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
- subclass, _RET_IP_);
+ subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -409,7 +418,7 @@ __mutex_lock_slowpath(atomic_t *lock_cou
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
@@ -417,7 +426,7 @@ __mutex_lock_killable_slowpath(atomic_t
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_);
+ return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
@@ -425,7 +434,7 @@ __mutex_lock_interruptible_slowpath(atom
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
#endif
^ permalink raw reply [flat|nested] 45+ messages in thread
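The point of the new annotation is easiest to see with a sketch
(illustrative only; the object type and the outer lock are made-up
names, not from this patch). When every taker of a whole class of
mutexes must first hold one outer lock, the inner locks can never
deadlock against each other, and mutex_lock_nest_lock() tells lockdep
exactly that, so an unbounded number of same-class instances may be
held at once without burning subclasses:

struct object {
	struct mutex lock;		/* one instance per object */
	struct list_head list;
};

static DEFINE_MUTEX(all_objects_mutex);	/* serializes bulk locking */
static LIST_HEAD(all_objects);

static void lock_all_objects(void)
{
	struct object *obj;

	mutex_lock(&all_objects_mutex);

	/*
	 * All obj->lock instances are only ever taken in bulk under
	 * all_objects_mutex, so no lock-order inversion between them
	 * is possible; annotate the acquisitions accordingly.
	 */
	list_for_each_entry(obj, &all_objects, list)
		mutex_lock_nest_lock(&obj->lock, &all_objects_mutex);
}

(The matching bulk unlock is omitted.) This is the pattern
mm_take_all_locks() relies on once i_mmap_lock and anon_vma->lock
become mutexes in patch 18.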
* [PATCH 17/20] mutex: Provide mutex_is_contended
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (16 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 16/20] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
` (3 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mutex-is-contended.patch --]
[-- Type: text/plain, Size: 675 bytes --]
Usable for lock-breaks and such.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/mutex.h | 5 +++++
1 file changed, 5 insertions(+)
Index: linux-2.6/include/linux/mutex.h
===================================================================
--- linux-2.6.orig/include/linux/mutex.h
+++ linux-2.6/include/linux/mutex.h
@@ -118,6 +118,11 @@ static inline int mutex_is_locked(struct
return atomic_read(&lock->count) != 1;
}
+static inline int mutex_is_contended(struct mutex *lock)
+{
+ return atomic_read(&lock->count) < 0;
+}
+
/*
* See kernel/mutex.c for detailed documentation of these APIs.
* Also see Documentation/mutex-design.txt.
^ permalink raw reply [flat|nested] 45+ messages in thread
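A sketch of the lock-break pattern this helper enables (illustrative
only; more_work() and do_one_chunk() are placeholders). A mutex's
count drops below zero once waiters are queued, so a long-running
holder can poll for contention and briefly drop the lock, the mutex
analogue of spin_needbreak():

extern bool more_work(void);	/* placeholder */
extern void do_one_chunk(void);	/* placeholder */

static void process_many(struct mutex *lock)
{
	mutex_lock(lock);
	while (more_work()) {
		do_one_chunk();
		if (need_resched() || mutex_is_contended(lock)) {
			/* Let waiters (or the scheduler) in. */
			mutex_unlock(lock);
			cond_resched();
			mutex_lock(lock);
		}
	}
	mutex_unlock(lock);
}

Patch 18 uses exactly this shape in unmap_vmas() and
unmap_mapping_range(), replacing the old spin_needbreak() checks.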
* [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (17 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 17/20] mutex: Provide mutex_is_contended Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 19/20] mm: Extended batches for generic mmu_gather Peter Zijlstra
` (2 subsequent siblings)
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-mutex.patch --]
[-- Type: text/plain, Size: 19732 bytes --]
Straightforward conversion of i_mmap_lock and anon_vma->lock to mutexes.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/mm/hugetlbpage.c | 4 ++--
fs/gfs2/main.c | 2 +-
fs/hugetlbfs/inode.c | 4 ++--
fs/inode.c | 2 +-
fs/nilfs2/btnode.c | 2 +-
include/linux/fs.h | 2 +-
include/linux/mm.h | 2 +-
include/linux/rmap.h | 12 ++++++------
kernel/fork.c | 4 ++--
mm/filemap_xip.c | 4 ++--
mm/fremap.c | 4 ++--
mm/hugetlb.c | 12 ++++++------
mm/memory-failure.c | 4 ++--
mm/memory.c | 14 +++++++-------
mm/mmap.c | 18 +++++++++---------
mm/mremap.c | 4 ++--
mm/rmap.c | 22 +++++++++++-----------
17 files changed, 58 insertions(+), 58 deletions(-)
Index: linux-2.6/arch/x86/mm/hugetlbpage.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/hugetlbpage.c
+++ linux-2.6/arch/x86/mm/hugetlbpage.c
@@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_str
if (!vma_shareable(vma, addr))
return;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
@@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_str
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
/*
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct ino
pgoff = offset >> PAGE_SHIFT;
i_size_write(inode, offset);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (!prio_tree_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
truncate_hugepages(inode, offset);
return 0;
}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -257,7 +257,7 @@ void inode_init_once(struct inode *inode
INIT_LIST_HEAD(&inode->i_devices);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
+ mutex_init(&inode->i_data.i_mmap_lock);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -626,7 +626,7 @@ struct address_space {
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
struct prio_tree_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
- spinlock_t i_mmap_lock; /* protect tree, count, list */
+ struct mutex i_mmap_lock; /* protect tree, count, list */
unsigned int truncate_count; /* Cover race condition with truncate */
unsigned long nrpages; /* number of total pages */
pgoff_t writeback_index;/* writeback starts here */
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -758,7 +758,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
- spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */
+ struct mutex *i_mmap_lock; /* For unmap_mapping_range: */
unsigned long truncate_count; /* Compare vm_truncate_count */
};
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/memcontrol.h>
/*
@@ -25,7 +25,7 @@
* pointing to this anon_vma once its vma list is empty.
*/
struct anon_vma {
- spinlock_t lock; /* Serialize access to vma list */
+ struct mutex lock; /* Serialize access to vma list */
struct anon_vma *root; /* Root of this anon_vma tree */
/*
* The refcount is taken on an anon_vma when there is no
@@ -93,24 +93,24 @@ static inline void vma_lock_anon_vma(str
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_lock(&anon_vma->root->lock);
+ mutex_lock(&anon_vma->root->lock);
}
static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_unlock(&anon_vma->root->lock);
+ mutex_unlock(&anon_vma->root->lock);
}
static inline void anon_vma_lock(struct anon_vma *anon_vma)
{
- spin_lock(&anon_vma->root->lock);
+ mutex_lock(&anon_vma->root->lock);
}
static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
- spin_unlock(&anon_vma->root->lock);
+ mutex_unlock(&anon_vma->root->lock);
}
/*
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -369,7 +369,7 @@ static int dup_mmap(struct mm_struct *mm
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (tmp->vm_flags & VM_SHARED)
mapping->i_mmap_writable++;
tmp->vm_truncate_count = mpnt->vm_truncate_count;
@@ -377,7 +377,7 @@ static int dup_mmap(struct mm_struct *mm
/* insert tmp into the share list, just after mpnt */
vma_prio_tree_add(tmp, mpnt);
flush_dcache_mmap_unlock(mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
/*
Index: linux-2.6/mm/filemap_xip.c
===================================================================
--- linux-2.6.orig/mm/filemap_xip.c
+++ linux-2.6/mm/filemap_xip.c
@@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapp
return;
retry:
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm;
address = vma->vm_start +
@@ -201,7 +201,7 @@ retry:
page_cache_release(page);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (locked) {
mutex_unlock(&xip_sparse_mutex);
Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c
+++ linux-2.6/mm/fremap.c
@@ -208,13 +208,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
}
goto out;
}
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
flush_dcache_mmap_lock(mapping);
vma->vm_flags |= VM_NONLINEAR;
vma_prio_tree_remove(vma, &mapping->i_mmap);
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
flush_dcache_mmap_unlock(mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
if (vma->vm_flags & VM_LOCKED) {
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c
+++ linux-2.6/mm/hugetlb.c
@@ -2248,9 +2248,9 @@ void __unmap_hugepage_range(struct vm_ar
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page)
{
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_lock(&vma->vm_file->f_mapping->i_mmap_lock);
__unmap_hugepage_range(vma, start, end, ref_page);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}
/*
@@ -2282,7 +2282,7 @@ static int unmap_ref_private(struct mm_s
* this mapping should be shared between all the VMAs,
* __unmap_hugepage_range() is called as the lock is already held
*/
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
/* Do not unmap the current VMA */
if (iter_vma == vma)
@@ -2300,7 +2300,7 @@ static int unmap_ref_private(struct mm_s
address, address + huge_page_size(h),
page);
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return 1;
}
@@ -2775,7 +2775,7 @@ void hugetlb_change_protection(struct vm
BUG_ON(address >= end);
flush_cache_range(vma, address, end);
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_lock(&vma->vm_file->f_mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock);
for (; address < end; address += huge_page_size(h)) {
ptep = huge_pte_offset(mm, address);
@@ -2790,7 +2790,7 @@ void hugetlb_change_protection(struct vm
}
}
spin_unlock(&mm->page_table_lock);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
flush_tlb_range(vma, start, end);
}
Index: linux-2.6/mm/memory-failure.c
===================================================================
--- linux-2.6.orig/mm/memory-failure.c
+++ linux-2.6/mm/memory-failure.c
@@ -424,7 +424,7 @@ static void collect_procs_file(struct pa
*/
read_lock(&tasklist_lock);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
for_each_process(tsk) {
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -444,7 +444,7 @@ static void collect_procs_file(struct pa
add_to_kill(tsk, page, vma, to_kill, tkc);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
read_unlock(&tasklist_lock);
}
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1177,7 +1177,7 @@ unsigned long unmap_vmas(struct mmu_gath
{
long zap_work = ZAP_BLOCK_SIZE;
unsigned long start = start_addr;
- spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
+ struct mutex *i_mmap_lock = details ? details->i_mmap_lock : NULL;
struct mm_struct *mm = vma->vm_mm;
mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1227,7 +1227,7 @@ unsigned long unmap_vmas(struct mmu_gath
}
if (need_resched() ||
- (i_mmap_lock && spin_needbreak(i_mmap_lock))) {
+ (i_mmap_lock && mutex_is_contended(i_mmap_lock))) {
if (i_mmap_lock)
goto out;
cond_resched();
@@ -2520,7 +2520,7 @@ again:
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
- need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
+ need_break = need_resched() || mutex_is_contended(details->i_mmap_lock);
if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
@@ -2534,9 +2534,9 @@ again:
goto again;
}
- spin_unlock(details->i_mmap_lock);
+ mutex_unlock(details->i_mmap_lock);
cond_resched();
- spin_lock(details->i_mmap_lock);
+ mutex_lock(details->i_mmap_lock);
return -EINTR;
}
@@ -2632,7 +2632,7 @@ void unmap_mapping_range(struct address_
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
/* Protect against endless unmapping loops */
mapping->truncate_count++;
@@ -2647,7 +2647,7 @@ void unmap_mapping_range(struct address_
unmap_mapping_range_tree(&mapping->i_mmap, &details);
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
EXPORT_SYMBOL(unmap_mapping_range);
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -216,9 +216,9 @@ void unlink_file_vma(struct vm_area_stru
if (file) {
struct address_space *mapping = file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file, mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
}
@@ -455,7 +455,7 @@ static void vma_link(struct mm_struct *m
mapping = vma->vm_file->f_mapping;
if (mapping) {
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma->vm_truncate_count = mapping->truncate_count;
}
@@ -463,7 +463,7 @@ static void vma_link(struct mm_struct *m
__vma_link_file(vma);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
mm->map_count++;
validate_mm(mm);
@@ -566,7 +566,7 @@ again: remove_next = 1 + (end > next->
mapping = file->f_mapping;
if (!(vma->vm_flags & VM_NONLINEAR))
root = &mapping->i_mmap;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (importer &&
vma->vm_truncate_count != next->vm_truncate_count) {
/*
@@ -640,7 +640,7 @@ again: remove_next = 1 + (end > next->
if (anon_vma)
anon_vma_unlock(anon_vma);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (remove_next) {
if (file) {
@@ -2497,7 +2497,7 @@ static void vm_lock_anon_vma(struct mm_s
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+ mutex_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
/*
* We can safely modify head.next after taking the
* anon_vma->root->lock. If some other vma in this mm shares
@@ -2527,7 +2527,7 @@ static void vm_lock_mapping(struct mm_st
*/
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
- spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+ mutex_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
}
}
@@ -2626,7 +2626,7 @@ static void vm_unlock_mapping(struct add
* AS_MM_ALL_LOCKS can't change to 0 from under us
* because we hold the mm_all_locks_mutex.
*/
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
&mapping->flags))
BUG();
Index: linux-2.6/mm/mremap.c
===================================================================
--- linux-2.6.orig/mm/mremap.c
+++ linux-2.6/mm/mremap.c
@@ -90,7 +90,7 @@ static void move_ptes(struct vm_area_str
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (new_vma->vm_truncate_count &&
new_vma->vm_truncate_count != vma->vm_truncate_count)
new_vma->vm_truncate_count = 0;
@@ -122,7 +122,7 @@ static void move_ptes(struct vm_area_str
pte_unmap_nested(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
}
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -302,7 +302,7 @@ static void anon_vma_ctor(void *data)
{
struct anon_vma *anon_vma = data;
- spin_lock_init(&anon_vma->lock);
+ mutex_init(&anon_vma->lock);
atomic_set(&anon_vma->refcount, 0);
INIT_LIST_HEAD(&anon_vma->head);
}
@@ -635,7 +635,7 @@ static int page_referenced_file(struct p
*/
BUG_ON(!PageLocked(page));
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
/*
* i_mmap_lock does not stabilize mapcount at all, but mapcount
@@ -660,7 +660,7 @@ static int page_referenced_file(struct p
break;
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return referenced;
}
@@ -747,7 +747,7 @@ static int page_mkclean_file(struct addr
BUG_ON(PageAnon(page));
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
if (vma->vm_flags & VM_SHARED) {
unsigned long address = vma_address(page, vma);
@@ -756,7 +756,7 @@ static int page_mkclean_file(struct addr
ret += page_mkclean_one(page, vma, address);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
@@ -1330,7 +1330,7 @@ static int try_to_unmap_file(struct page
unsigned long max_nl_size = 0;
unsigned int mapcount;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
@@ -1376,7 +1376,7 @@ static int try_to_unmap_file(struct page
mapcount = page_mapcount(page);
if (!mapcount)
goto out;
- cond_resched_lock(&mapping->i_mmap_lock);
+ cond_resched();
max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
if (max_nl_cursor == 0)
@@ -1398,7 +1398,7 @@ static int try_to_unmap_file(struct page
}
vma->vm_private_data = (void *) max_nl_cursor;
}
- cond_resched_lock(&mapping->i_mmap_lock);
+ cond_resched();
max_nl_cursor += CLUSTER_SIZE;
} while (max_nl_cursor <= max_nl_size);
@@ -1410,7 +1410,7 @@ static int try_to_unmap_file(struct page
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
vma->vm_private_data = NULL;
out:
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
@@ -1525,7 +1525,7 @@ static int rmap_walk_file(struct page *p
if (!mapping)
return ret;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
@@ -1539,7 +1539,7 @@ static int rmap_walk_file(struct page *p
* never contain migration ptes. Decide what to do about this
* limitation to linear when we need rmap_walk() on nonlinear.
*/
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
Index: linux-2.6/fs/gfs2/main.c
===================================================================
--- linux-2.6.orig/fs/gfs2/main.c
+++ linux-2.6/fs/gfs2/main.c
@@ -61,7 +61,7 @@ static void gfs2_init_gl_aspace_once(voi
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
+ mutex_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
Index: linux-2.6/fs/nilfs2/btnode.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/btnode.c
+++ linux-2.6/fs/nilfs2/btnode.c
@@ -43,7 +43,7 @@ void nilfs_btnode_cache_init_once(struct
INIT_LIST_HEAD(&btnc->private_list);
spin_lock_init(&btnc->private_lock);
- spin_lock_init(&btnc->i_mmap_lock);
+ mutex_init(&btnc->i_mmap_lock);
INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}
^ permalink raw reply [flat|nested] 45+ messages in thread
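One detail of the conversion worth spelling out: cond_resched_lock()
exists because a spinlock holder must drop the lock before it may
schedule, whereas a mutex holder is allowed to sleep, which is why the
try_to_unmap_file() hunks above replace cond_resched_lock() with a
bare cond_resched(). A minimal sketch (illustrative only; the per-vma
work is a placeholder):

static void scan_nonlinear(struct address_space *mapping)
{
	struct vm_area_struct *vma;

	mutex_lock(&mapping->i_mmap_lock);
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
			    shared.vm_set.list) {
		/* ... per-vma work, placeholder ... */

		/* Legal with a mutex held; never with a spinlock. */
		cond_resched();
	}
	mutex_unlock(&mapping->i_mmap_lock);
}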
* [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes
2010-10-18 11:24 ` [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
0 siblings, 0 replies; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm, Linus Torvalds
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-mutex.patch --]
[-- Type: text/plain, Size: 19734 bytes --]
Straightforward conversion of i_mmap_lock and anon_vma->lock to mutexes.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/mm/hugetlbpage.c | 4 ++--
fs/gfs2/main.c | 2 +-
fs/hugetlbfs/inode.c | 4 ++--
fs/inode.c | 2 +-
fs/nilfs2/btnode.c | 2 +-
include/linux/fs.h | 2 +-
include/linux/mm.h | 2 +-
include/linux/rmap.h | 12 ++++++------
kernel/fork.c | 4 ++--
mm/filemap_xip.c | 4 ++--
mm/fremap.c | 4 ++--
mm/hugetlb.c | 12 ++++++------
mm/memory-failure.c | 4 ++--
mm/memory.c | 14 +++++++-------
mm/mmap.c | 18 +++++++++---------
mm/mremap.c | 4 ++--
mm/rmap.c | 22 +++++++++++-----------
17 files changed, 58 insertions(+), 58 deletions(-)
Index: linux-2.6/arch/x86/mm/hugetlbpage.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/hugetlbpage.c
+++ linux-2.6/arch/x86/mm/hugetlbpage.c
@@ -72,7 +72,7 @@ static void huge_pmd_share(struct mm_str
if (!vma_shareable(vma, addr))
return;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
@@ -97,7 +97,7 @@ static void huge_pmd_share(struct mm_str
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
/*
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c
+++ linux-2.6/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct ino
pgoff = offset >> PAGE_SHIFT;
i_size_write(inode, offset);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (!prio_tree_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
truncate_hugepages(inode, offset);
return 0;
}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -257,7 +257,7 @@ void inode_init_once(struct inode *inode
INIT_LIST_HEAD(&inode->i_devices);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
+ mutex_init(&inode->i_data.i_mmap_lock);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -626,7 +626,7 @@ struct address_space {
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
struct prio_tree_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
- spinlock_t i_mmap_lock; /* protect tree, count, list */
+ struct mutex i_mmap_lock; /* protect tree, count, list */
unsigned int truncate_count; /* Cover race condition with truncate */
unsigned long nrpages; /* number of total pages */
pgoff_t writeback_index;/* writeback starts here */
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -758,7 +758,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
- spinlock_t *i_mmap_lock; /* For unmap_mapping_range: */
+ struct mutex *i_mmap_lock; /* For unmap_mapping_range: */
unsigned long truncate_count; /* Compare vm_truncate_count */
};
Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/memcontrol.h>
/*
@@ -25,7 +25,7 @@
* pointing to this anon_vma once its vma list is empty.
*/
struct anon_vma {
- spinlock_t lock; /* Serialize access to vma list */
+ struct mutex lock; /* Serialize access to vma list */
struct anon_vma *root; /* Root of this anon_vma tree */
/*
* The refcount is taken on an anon_vma when there is no
@@ -93,24 +93,24 @@ static inline void vma_lock_anon_vma(str
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_lock(&anon_vma->root->lock);
+ mutex_lock(&anon_vma->root->lock);
}
static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_unlock(&anon_vma->root->lock);
+ mutex_unlock(&anon_vma->root->lock);
}
static inline void anon_vma_lock(struct anon_vma *anon_vma)
{
- spin_lock(&anon_vma->root->lock);
+ mutex_lock(&anon_vma->root->lock);
}
static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
- spin_unlock(&anon_vma->root->lock);
+ mutex_unlock(&anon_vma->root->lock);
}
/*
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -369,7 +369,7 @@ static int dup_mmap(struct mm_struct *mm
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (tmp->vm_flags & VM_SHARED)
mapping->i_mmap_writable++;
tmp->vm_truncate_count = mpnt->vm_truncate_count;
@@ -377,7 +377,7 @@ static int dup_mmap(struct mm_struct *mm
/* insert tmp into the share list, just after mpnt */
vma_prio_tree_add(tmp, mpnt);
flush_dcache_mmap_unlock(mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
/*
Index: linux-2.6/mm/filemap_xip.c
===================================================================
--- linux-2.6.orig/mm/filemap_xip.c
+++ linux-2.6/mm/filemap_xip.c
@@ -183,7 +183,7 @@ __xip_unmap (struct address_space * mapp
return;
retry:
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm;
address = vma->vm_start +
@@ -201,7 +201,7 @@ retry:
page_cache_release(page);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (locked) {
mutex_unlock(&xip_sparse_mutex);
Index: linux-2.6/mm/fremap.c
===================================================================
--- linux-2.6.orig/mm/fremap.c
+++ linux-2.6/mm/fremap.c
@@ -208,13 +208,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsign
}
goto out;
}
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
flush_dcache_mmap_lock(mapping);
vma->vm_flags |= VM_NONLINEAR;
vma_prio_tree_remove(vma, &mapping->i_mmap);
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
flush_dcache_mmap_unlock(mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
if (vma->vm_flags & VM_LOCKED) {
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c
+++ linux-2.6/mm/hugetlb.c
@@ -2248,9 +2248,9 @@ void __unmap_hugepage_range(struct vm_ar
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page)
{
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_lock(&vma->vm_file->f_mapping->i_mmap_lock);
__unmap_hugepage_range(vma, start, end, ref_page);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}
/*
@@ -2282,7 +2282,7 @@ static int unmap_ref_private(struct mm_s
* this mapping should be shared between all the VMAs,
* __unmap_hugepage_range() is called as the lock is already held
*/
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
/* Do not unmap the current VMA */
if (iter_vma == vma)
@@ -2300,7 +2300,7 @@ static int unmap_ref_private(struct mm_s
address, address + huge_page_size(h),
page);
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return 1;
}
@@ -2775,7 +2775,7 @@ void hugetlb_change_protection(struct vm
BUG_ON(address >= end);
flush_cache_range(vma, address, end);
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_lock(&vma->vm_file->f_mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock);
for (; address < end; address += huge_page_size(h)) {
ptep = huge_pte_offset(mm, address);
@@ -2790,7 +2790,7 @@ void hugetlb_change_protection(struct vm
}
}
spin_unlock(&mm->page_table_lock);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
flush_tlb_range(vma, start, end);
}
Index: linux-2.6/mm/memory-failure.c
===================================================================
--- linux-2.6.orig/mm/memory-failure.c
+++ linux-2.6/mm/memory-failure.c
@@ -424,7 +424,7 @@ static void collect_procs_file(struct pa
*/
read_lock(&tasklist_lock);
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
for_each_process(tsk) {
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -444,7 +444,7 @@ static void collect_procs_file(struct pa
add_to_kill(tsk, page, vma, to_kill, tkc);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
read_unlock(&tasklist_lock);
}
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1177,7 +1177,7 @@ unsigned long unmap_vmas(struct mmu_gath
{
long zap_work = ZAP_BLOCK_SIZE;
unsigned long start = start_addr;
- spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
+ struct mutex *i_mmap_lock = details ? details->i_mmap_lock : NULL;
struct mm_struct *mm = vma->vm_mm;
mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1227,7 +1227,7 @@ unsigned long unmap_vmas(struct mmu_gath
}
if (need_resched() ||
- (i_mmap_lock && spin_needbreak(i_mmap_lock))) {
+ (i_mmap_lock && mutex_is_contended(i_mmap_lock))) {
if (i_mmap_lock)
goto out;
cond_resched();
@@ -2520,7 +2520,7 @@ again:
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
- need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
+ need_break = need_resched() || mutex_is_contended(details->i_mmap_lock);
if (restart_addr >= end_addr) {
/* We have now completed this vma: mark it so */
@@ -2534,9 +2534,9 @@ again:
goto again;
}
- spin_unlock(details->i_mmap_lock);
+ mutex_unlock(details->i_mmap_lock);
cond_resched();
- spin_lock(details->i_mmap_lock);
+ mutex_lock(details->i_mmap_lock);
return -EINTR;
}
@@ -2632,7 +2632,7 @@ void unmap_mapping_range(struct address_
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
/* Protect against endless unmapping loops */
mapping->truncate_count++;
@@ -2647,7 +2647,7 @@ void unmap_mapping_range(struct address_
unmap_mapping_range_tree(&mapping->i_mmap, &details);
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
EXPORT_SYMBOL(unmap_mapping_range);
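The mutex_is_contended() check in the hunks above replaces spin_needbreak(); the helper itself is introduced by patch 17/20 of this series ("mutex: Provide mutex_is_contended"), which is not quoted here. A minimal sketch of what such a helper could look like -- an assumption, resting only on struct mutex queueing blocked tasks on its wait_list:

	/*
	 * Sketch only; the real helper lands in patch 17/20. A non-empty
	 * wait_list means another task is blocked on this mutex, so the
	 * holder should drop it and reschedule.
	 */
	static inline int mutex_is_contended(struct mutex *lock)
	{
		return !list_empty(&lock->wait_list);
	}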
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c
+++ linux-2.6/mm/mmap.c
@@ -216,9 +216,9 @@ void unlink_file_vma(struct vm_area_stru
if (file) {
struct address_space *mapping = file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file, mapping);
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
}
}
@@ -455,7 +455,7 @@ static void vma_link(struct mm_struct *m
mapping = vma->vm_file->f_mapping;
if (mapping) {
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma->vm_truncate_count = mapping->truncate_count;
}
@@ -463,7 +463,7 @@ static void vma_link(struct mm_struct *m
__vma_link_file(vma);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
mm->map_count++;
validate_mm(mm);
@@ -566,7 +566,7 @@ again: remove_next = 1 + (end > next->
mapping = file->f_mapping;
if (!(vma->vm_flags & VM_NONLINEAR))
root = &mapping->i_mmap;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (importer &&
vma->vm_truncate_count != next->vm_truncate_count) {
/*
@@ -640,7 +640,7 @@ again: remove_next = 1 + (end > next->
if (anon_vma)
anon_vma_unlock(anon_vma);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (remove_next) {
if (file) {
@@ -2497,7 +2497,7 @@ static void vm_lock_anon_vma(struct mm_s
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+ mutex_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
/*
* We can safely modify head.next after taking the
* anon_vma->root->lock. If some other vma in this mm shares
@@ -2527,7 +2527,7 @@ static void vm_lock_mapping(struct mm_st
*/
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
- spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+ mutex_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
}
}
@@ -2626,7 +2626,7 @@ static void vm_unlock_mapping(struct add
* AS_MM_ALL_LOCKS can't change to 0 from under us
* because we hold the mm_all_locks_mutex.
*/
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
&mapping->flags))
BUG();
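Similarly, the mutex_lock_nest_lock() used in the two vm_lock_*() hunks above is added by patch 16/20 ("lockdep, mutex: Provide mutex_lock_nest_lock") as the mutex counterpart of spin_lock_nest_lock(): it tells lockdep that taking an unbounded number of these mutexes is fine because they all nest under the already-held mm->mmap_sem. A sketch of the annotation, assuming it mirrors the spinlock variant:

	#ifdef CONFIG_DEBUG_LOCK_ALLOC
	# define mutex_lock_nest_lock(lock, nest_lock)				\
	do {									\
		typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
		_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);		\
	} while (0)
	#else
	# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
	#endif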
Index: linux-2.6/mm/mremap.c
===================================================================
--- linux-2.6.orig/mm/mremap.c
+++ linux-2.6/mm/mremap.c
@@ -90,7 +90,7 @@ static void move_ptes(struct vm_area_str
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
if (new_vma->vm_truncate_count &&
new_vma->vm_truncate_count != vma->vm_truncate_count)
new_vma->vm_truncate_count = 0;
@@ -122,7 +122,7 @@ static void move_ptes(struct vm_area_str
pte_unmap_nested(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
}
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -302,7 +302,7 @@ static void anon_vma_ctor(void *data)
{
struct anon_vma *anon_vma = data;
- spin_lock_init(&anon_vma->lock);
+ mutex_init(&anon_vma->lock);
atomic_set(&anon_vma->refcount, 0);
INIT_LIST_HEAD(&anon_vma->head);
}
@@ -635,7 +635,7 @@ static int page_referenced_file(struct p
*/
BUG_ON(!PageLocked(page));
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
/*
* i_mmap_lock does not stabilize mapcount at all, but mapcount
@@ -660,7 +660,7 @@ static int page_referenced_file(struct p
break;
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return referenced;
}
@@ -747,7 +747,7 @@ static int page_mkclean_file(struct addr
BUG_ON(PageAnon(page));
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
if (vma->vm_flags & VM_SHARED) {
unsigned long address = vma_address(page, vma);
@@ -756,7 +756,7 @@ static int page_mkclean_file(struct addr
ret += page_mkclean_one(page, vma, address);
}
}
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
@@ -1330,7 +1330,7 @@ static int try_to_unmap_file(struct page
unsigned long max_nl_size = 0;
unsigned int mapcount;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
@@ -1376,7 +1376,7 @@ static int try_to_unmap_file(struct page
mapcount = page_mapcount(page);
if (!mapcount)
goto out;
- cond_resched_lock(&mapping->i_mmap_lock);
+ cond_resched();
max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
if (max_nl_cursor == 0)
@@ -1398,7 +1398,7 @@ static int try_to_unmap_file(struct page
}
vma->vm_private_data = (void *) max_nl_cursor;
}
- cond_resched_lock(&mapping->i_mmap_lock);
+ cond_resched();
max_nl_cursor += CLUSTER_SIZE;
} while (max_nl_cursor <= max_nl_size);
@@ -1410,7 +1410,7 @@ static int try_to_unmap_file(struct page
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
vma->vm_private_data = NULL;
out:
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
@@ -1525,7 +1525,7 @@ static int rmap_walk_file(struct page *p
if (!mapping)
return ret;
- spin_lock(&mapping->i_mmap_lock);
+ mutex_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
@@ -1539,7 +1539,7 @@ static int rmap_walk_file(struct page *p
* never contain migration ptes. Decide what to do about this
* limitation to linear when we need rmap_walk() on nonlinear.
*/
- spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->i_mmap_lock);
return ret;
}
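One consequence of the conversion shows up in the try_to_unmap_file() hunks above: cond_resched_lock() exists to drop a spinlock around the voluntary reschedule, because sleeping while holding a spinlock is illegal. A mutex holder may sleep, so the plain cond_resched() suffices and the lock stays held across the reschedule:

	/*
	 * Before (i_mmap_lock is a spinlock; cannot sleep while held):
	 *	cond_resched_lock(&mapping->i_mmap_lock); /* unlock, resched, relock */
	 *
	 * After (i_mmap_lock is a mutex; sleeping while held is legal):
	 *	cond_resched();                           /* lock stays held */
	 */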
Index: linux-2.6/fs/gfs2/main.c
===================================================================
--- linux-2.6.orig/fs/gfs2/main.c
+++ linux-2.6/fs/gfs2/main.c
@@ -61,7 +61,7 @@ static void gfs2_init_gl_aspace_once(voi
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
+ mutex_init(&mapping->i_mmap_lock);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
Index: linux-2.6/fs/nilfs2/btnode.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/btnode.c
+++ linux-2.6/fs/nilfs2/btnode.c
@@ -43,7 +43,7 @@ void nilfs_btnode_cache_init_once(struct
INIT_LIST_HEAD(&btnc->private_list);
spin_lock_init(&btnc->private_lock);
- spin_lock_init(&btnc->i_mmap_lock);
+ mutex_init(&btnc->i_mmap_lock);
INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}
* [PATCH 19/20] mm: Extended batches for generic mmu_gather
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (18 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 18/20] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` [PATCH 20/20] mm: Optimize page_lock_anon_vma() fast-path Peter Zijlstra
2010-10-18 14:55 ` [PATCH 00/20] mm: Preemptibility -v5 Stephen Rothwell
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-tlb_gather-more-batch.patch --]
[-- Type: text/plain, Size: 5423 bytes --]
Instead of using a single batch (either the small on-stack array or one
allocated page), try to extend the chain with a fresh batch each time the
current one fills up, and only flush once that extension fails or we're
done.
Requested-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/asm-generic/tlb.h | 122 ++++++++++++++++++++++++++++++----------------
1 file changed, 82 insertions(+), 40 deletions(-)
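The caller-side API is unchanged by this patch; for reference, a minimal usage sketch (mm, start and end stand for whatever the caller already has, as in the fs/exec.c conversion earlier in the series):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, 0);		/* 0: not a full-mm flush */
	/* ... tlb_remove_page(&tlb, page) for each unmapped page ... */
	tlb_finish_mmu(&tlb, start, end);	/* flush TLBs, free the pages */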
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -17,16 +17,6 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-/*
- * For UP we don't need to worry about TLB flush
- * and page free order so much..
- */
-#ifdef CONFIG_SMP
- #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
-#else
- #define tlb_fast_mode(tlb) 1
-#endif
-
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
* Semi RCU freeing of the page directories.
@@ -70,31 +60,66 @@ extern void tlb_remove_table(struct mmu_
#endif
+struct mmu_gather_batch {
+ struct mmu_gather_batch *next;
+ unsigned int nr;
+ unsigned int max;
+ struct page *pages[0];
+};
+
+#define MAX_GATHER_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
+
/* struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
struct mmu_gather {
struct mm_struct *mm;
- unsigned int nr; /* set to ~0U means fast mode */
- unsigned int max; /* nr < max */
- unsigned int need_flush;/* Really unmapped some ptes? */
- unsigned int fullmm; /* non-zero means full mm flush */
- struct page **pages;
- struct page *local[8];
+ unsigned int need_flush : 1, /* Did free PTEs */
+ fast_mode : 1; /* No batching */
+ unsigned int fullmm; /* Flush full mm */
+
+ struct mmu_gather_batch *active;
+ struct mmu_gather_batch local;
+ struct page *__pages[8];
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
struct mmu_table_batch *batch;
#endif
};
-static inline void __tlb_alloc_pages(struct mmu_gather *tlb)
+/*
+ * For UP we don't need to worry about TLB flush
+ * and page free order so much..
+ */
+#ifdef CONFIG_SMP
+ #define tlb_fast_mode(tlb) (tlb->fast_mode)
+#else
+ #define tlb_fast_mode(tlb) 1
+#endif
+
+static inline int tlb_next_batch(struct mmu_gather *tlb)
{
- unsigned long addr = __get_free_pages(GFP_ATOMIC, 0);
+ struct mmu_gather_batch *batch;
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(struct page *);
+ batch = tlb->active;
+ if (batch->next) {
+ tlb->active = batch->next;
+ return 1;
}
+
+ batch = (void *)__get_free_pages(GFP_ATOMIC, 0);
+ if (!batch)
+ return 0;
+
+ batch->next = NULL;
+ batch->nr = 0;
+ batch->max = MAX_GATHER_BATCH;
+
+ tlb->active->next = batch;
+ tlb->active = batch;
+
+ return 1;
}
/* tlb_gather_mmu
@@ -105,17 +130,16 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
{
tlb->mm = mm;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
-
- if (num_online_cpus() > 1) {
- tlb->nr = 0;
- __tlb_alloc_pages(tlb);
- } else /* Use fast mode if only one CPU is online */
- tlb->nr = ~0U;
-
+ tlb->need_flush = 0;
+ if (num_online_cpus() == 1)
+ tlb->fast_mode = 1;
tlb->fullmm = full_mm_flush;
+ tlb->local.next = NULL;
+ tlb->local.nr = 0;
+ tlb->local.max = ARRAY_SIZE(tlb->__pages);
+ tlb->active = &tlb->local;
+
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb->batch = NULL;
#endif
@@ -124,6 +148,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
static inline void
tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
+ struct mmu_gather_batch *batch;
+
if (!tlb->need_flush)
return;
tlb->need_flush = 0;
@@ -131,12 +157,14 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
- if (!tlb_fast_mode(tlb)) {
- free_pages_and_swap_cache(tlb->pages, tlb->nr);
- tlb->nr = 0;
- if (tlb->pages == tlb->local)
- __tlb_alloc_pages(tlb);
+ if (tlb_fast_mode(tlb))
+ return;
+
+ for (batch = &tlb->local; batch; batch = batch->next) {
+ free_pages_and_swap_cache(batch->pages, batch->nr);
+ batch->nr = 0;
}
+ tlb->active = &tlb->local;
}
/* tlb_finish_mmu
@@ -146,13 +174,18 @@ tlb_flush_mmu(struct mmu_gather *tlb, un
static inline void
tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
+ struct mmu_gather_batch *batch, *next;
+
tlb_flush_mmu(tlb, start, end);
/* keep the page table cache within bounds */
check_pgt_cache();
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
+ for (batch = tlb->local.next; batch; batch = next) {
+ next = batch->next;
+ free_pages((unsigned long)batch, 0);
+ }
+ tlb->local.next = NULL;
}
/* tlb_remove_page
@@ -162,14 +195,23 @@ tlb_finish_mmu(struct mmu_gather *tlb, u
*/
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
+ struct mmu_gather_batch *batch;
+
tlb->need_flush = 1;
+
if (tlb_fast_mode(tlb)) {
free_page_and_swap_cache(page);
return;
}
- tlb->pages[tlb->nr++] = page;
- if (tlb->nr >= tlb->max)
- tlb_flush_mmu(tlb, 0, 0);
+
+ batch = tlb->active;
+ if (batch->nr == batch->max) {
+ if (!tlb_next_batch(tlb))
+ tlb_flush_mmu(tlb, 0, 0);
+ batch = tlb->active;
+ }
+
+ batch->pages[batch->nr++] = page;
}
/**
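A quick sanity check on the sizing, assuming a 64-bit machine with 4K pages: sizeof(struct mmu_gather_batch) is 16 bytes (one pointer plus two unsigned ints), so MAX_GATHER_BATCH = (4096 - 16) / 8 = 510 page pointers per allocated batch. That is marginally below the PAGE_SIZE / sizeof(struct page *) = 512 the replaced __tlb_alloc_pages() managed, since the batch header now lives in the same page, but the chain can grow without bound by linking further batches.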
* [PATCH 20/20] mm: Optimize page_lock_anon_vma() fast-path
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (19 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 19/20] mm: Extended batches for generic mmu_gather Peter Zijlstra
@ 2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 11:24 ` Peter Zijlstra
2010-10-18 14:55 ` [PATCH 00/20] mm: Preemptibility -v5 Stephen Rothwell
21 siblings, 1 reply; 45+ messages in thread
From: Peter Zijlstra @ 2010-10-18 11:24 UTC (permalink / raw)
To: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm
Cc: linux-kernel, linux-arch, Benjamin Herrenschmidt, David Miller,
Hugh Dickins, Mel Gorman, Nick Piggin, Peter Zijlstra,
Paul McKenney, Yanmin Zhang, Stephen Rothwell
[-- Attachment #1: mm-opt-page_lock_anon_vma.patch --]
[-- Type: text/plain, Size: 2816 bytes --]
Optimize the page_lock_anon_vma() fast path to take a single LOCKed
(atomic) operation instead of two.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
mm/rmap.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 67 insertions(+), 4 deletions(-)
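The "single LOCKed op" claim can be read off the control flow below: the old fast path bumped the refcount (one atomic read-modify-write) and then acquired the mutex (another); the new fast path is a bare mutex_trylock(). A schematic comparison, comment-only:

	/*
	 * Old fast path (two LOCKed ops):   New fast path (one LOCKed op):
	 *
	 *   atomic_inc_not_zero(&refcount)    mutex_trylock(&root->lock)
	 *   mutex_lock(&root->lock)           atomic_read(&refcount) is a
	 *                                     plain load, not a LOCKed op
	 */

The refcount is now only manipulated on the slow path, when the trylock fails and the caller must block.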
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -357,20 +357,75 @@ out:
return anon_vma;
}
+/*
+ * Similar to page_get_anon_vma() except it locks the anon_vma.
+ *
+ * It's a little more complex as it tries to keep the fast path to a single
+ * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
+ * reference like with page_get_anon_vma() and then block on the mutex.
+ */
struct anon_vma *page_lock_anon_vma(struct page *page)
{
- struct anon_vma *anon_vma = page_get_anon_vma(page);
+ struct anon_vma *anon_vma = NULL;
+ unsigned long anon_mapping;
+
+ rcu_read_lock();
+ anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
+ if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
+ goto out;
+ if (!page_mapped(page))
+ goto out;
+
+ anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
+ if (mutex_trylock(&anon_vma->root->lock)) {
+ /*
+ * If we observe a !0 refcount, then holding the lock ensures
+ * the anon_vma will not go away, see __put_anon_vma().
+ */
+ if (!atomic_read(&anon_vma->refcount)) {
+ anon_vma_unlock(anon_vma);
+ anon_vma = NULL;
+ }
+ goto out;
+ }
- if (anon_vma)
- anon_vma_lock(anon_vma);
+ /* trylock failed, we have to sleep */
+ if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+ anon_vma = NULL;
+ goto out;
+ }
+
+ if (!page_mapped(page)) {
+ put_anon_vma(anon_vma);
+ anon_vma = NULL;
+ goto out;
+ }
+
+ /* we pinned the anon_vma, it's safe to sleep */
+ rcu_read_unlock();
+ anon_vma_lock(anon_vma);
+
+ if (atomic_dec_and_test(&anon_vma->refcount)) {
+ /*
+ * Oops, we held the last refcount, release the lock
+ * and bail -- can't simply use put_anon_vma() because
+ * we'll deadlock on the anon_vma_lock() recursion.
+ */
+ anon_vma_unlock(anon_vma);
+ __put_anon_vma(anon_vma);
+ anon_vma = NULL;
+ }
return anon_vma;
+
+out:
+ rcu_read_unlock();
+ return anon_vma;
}
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
anon_vma_unlock(anon_vma);
- put_anon_vma(anon_vma);
}
/*
@@ -1462,6 +1517,14 @@ int try_to_munlock(struct page *page)
void __put_anon_vma(struct anon_vma *anon_vma)
{
+ /*
+ * Synchronize against page_lock_anon_vma() such that
+ * we can safely hold the lock without the anon_vma getting
+ * freed.
+ */
+ anon_vma_lock(anon_vma);
+ anon_vma_unlock(anon_vma);
+
if (anon_vma->root != anon_vma)
put_anon_vma(anon_vma->root);
anon_vma_free(anon_vma);
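To see what the lock/unlock pair in __put_anon_vma() buys, consider the interleaving it serializes (an illustrative sketch; the CPU labels are not from the patch):

	/*
	 * CPU0: page_lock_anon_vma()        CPU1: final refcount drop
	 *
	 *   mutex_trylock() succeeds
	 *   atomic_read(&refcount) != 0
	 *                                     atomic_dec_and_test() -> 0
	 *                                     __put_anon_vma():
	 *                                       anon_vma_lock() blocks
	 *   ... use anon_vma safely ...
	 *   anon_vma_unlock()
	 *                                       lock acquired, released
	 *                                       anon_vma_free()
	 *
	 * Holding the mutex after observing a non-zero refcount pins the
	 * anon_vma: the freeing side cannot get past anon_vma_lock()
	 * until the reader lets go.
	 */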
* Re: [PATCH 00/20] mm: Preemptibility -v5
2010-10-18 11:24 [PATCH 00/20] mm: Preemptibility -v5 Peter Zijlstra
` (20 preceding siblings ...)
2010-10-18 11:24 ` [PATCH 20/20] mm: Optimize page_lock_anon_vma() fast-path Peter Zijlstra
@ 2010-10-18 14:55 ` Stephen Rothwell
21 siblings, 0 replies; 45+ messages in thread
From: Stephen Rothwell @ 2010-10-18 14:55 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Andrea Arcangeli, Avi Kivity, Thomas Gleixner, Rik van Riel,
Ingo Molnar, akpm, Linus Torvalds, linux-kernel, linux-arch,
Benjamin Herrenschmidt, David Miller, Hugh Dickins, Mel Gorman,
Nick Piggin, Paul McKenney, Yanmin Zhang
[-- Attachment #1: Type: text/plain, Size: 597 bytes --]
Hi Peter,
On Mon, 18 Oct 2010 13:24:33 +0200 Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> Linus, Andrew, Stephen, can we add this to -next for .37?
Well, it is late (the merge window will open in a couple of days, I
suspect), but this is version 5 of these patches, so they have had better
review than most. If Andrew or Linus want this stuff in .37, I will
add the tree; otherwise it would just add confusion alongside all the
code that will be integrated in .37.
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/