* [PATCH 1/15] ptwalk: p?d_none_or_clear_bad
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
@ 2005-03-09 22:06 ` Hugh Dickins
2005-03-09 22:07 ` [PATCH 2/15] ptwalk: change_protection Hugh Dickins
` (14 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:06 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Replace the repetitive p?d_none, p?d_bad, p?d_ERROR, p?d_clear clauses
by pgd_none_or_clear_bad, pud_none_or_clear_bad, pmd_none_or_clear_bad
inlines throughout common and i386 - avoids a sprinkling of "unlikely"s.
Tests inline, but unlikely error handling in mm/memory.c - so the ERROR
file and line won't tell much; but it comes too late anyway, and hardly
ever seen outside development.
Let mremap use them in get_one_pte_map, as it already did in _nested;
but leave follow_page and untouched_anonymous_page just skipping _bad
as before - they don't have quite the same ownership of the mm.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
arch/i386/kernel/vm86.c | 21 +--------
include/asm-generic/pgtable.h | 44 ++++++++++++++++++++
mm/memory.c | 89 +++++++++++++++---------------------------
mm/mprotect.c | 21 +--------
mm/mremap.c | 24 +++--------
mm/msync.c | 21 +--------
mm/swapfile.c | 21 +--------
mm/vmalloc.c | 21 +--------
8 files changed, 100 insertions(+), 162 deletions(-)
--- 2.6.11-bk5/arch/i386/kernel/vm86.c 2005-03-02 07:38:52.000000000 +0000
+++ ptwalk1/arch/i386/kernel/vm86.c 2005-03-09 01:35:49.000000000 +0000
@@ -145,29 +145,14 @@ static void mark_screen_rdonly(struct ta
preempt_disable();
spin_lock(&tsk->mm->page_table_lock);
pgd = pgd_offset(tsk->mm, 0xA0000);
- if (pgd_none(*pgd))
+ if (pgd_none_or_clear_bad(pgd))
goto out;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- goto out;
- }
pud = pud_offset(pgd, 0xA0000);
- if (pud_none(*pud))
- goto out;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
goto out;
- }
pmd = pmd_offset(pud, 0xA0000);
- if (pmd_none(*pmd))
- goto out;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
+ if (pmd_none_or_clear_bad(pmd))
goto out;
- }
pte = mapped = pte_offset_map(pmd, 0xA0000);
for (i = 0; i < 32; i++) {
if (pte_present(*pte))
--- 2.6.11-bk5/include/asm-generic/pgtable.h 2005-03-09 01:12:48.000000000 +0000
+++ ptwalk1/include/asm-generic/pgtable.h 2005-03-09 01:35:49.000000000 +0000
@@ -135,4 +135,48 @@ static inline void ptep_set_wrprotect(st
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+ if (pgd_none(*pgd))
+ return 1;
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_clear_bad(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+ if (pud_none(*pud))
+ return 1;
+ if (unlikely(pud_bad(*pud))) {
+ pud_clear_bad(pud);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+ if (pmd_none(*pmd))
+ return 1;
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+ return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_GENERIC_PGTABLE_H */
--- 2.6.11-bk5/mm/memory.c 2005-03-09 01:12:53.000000000 +0000
+++ ptwalk1/mm/memory.c 2005-03-09 01:35:49.000000000 +0000
@@ -83,6 +83,30 @@ EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmalloc_earlyreserve);
/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none. Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+ pud_ERROR(*pud);
+ pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
+}
+
+/*
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
@@ -90,13 +114,8 @@ static inline void clear_pmd_range(struc
{
struct page *page;
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
if (!((start | end) & ~PMD_MASK)) {
/* Only clear full, aligned ranges */
page = pmd_page(*pmd);
@@ -112,14 +131,8 @@ static inline void clear_pud_range(struc
unsigned long addr = start, next;
pmd_t *pmd, *__pmd;
- if (pud_none(*pud))
+ if (pud_none_or_clear_bad(pud))
return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
- return;
- }
-
pmd = __pmd = pmd_offset(pud, start);
do {
next = (addr + PMD_SIZE) & PMD_MASK;
@@ -144,14 +157,8 @@ static inline void clear_pgd_range(struc
unsigned long addr = start, next;
pud_t *pud, *__pud;
- if (pgd_none(*pgd))
+ if (pgd_none_or_clear_bad(pgd))
return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- return;
- }
-
pud = __pud = pud_offset(pgd, start);
do {
next = (addr + PUD_SIZE) & PUD_MASK;
@@ -374,13 +381,8 @@ static int copy_pmd_range(struct mm_stru
next = (addr + PMD_SIZE) & PMD_MASK;
if (next > end || next <= addr)
next = end;
- if (pmd_none(*src_pmd))
- continue;
- if (pmd_bad(*src_pmd)) {
- pmd_ERROR(*src_pmd);
- pmd_clear(src_pmd);
+ if (pmd_none_or_clear_bad(src_pmd))
continue;
- }
err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
vma, addr, next);
if (err)
@@ -406,13 +408,8 @@ static int copy_pud_range(struct mm_stru
next = (addr + PUD_SIZE) & PUD_MASK;
if (next > end || next <= addr)
next = end;
- if (pud_none(*src_pud))
- continue;
- if (pud_bad(*src_pud)) {
- pud_ERROR(*src_pud);
- pud_clear(src_pud);
+ if (pud_none_or_clear_bad(src_pud))
continue;
- }
err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
vma, addr, next);
if (err)
@@ -441,13 +438,8 @@ int copy_page_range(struct mm_struct *ds
next = (addr + PGDIR_SIZE) & PGDIR_MASK;
if (next > end || next <= addr)
next = end;
- if (pgd_none(*src_pgd))
+ if (pgd_none_or_clear_bad(src_pgd))
goto next_pgd;
- if (pgd_bad(*src_pgd)) {
- pgd_ERROR(*src_pgd);
- pgd_clear(src_pgd);
- goto next_pgd;
- }
err = copy_pud_range(dst, src, dst_pgd, src_pgd,
vma, addr, next);
if (err)
@@ -469,13 +461,8 @@ static void zap_pte_range(struct mmu_gat
unsigned long offset;
pte_t *ptep;
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return;
- if (unlikely(pmd_bad(*pmd))) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
ptep = pte_offset_map(pmd, address);
offset = address & ~PMD_MASK;
if (offset + size > PMD_SIZE)
@@ -553,13 +540,8 @@ static void zap_pmd_range(struct mmu_gat
pmd_t * pmd;
unsigned long end;
- if (pud_none(*pud))
- return;
- if (unlikely(pud_bad(*pud))) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
return;
- }
pmd = pmd_offset(pud, address);
end = address + size;
if (end > ((address + PUD_SIZE) & PUD_MASK))
@@ -577,13 +559,8 @@ static void zap_pud_range(struct mmu_gat
{
pud_t * pud;
- if (pgd_none(*pgd))
- return;
- if (unlikely(pgd_bad(*pgd))) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ if (pgd_none_or_clear_bad(pgd))
return;
- }
pud = pud_offset(pgd, address);
do {
zap_pmd_range(tlb, pud, address, end - address, details);
--- 2.6.11-bk5/mm/mprotect.c 2005-03-09 01:12:53.000000000 +0000
+++ ptwalk1/mm/mprotect.c 2005-03-09 01:35:49.000000000 +0000
@@ -32,13 +32,8 @@ change_pte_range(struct mm_struct *mm, p
pte_t * pte;
unsigned long base, end;
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
pte = pte_offset_map(pmd, address);
base = address & PMD_MASK;
address &= ~PMD_MASK;
@@ -69,13 +64,8 @@ change_pmd_range(struct mm_struct *mm, p
pmd_t * pmd;
unsigned long base, end;
- if (pud_none(*pud))
- return;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
return;
- }
pmd = pmd_offset(pud, address);
base = address & PUD_MASK;
address &= ~PUD_MASK;
@@ -96,13 +86,8 @@ change_pud_range(struct mm_struct *mm, p
pud_t * pud;
unsigned long base, end;
- if (pgd_none(*pgd))
- return;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ if (pgd_none_or_clear_bad(pgd))
return;
- }
pud = pud_offset(pgd, address);
base = address & PGDIR_MASK;
address &= ~PGDIR_MASK;
--- 2.6.11-bk5/mm/mremap.c 2005-03-09 01:12:53.000000000 +0000
+++ ptwalk1/mm/mremap.c 2005-03-09 01:35:49.000000000 +0000
@@ -30,26 +30,16 @@ static pte_t *get_one_pte_map_nested(str
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
+ if (pgd_none_or_clear_bad(pgd))
goto end;
pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ if (pud_none_or_clear_bad(pud))
goto end;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
- goto end;
- }
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
- goto end;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
+ if (pmd_none_or_clear_bad(pmd))
goto end;
- }
pte = pte_offset_map_nested(pmd, addr);
if (pte_none(*pte)) {
@@ -67,15 +57,17 @@ static pte_t *get_one_pte_map(struct mm_
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
+ if (pgd_none_or_clear_bad(pgd))
return NULL;
pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
+ if (pud_none_or_clear_bad(pud))
return NULL;
+
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return NULL;
+
return pte_offset_map(pmd, addr);
}
--- 2.6.11-bk5/mm/msync.c 2005-03-02 07:38:55.000000000 +0000
+++ ptwalk1/mm/msync.c 2005-03-09 01:35:49.000000000 +0000
@@ -45,13 +45,8 @@ static int filemap_sync_pte_range(pmd_t
pte_t *pte;
int error;
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return 0;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return 0;
- }
pte = pte_offset_map(pmd, address);
if ((address & PMD_MASK) != (end & PMD_MASK))
end = (address & PMD_MASK) + PMD_SIZE;
@@ -74,13 +69,8 @@ static inline int filemap_sync_pmd_range
pmd_t * pmd;
int error;
- if (pud_none(*pud))
- return 0;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
return 0;
- }
pmd = pmd_offset(pud, address);
if ((address & PUD_MASK) != (end & PUD_MASK))
end = (address & PUD_MASK) + PUD_SIZE;
@@ -100,13 +90,8 @@ static inline int filemap_sync_pud_range
pud_t *pud;
int error;
- if (pgd_none(*pgd))
- return 0;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ if (pgd_none_or_clear_bad(pgd))
return 0;
- }
pud = pud_offset(pgd, address);
if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
end = (address & PGDIR_MASK) + PGDIR_SIZE;
--- 2.6.11-bk5/mm/swapfile.c 2005-03-09 01:12:53.000000000 +0000
+++ ptwalk1/mm/swapfile.c 2005-03-09 01:35:49.000000000 +0000
@@ -441,13 +441,8 @@ static unsigned long unuse_pmd(struct vm
pte_t *pte;
pte_t swp_pte = swp_entry_to_pte(entry);
- if (pmd_none(*dir))
+ if (pmd_none_or_clear_bad(dir))
return 0;
- if (pmd_bad(*dir)) {
- pmd_ERROR(*dir);
- pmd_clear(dir);
- return 0;
- }
pte = pte_offset_map(dir, address);
do {
/*
@@ -483,13 +478,8 @@ static unsigned long unuse_pud(struct vm
unsigned long next;
unsigned long foundaddr;
- if (pud_none(*pud))
- return 0;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
return 0;
- }
pmd = pmd_offset(pud, address);
do {
next = (address + PMD_SIZE) & PMD_MASK;
@@ -513,13 +503,8 @@ static unsigned long unuse_pgd(struct vm
unsigned long next;
unsigned long foundaddr;
- if (pgd_none(*pgd))
- return 0;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ if (pgd_none_or_clear_bad(pgd))
return 0;
- }
pud = pud_offset(pgd, address);
do {
next = (address + PUD_SIZE) & PUD_MASK;
--- 2.6.11-bk5/mm/vmalloc.c 2005-03-09 01:12:53.000000000 +0000
+++ ptwalk1/mm/vmalloc.c 2005-03-09 01:35:49.000000000 +0000
@@ -29,13 +29,8 @@ static void unmap_area_pte(pmd_t *pmd, u
unsigned long base, end;
pte_t *pte;
- if (pmd_none(*pmd))
+ if (pmd_none_or_clear_bad(pmd))
return;
- if (pmd_bad(*pmd)) {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- return;
- }
pte = pte_offset_kernel(pmd, address);
base = address & PMD_MASK;
@@ -63,13 +58,8 @@ static void unmap_area_pmd(pud_t *pud, u
unsigned long base, end;
pmd_t *pmd;
- if (pud_none(*pud))
- return;
- if (pud_bad(*pud)) {
- pud_ERROR(*pud);
- pud_clear(pud);
+ if (pud_none_or_clear_bad(pud))
return;
- }
pmd = pmd_offset(pud, address);
base = address & PUD_MASK;
@@ -91,13 +81,8 @@ static void unmap_area_pud(pgd_t *pgd, u
pud_t *pud;
unsigned long base, end;
- if (pgd_none(*pgd))
- return;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+ if (pgd_none_or_clear_bad(pgd))
return;
- }
pud = pud_offset(pgd, address);
base = address & PGDIR_MASK;
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 2/15] ptwalk: change_protection
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
2005-03-09 22:06 ` [PATCH 1/15] ptwalk: p?d_none_or_clear_bad Hugh Dickins
@ 2005-03-09 22:07 ` Hugh Dickins
2005-03-09 22:08 ` [PATCH 3/15] ptwalk: sync_page_range Hugh Dickins
` (13 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:07 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Begin the pagetable walker cleanup with a straightforward example,
mprotect's change_protection. Started out from Nick Piggin's for_each
proposal, but I prefer less hidden; and these are all do while loops,
which degrade slightly when converted to for loops.
Firmly agree with Andi and Nick that addr,end is the way to go: size is
good at the user interface level, but unhelpful down in the loops. And
the habit of an "address" which is actually an offset from some base has
bitten us several times: use proper address at each level, whyever not?
Don't apply each mask at two levels: all we need is a set of macros
pgd_addr_end, pud_addr_end, pmd_addr_end to give the address of the end
of each range. Which need to take the min of two addresses, with 0 as
the greatest. Started out with a different macro, assumed end never 0;
but clear_page_range (alone) might be passed end 0 by some out-of-tree
memory layouts: could special case it, but this macro compiles smaller.
Check "addr != end" instead of "addr < end" to work on that end 0 case.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
include/asm-generic/pgtable.h | 21 ++++++++
mm/mprotect.c | 104 +++++++++++++++---------------------------
2 files changed, 59 insertions(+), 66 deletions(-)
--- ptwalk1/include/asm-generic/pgtable.h 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk2/include/asm-generic/pgtable.h 2005-03-09 01:36:01.000000000 +0000
@@ -135,6 +135,27 @@ static inline void ptep_set_wrprotect(st
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+/*
+ * When walking page tables, get the address of the next boundary, or
+ * the end address of the range if that comes earlier. Although end might
+ * wrap to 0 only in clear_page_range, __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#define pud_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#define pmd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
#ifndef __ASSEMBLY__
/*
* When walking page tables, we usually want to skip any p?d_none entries;
--- ptwalk1/mm/mprotect.c 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk2/mm/mprotect.c 2005-03-09 01:36:01.000000000 +0000
@@ -25,104 +25,76 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-static inline void
-change_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long address,
- unsigned long size, pgprot_t newprot)
+static inline void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
{
- pte_t * pte;
- unsigned long base, end;
+ pte_t *pte;
if (pmd_none_or_clear_bad(pmd))
return;
- pte = pte_offset_map(pmd, address);
- base = address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+ pte = pte_offset_map(pmd, addr);
do {
if (pte_present(*pte)) {
- pte_t entry;
+ pte_t ptent;
/* Avoid an SMP race with hardware updated dirty/clean
* bits by wiping the pte and then setting the new pte
* into place.
*/
- entry = ptep_get_and_clear(mm, base + address, pte);
- set_pte_at(mm, base + address, pte, pte_modify(entry, newprot));
+ ptent = ptep_get_and_clear(mm, addr, pte);
+ set_pte_at(mm, addr, pte, pte_modify(ptent, newprot));
}
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
+ } while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(pte - 1);
}
-static inline void
-change_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long address,
- unsigned long size, pgprot_t newprot)
+static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
{
- pmd_t * pmd;
- unsigned long base, end;
+ pmd_t *pmd;
+ unsigned long next;
if (pud_none_or_clear_bad(pud))
return;
- pmd = pmd_offset(pud, address);
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
+ pmd = pmd_offset(pud, addr);
do {
- change_pte_range(mm, pmd, base + address, end - address, newprot);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ next = pmd_addr_end(addr, end);
+ change_pte_range(mm, pmd, addr, next, newprot);
+ } while (pmd++, addr = next, addr != end);
}
-static inline void
-change_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long address,
- unsigned long size, pgprot_t newprot)
+static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
{
- pud_t * pud;
- unsigned long base, end;
+ pud_t *pud;
+ unsigned long next;
if (pgd_none_or_clear_bad(pgd))
return;
- pud = pud_offset(pgd, address);
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+ pud = pud_offset(pgd, addr);
do {
- change_pmd_range(mm, pud, base + address, end - address, newprot);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
+ next = pud_addr_end(addr, end);
+ change_pmd_range(mm, pud, addr, next, newprot);
+ } while (pud++, addr = next, addr != end);
}
-static void
-change_protection(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgprot_t newprot)
+static void change_protection(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, pgprot_t newprot)
{
- struct mm_struct *mm = current->mm;
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
- unsigned long beg = start, next;
- int i;
+ unsigned long next;
+ unsigned long start = addr;
- pgd = pgd_offset(mm, start);
- flush_cache_range(vma, beg, end);
- BUG_ON(start >= end);
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
- next = (start + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= start || next > end)
- next = end;
- change_pud_range(mm, pgd, start, next - start, newprot);
- start = next;
- pgd++;
- }
- flush_tlb_range(vma, beg, end);
+ do {
+ next = pgd_addr_end(addr, end);
+ change_pud_range(mm, pgd, addr, next, newprot);
+ } while (pgd++, addr = next, addr != end);
+ flush_tlb_range(vma, start, end);
spin_unlock(&mm->page_table_lock);
}
@@ -130,7 +102,7 @@ static int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
unsigned long start, unsigned long end, unsigned long newflags)
{
- struct mm_struct * mm = vma->vm_mm;
+ struct mm_struct *mm = vma->vm_mm;
unsigned long oldflags = vma->vm_flags;
long nrpages = (end - start) >> PAGE_SHIFT;
unsigned long charged = 0;
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 3/15] ptwalk: sync_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
2005-03-09 22:06 ` [PATCH 1/15] ptwalk: p?d_none_or_clear_bad Hugh Dickins
2005-03-09 22:07 ` [PATCH 2/15] ptwalk: change_protection Hugh Dickins
@ 2005-03-09 22:08 ` Hugh Dickins
2005-03-09 22:08 ` [PATCH 4/15] ptwalk: unuse_mm Hugh Dickins
` (12 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:08 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert filemap_sync pagetable walkers to loops using p?d_addr_end; use
similar loop to split filemap_sync into chunks. Merge filemap_sync_pte
into sync_pte_range, cut filemap_ off the longer names, vma arg first.
There is no error from filemap_sync, nor is any use made of the flags:
if it should do something else for MS_INVALIDATE, reinstate it when that
is implemented. Remove the redundant flush_tlb_range from afterwards:
as its comment noted, each dirty pte has already been flushed.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/msync.c | 180 ++++++++++++++++++++++---------------------------------------
1 files changed, 67 insertions(+), 113 deletions(-)
--- ptwalk2/mm/msync.c 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk3/mm/msync.c 2005-03-09 01:36:14.000000000 +0000
@@ -21,155 +21,109 @@
* Called with mm->page_table_lock held to protect against other
* threads/the swapper from ripping pte's out from under us.
*/
-static int filemap_sync_pte(pte_t *ptep, struct vm_area_struct *vma,
- unsigned long address, unsigned int flags)
-{
- pte_t pte = *ptep;
- unsigned long pfn = pte_pfn(pte);
- struct page *page;
- if (pte_present(pte) && pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (!PageReserved(page) &&
- (ptep_clear_flush_dirty(vma, address, ptep) ||
- page_test_and_clear_dirty(page)))
- set_page_dirty(page);
- }
- return 0;
-}
-
-static int filemap_sync_pte_range(pmd_t * pmd,
- unsigned long address, unsigned long end,
- struct vm_area_struct *vma, unsigned int flags)
+static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
{
pte_t *pte;
- int error;
if (pmd_none_or_clear_bad(pmd))
- return 0;
- pte = pte_offset_map(pmd, address);
- if ((address & PMD_MASK) != (end & PMD_MASK))
- end = (address & PMD_MASK) + PMD_SIZE;
- error = 0;
+ return;
+ pte = pte_offset_map(pmd, addr);
do {
- error |= filemap_sync_pte(pte, vma, address, flags);
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
+ unsigned long pfn;
+ struct page *page;
- pte_unmap(pte - 1);
+ if (!pte_present(*pte))
+ continue;
+ pfn = pte_pfn(*pte);
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageReserved(page))
+ continue;
- return error;
+ if (ptep_clear_flush_dirty(vma, addr, pte) ||
+ page_test_and_clear_dirty(page))
+ set_page_dirty(page);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
}
-static inline int filemap_sync_pmd_range(pud_t * pud,
- unsigned long address, unsigned long end,
- struct vm_area_struct *vma, unsigned int flags)
+static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end)
{
- pmd_t * pmd;
- int error;
+ pmd_t *pmd;
+ unsigned long next;
if (pud_none_or_clear_bad(pud))
- return 0;
- pmd = pmd_offset(pud, address);
- if ((address & PUD_MASK) != (end & PUD_MASK))
- end = (address & PUD_MASK) + PUD_SIZE;
- error = 0;
+ return;
+ pmd = pmd_offset(pud, addr);
do {
- error |= filemap_sync_pte_range(pmd, address, end, vma, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return error;
+ next = pmd_addr_end(addr, end);
+ sync_pte_range(vma, pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
}
-static inline int filemap_sync_pud_range(pgd_t *pgd,
- unsigned long address, unsigned long end,
- struct vm_area_struct *vma, unsigned int flags)
+static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
{
pud_t *pud;
- int error;
+ unsigned long next;
if (pgd_none_or_clear_bad(pgd))
- return 0;
- pud = pud_offset(pgd, address);
- if ((address & PGDIR_MASK) != (end & PGDIR_MASK))
- end = (address & PGDIR_MASK) + PGDIR_SIZE;
- error = 0;
+ return;
+ pud = pud_offset(pgd, addr);
do {
- error |= filemap_sync_pmd_range(pud, address, end, vma, flags);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return error;
+ next = pud_addr_end(addr, end);
+ sync_pmd_range(vma, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
}
-static int __filemap_sync(struct vm_area_struct *vma, unsigned long address,
- size_t size, unsigned int flags)
+static void sync_page_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
{
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
- unsigned long end = address + size;
unsigned long next;
- int i;
- int error = 0;
-
- /* Aquire the lock early; it may be possible to avoid dropping
- * and reaquiring it repeatedly.
- */
- spin_lock(&vma->vm_mm->page_table_lock);
-
- pgd = pgd_offset(vma->vm_mm, address);
- flush_cache_range(vma, address, end);
/* For hugepages we can't go walking the page table normally,
* but that's ok, hugetlbfs is memory based, so we don't need
* to do anything more on an msync() */
if (is_vm_hugetlb_page(vma))
- goto out;
-
- if (address >= end)
- BUG();
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= address || next > end)
- next = end;
- error |= filemap_sync_pud_range(pgd, address, next, vma, flags);
- address = next;
- pgd++;
- }
- /*
- * Why flush ? filemap_sync_pte already flushed the tlbs with the
- * dirty bits.
- */
- flush_tlb_range(vma, end - size, end);
- out:
- spin_unlock(&vma->vm_mm->page_table_lock);
+ return;
- return error;
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
+ spin_lock(&mm->page_table_lock);
+ do {
+ next = pgd_addr_end(addr, end);
+ sync_pud_range(vma, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+ spin_unlock(&mm->page_table_lock);
}
#ifdef CONFIG_PREEMPT
-static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
- size_t size, unsigned int flags)
+static void filemap_sync(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
{
const size_t chunk = 64 * 1024; /* bytes */
- int error = 0;
-
- while (size) {
- size_t sz = min(size, chunk);
+ unsigned long next;
- error |= __filemap_sync(vma, address, sz, flags);
+ do {
+ next = addr + chunk;
+ if (next > end || next < addr)
+ next = end;
+ sync_page_range(vma, addr, next);
cond_resched();
- address += sz;
- size -= sz;
- }
- return error;
+ } while (addr = next, addr != end);
}
#else
-static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
- size_t size, unsigned int flags)
+static void filemap_sync(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end)
{
- return __filemap_sync(vma, address, size, flags);
+ sync_page_range(vma, addr, end);
}
#endif
@@ -184,19 +138,19 @@ static int filemap_sync(struct vm_area_s
* So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/
-static int msync_interval(struct vm_area_struct * vma,
- unsigned long start, unsigned long end, int flags)
+static int msync_interval(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end, int flags)
{
int ret = 0;
- struct file * file = vma->vm_file;
+ struct file *file = vma->vm_file;
if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
return -EBUSY;
if (file && (vma->vm_flags & VM_SHARED)) {
- ret = filemap_sync(vma, start, end-start, flags);
+ filemap_sync(vma, addr, end);
- if (!ret && (flags & MS_SYNC)) {
+ if (flags & MS_SYNC) {
struct address_space *mapping = file->f_mapping;
int err;
@@ -221,7 +175,7 @@ static int msync_interval(struct vm_area
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
unsigned long end;
- struct vm_area_struct * vma;
+ struct vm_area_struct *vma;
int unmapped_error, error = -EINVAL;
if (flags & MS_SYNC)
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 4/15] ptwalk: unuse_mm
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (2 preceding siblings ...)
2005-03-09 22:08 ` [PATCH 3/15] ptwalk: sync_page_range Hugh Dickins
@ 2005-03-09 22:08 ` Hugh Dickins
2005-03-09 22:09 ` [PATCH 5/15] ptwalk: map and unmap_vm_area Hugh Dickins
` (11 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:08 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert unuse_process pagetable walkers to loops using p?d_addr_end; but
correct its name to unuse_mm, rename its levels to _range as elsewhere.
Leave unuse_pte out-of-line since it's so rarely called; but move the
funny activate_page inside it. foundaddr was a leftover from before: we
still want to break out once page is found, but no need to pass addr up.
And we need not comment on the page_table_lock at every level.
Whereas most objects shrink ~200 bytes text, swapfile.o grows slightly:
it had earlier been converted to the addr,end style to fix a 4level bug.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/swapfile.c | 148 +++++++++++++++++++++-------------------------------------
1 files changed, 56 insertions(+), 92 deletions(-)
--- ptwalk3/mm/swapfile.c 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk4/mm/swapfile.c 2005-03-09 01:36:25.000000000 +0000
@@ -412,154 +412,121 @@ void free_swap_and_cache(swp_entry_t ent
}
/*
- * The swap entry has been read in advance, and we return 1 to indicate
- * that the page has been used or is no longer needed.
- *
* Always set the resulting pte to be nowrite (the same as COW pages
* after one process has exited). We don't know just how many PTEs will
* share this swap entry, so be cautious and let do_wp_page work out
* what to do if a write is requested later.
+ *
+ * vma->vm_mm->page_table_lock is held.
*/
-/* vma->vm_mm->page_table_lock is held */
-static void
-unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
- swp_entry_t entry, struct page *page)
+static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+ unsigned long addr, swp_entry_t entry, struct page *page)
{
vma->vm_mm->rss++;
get_page(page);
- set_pte_at(vma->vm_mm, address, dir,
+ set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
- page_add_anon_rmap(page, vma, address);
+ page_add_anon_rmap(page, vma, addr);
swap_free(entry);
+ /*
+ * Move the page to the active list so it is not
+ * immediately swapped out again after swapon.
+ */
+ activate_page(page);
}
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pmd(struct vm_area_struct *vma, pmd_t *dir,
- unsigned long address, unsigned long end,
- swp_entry_t entry, struct page *page)
+static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
{
pte_t *pte;
pte_t swp_pte = swp_entry_to_pte(entry);
- if (pmd_none_or_clear_bad(dir))
+ if (pmd_none_or_clear_bad(pmd))
return 0;
- pte = pte_offset_map(dir, address);
+ pte = pte_offset_map(pmd, addr);
do {
/*
* swapoff spends a _lot_ of time in this loop!
* Test inline before going to call unuse_pte.
*/
if (unlikely(pte_same(*pte, swp_pte))) {
- unuse_pte(vma, address, pte, entry, page);
+ unuse_pte(vma, pte, addr, entry, page);
pte_unmap(pte);
-
- /*
- * Move the page to the active list so it is not
- * immediately swapped out again after swapon.
- */
- activate_page(page);
-
- /* add 1 since address may be 0 */
- return 1 + address;
+ return 1;
}
- address += PAGE_SIZE;
- pte++;
- } while (address < end);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap(pte - 1);
return 0;
}
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pud(struct vm_area_struct *vma, pud_t *pud,
- unsigned long address, unsigned long end,
- swp_entry_t entry, struct page *page)
+static int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
{
pmd_t *pmd;
unsigned long next;
- unsigned long foundaddr;
if (pud_none_or_clear_bad(pud))
return 0;
- pmd = pmd_offset(pud, address);
+ pmd = pmd_offset(pud, addr);
do {
- next = (address + PMD_SIZE) & PMD_MASK;
- if (next > end || !next)
- next = end;
- foundaddr = unuse_pmd(vma, pmd, address, next, entry, page);
- if (foundaddr)
- return foundaddr;
- address = next;
- pmd++;
- } while (address < end);
+ next = pmd_addr_end(addr, end);
+ if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+ return 1;
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pgd(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long address, unsigned long end,
- swp_entry_t entry, struct page *page)
+static int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ swp_entry_t entry, struct page *page)
{
pud_t *pud;
unsigned long next;
- unsigned long foundaddr;
if (pgd_none_or_clear_bad(pgd))
return 0;
- pud = pud_offset(pgd, address);
+ pud = pud_offset(pgd, addr);
do {
- next = (address + PUD_SIZE) & PUD_MASK;
- if (next > end || !next)
- next = end;
- foundaddr = unuse_pud(vma, pud, address, next, entry, page);
- if (foundaddr)
- return foundaddr;
- address = next;
- pud++;
- } while (address < end);
+ next = pud_addr_end(addr, end);
+ if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+ return 1;
+ } while (pud++, addr = next, addr != end);
return 0;
}
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_vma(struct vm_area_struct *vma,
- swp_entry_t entry, struct page *page)
+static int unuse_vma(struct vm_area_struct *vma,
+ swp_entry_t entry, struct page *page)
{
pgd_t *pgd;
- unsigned long address, next, end;
- unsigned long foundaddr;
+ unsigned long addr, end, next;
if (page->mapping) {
- address = page_address_in_vma(page, vma);
- if (address == -EFAULT)
+ addr = page_address_in_vma(page, vma);
+ if (addr == -EFAULT)
return 0;
else
- end = address + PAGE_SIZE;
+ end = addr + PAGE_SIZE;
} else {
- address = vma->vm_start;
+ addr = vma->vm_start;
end = vma->vm_end;
}
- pgd = pgd_offset(vma->vm_mm, address);
+
+ pgd = pgd_offset(vma->vm_mm, addr);
do {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || !next)
- next = end;
- foundaddr = unuse_pgd(vma, pgd, address, next, entry, page);
- if (foundaddr)
- return foundaddr;
- address = next;
- pgd++;
- } while (address < end);
+ next = pgd_addr_end(addr, end);
+ if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+ return 1;
+ } while (pgd++, addr = next, addr != end);
return 0;
}
-static int unuse_process(struct mm_struct * mm,
- swp_entry_t entry, struct page* page)
+static int unuse_mm(struct mm_struct *mm,
+ swp_entry_t entry, struct page *page)
{
- struct vm_area_struct* vma;
- unsigned long foundaddr = 0;
+ struct vm_area_struct *vma;
- /*
- * Go through process' page directory.
- */
if (!down_read_trylock(&mm->mmap_sem)) {
/*
* Our reference to the page stops try_to_unmap_one from
@@ -571,16 +538,13 @@ static int unuse_process(struct mm_struc
}
spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (vma->anon_vma) {
- foundaddr = unuse_vma(vma, entry, page);
- if (foundaddr)
- break;
- }
+ if (vma->anon_vma && unuse_vma(vma, entry, page))
+ break;
}
spin_unlock(&mm->page_table_lock);
up_read(&mm->mmap_sem);
/*
- * Currently unuse_process cannot fail, but leave error handling
+ * Currently unuse_mm cannot fail, but leave error handling
* at call sites for now, since we change it from time to time.
*/
return 0;
@@ -724,7 +688,7 @@ static int try_to_unuse(unsigned int typ
if (start_mm == &init_mm)
shmem = shmem_unuse(entry, page);
else
- retval = unuse_process(start_mm, entry, page);
+ retval = unuse_mm(start_mm, entry, page);
}
if (*swap_map > 1) {
int set_start_mm = (*swap_map >= swcount);
@@ -756,7 +720,7 @@ static int try_to_unuse(unsigned int typ
set_start_mm = 1;
shmem = shmem_unuse(entry, page);
} else
- retval = unuse_process(mm, entry, page);
+ retval = unuse_mm(mm, entry, page);
if (set_start_mm && *swap_map < swcount) {
mmput(new_start_mm);
atomic_inc(&mm->mm_users);
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 5/15] ptwalk: map and unmap_vm_area
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (3 preceding siblings ...)
2005-03-09 22:08 ` [PATCH 4/15] ptwalk: unuse_mm Hugh Dickins
@ 2005-03-09 22:09 ` Hugh Dickins
2005-03-09 22:10 ` [PATCH 6/15] ptwalk: ioremap_page_range Hugh Dickins
` (10 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:09 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert unmap_vm_area and map_vm_area pagetable walkers to loops using
p?d_addr_end; rename internal levels vunmap_p??_range, vmap_p??_range.
map_vm_area shows the style when allocating: allocs moved down a level.
Replace KERN_CRIT Whee message by boring WARN_ON.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/vmalloc.c | 216 +++++++++++++++++++++--------------------------------------
1 files changed, 77 insertions(+), 139 deletions(-)
--- ptwalk4/mm/vmalloc.c 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk5/mm/vmalloc.c 2005-03-09 01:36:38.000000000 +0000
@@ -23,199 +23,137 @@
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
-static void unmap_area_pte(pmd_t *pmd, unsigned long address,
- unsigned long size)
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
- unsigned long base, end;
pte_t *pte;
if (pmd_none_or_clear_bad(pmd))
return;
- pte = pte_offset_kernel(pmd, address);
- base = address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
-
+ pte = pte_offset_kernel(pmd, addr);
do {
- pte_t page;
- page = ptep_get_and_clear(&init_mm, base + address, pte);
- address += PAGE_SIZE;
- pte++;
- if (pte_none(page))
- continue;
- if (pte_present(page))
- continue;
- printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
- } while (address < end);
+ pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+ WARN_ON(!pte_none(ptent) && !pte_present(ptent));
+ } while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void unmap_area_pmd(pud_t *pud, unsigned long address,
- unsigned long size)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
- unsigned long base, end;
pmd_t *pmd;
+ unsigned long next;
if (pud_none_or_clear_bad(pud))
return;
- pmd = pmd_offset(pud, address);
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
-
+ pmd = pmd_offset(pud, addr);
do {
- unmap_area_pte(pmd, base + address, end - address);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
+ next = pmd_addr_end(addr, end);
+ vunmap_pte_range(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
}
-static void unmap_area_pud(pgd_t *pgd, unsigned long address,
- unsigned long size)
+static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
pud_t *pud;
- unsigned long base, end;
+ unsigned long next;
if (pgd_none_or_clear_bad(pgd))
return;
- pud = pud_offset(pgd, address);
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ vunmap_pmd_range(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+void unmap_vm_area(struct vm_struct *area)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ unsigned long addr = (unsigned long) area->addr;
+ unsigned long end = addr + area->size;
+ BUG_ON(addr >= end);
+ pgd = pgd_offset_k(addr);
+ flush_cache_vunmap(addr, end);
do {
- unmap_area_pmd(pud, base + address, end - address);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
-}
-
-static int map_area_pte(pte_t *pte, unsigned long address,
- unsigned long size, pgprot_t prot,
- struct page ***pages)
-{
- unsigned long base, end;
-
- base = address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+ next = pgd_addr_end(addr, end);
+ vunmap_pud_range(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+ flush_tlb_kernel_range((unsigned long) area->addr, end);
+}
+
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page ***pages)
+{
+ pte_t *pte;
+ pte = pte_alloc_kernel(&init_mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
struct page *page = **pages;
WARN_ON(!pte_none(*pte));
if (!page)
return -ENOMEM;
-
- set_pte_at(&init_mm, base + address, pte, mk_pte(page, prot));
- address += PAGE_SIZE;
- pte++;
+ set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*pages)++;
- } while (address < end);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
return 0;
}
-static int map_area_pmd(pmd_t *pmd, unsigned long address,
- unsigned long size, pgprot_t prot,
- struct page ***pages)
-{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
+static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page ***pages)
+{
+ pmd_t *pmd;
+ unsigned long next;
+ pmd = pmd_alloc(&init_mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
- if (!pte)
- return -ENOMEM;
- if (map_area_pte(pte, base + address, end - address, prot, pages))
+ next = pmd_addr_end(addr, end);
+ if (vmap_pte_range(pmd, addr, next, prot, pages))
return -ENOMEM;
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address < end);
-
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static int map_area_pud(pud_t *pud, unsigned long address,
- unsigned long end, pgprot_t prot,
- struct page ***pages)
+static int vmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page ***pages)
{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(&init_mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
- if (!pmd)
+ next = pud_addr_end(addr, end);
+ if (vmap_pmd_range(pud, addr, next, prot, pages))
return -ENOMEM;
- if (map_area_pmd(pmd, address, end - address, prot, pages))
- return -ENOMEM;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && address < end);
-
+ } while (pud++, addr = next, addr != end);
return 0;
}
-void unmap_vm_area(struct vm_struct *area)
-{
- unsigned long address = (unsigned long) area->addr;
- unsigned long end = (address + area->size);
- unsigned long next;
- pgd_t *pgd;
- int i;
-
- pgd = pgd_offset_k(address);
- flush_cache_vunmap(address, end);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= address || next > end)
- next = end;
- unmap_area_pud(pgd, address, next - address);
- address = next;
- pgd++;
- }
- flush_tlb_kernel_range((unsigned long) area->addr, end);
-}
-
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
- unsigned long address = (unsigned long) area->addr;
- unsigned long end = address + (area->size-PAGE_SIZE);
- unsigned long next;
pgd_t *pgd;
- int err = 0;
- int i;
+ unsigned long next;
+ unsigned long addr = (unsigned long) area->addr;
+ unsigned long end = addr + area->size - PAGE_SIZE;
+ int err;
- pgd = pgd_offset_k(address);
+ BUG_ON(addr >= end);
+ pgd = pgd_offset_k(addr);
spin_lock(&init_mm.page_table_lock);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(&init_mm, pgd, address);
- if (!pud) {
- err = -ENOMEM;
- break;
- }
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next < address || next > end)
- next = end;
- if (map_area_pud(pud, address, next, prot, pages)) {
- err = -ENOMEM;
+ do {
+ next = pgd_addr_end(addr, end);
+ err = vmap_pud_range(pgd, addr, next, prot, pages);
+ if (err)
break;
- }
-
- address = next;
- pgd++;
- }
-
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&init_mm.page_table_lock);
flush_cache_vmap((unsigned long) area->addr, end);
return err;
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 6/15] ptwalk: ioremap_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (4 preceding siblings ...)
2005-03-09 22:09 ` [PATCH 5/15] ptwalk: map and unmap_vm_area Hugh Dickins
@ 2005-03-09 22:10 ` Hugh Dickins
2005-03-09 22:10 ` [PATCH 7/15] ptwalk: remap_pfn_range Hugh Dickins
` (9 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:10 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert i386 ioremap pagetable walkers to loops using p?d_addr_end.
Rename internal levels ioremap_p??_range. Don't cheat, give it a real
(but inlined) ioremap_pud_range; uninline lowest level to help debug.
Replace "page already exists" printk and BUG by BUG_ON.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
arch/i386/mm/ioremap.c | 112 +++++++++++++++++++++++--------------------------
1 files changed, 53 insertions(+), 59 deletions(-)
--- ptwalk5/arch/i386/mm/ioremap.c 2005-03-09 01:12:39.000000000 +0000
+++ ptwalk6/arch/i386/mm/ioremap.c 2005-03-09 01:36:51.000000000 +0000
@@ -20,89 +20,82 @@
#define ISA_START_ADDRESS 0xa0000
#define ISA_END_ADDRESS 0x100000
-static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
+static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, unsigned long phys_addr, unsigned long flags)
{
- unsigned long end;
+ pte_t *pte;
unsigned long pfn;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
pfn = phys_addr >> PAGE_SHIFT;
+ pte = pte_alloc_kernel(&init_mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
- if (!pte_none(*pte)) {
- printk("remap_area_pte: page already exists\n");
- BUG();
- }
+ BUG_ON(!pte_none(*pte));
set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
- address += PAGE_SIZE;
pfn++;
- pte++;
- } while (address && (address < end));
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ return 0;
}
-static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
+static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, unsigned long phys_addr, unsigned long flags)
{
- unsigned long end;
+ pmd_t *pmd;
+ unsigned long next;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
+ phys_addr -= addr;
+ pmd = pmd_alloc(&init_mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags))
return -ENOMEM;
- remap_area_pte(pte, address, end - address, address + phys_addr, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static int remap_area_pages(unsigned long address, unsigned long phys_addr,
- unsigned long size, unsigned long flags)
+static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, unsigned long phys_addr, unsigned long flags)
{
- int error;
- pgd_t * dir;
- unsigned long end = address + size;
+ pud_t *pud;
+ unsigned long next;
- phys_addr -= address;
- dir = pgd_offset(&init_mm, address);
+ phys_addr -= addr;
+ pud = pud_alloc(&init_mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+static int ioremap_page_range(unsigned long addr,
+ unsigned long end, unsigned long phys_addr, unsigned long flags)
+{
+ pgd_t *pgd;
+ unsigned long next;
+ int err;
+
+ BUG_ON(addr >= end);
flush_cache_all();
- if (address >= end)
- BUG();
+ phys_addr -= addr;
+ pgd = pgd_offset_k(addr);
spin_lock(&init_mm.page_table_lock);
do {
- pud_t *pud;
- pmd_t *pmd;
-
- error = -ENOMEM;
- pud = pud_alloc(&init_mm, dir, address);
- if (!pud)
- break;
- pmd = pmd_alloc(&init_mm, pud, address);
- if (!pmd)
- break;
- if (remap_area_pmd(pmd, address, end - address,
- phys_addr + address, flags))
+ next = pgd_addr_end(addr, end);
+ err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
+ if (err)
break;
- error = 0;
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- dir++;
- } while (address && (address < end));
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
- return error;
+ return err;
}
/*
@@ -165,7 +158,8 @@ void __iomem * __ioremap(unsigned long p
return NULL;
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
- if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+ if (ioremap_page_range((unsigned long) addr,
+ (unsigned long) addr + size, phys_addr, flags)) {
vunmap((void __force *) addr);
return NULL;
}
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 7/15] ptwalk: remap_pfn_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (5 preceding siblings ...)
2005-03-09 22:10 ` [PATCH 6/15] ptwalk: ioremap_page_range Hugh Dickins
@ 2005-03-09 22:10 ` Hugh Dickins
2005-03-09 22:11 ` [PATCH 8/15] ptwalk: zeromap_page_range Hugh Dickins
` (8 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:10 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert remap_pfn_range pagetable walkers to loops using p?d_addr_end.
Remove the redundant flush_tlb_range from afterwards: as its comment
noted, there's already a BUG_ON(!pte_none).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 151 +++++++++++++++++++++++-------------------------------------
1 files changed, 59 insertions(+), 92 deletions(-)
--- ptwalk6/mm/memory.c 2005-03-09 01:35:49.000000000 +0000
+++ ptwalk7/mm/memory.c 2005-03-09 01:37:02.000000000 +0000
@@ -1089,97 +1089,74 @@ int zeromap_page_range(struct vm_area_st
* mappings are removed. any references to nonexistent pages results
* in null mappings (currently treated as "copy-on-access")
*/
-static inline void
-remap_pte_range(struct mm_struct *mm, pte_t * pte,
- unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+static inline int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte_at(mm, base+address, pte, pfn_pte(pfn, prot));
- address += PAGE_SIZE;
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
- pte++;
- } while (address && (address < end));
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
}
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
- unsigned long size, unsigned long pfn, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
- pfn -= (address >> PAGE_SHIFT);
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
return -ENOMEM;
- remap_pte_range(mm, pte, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address, unsigned long size,
- unsigned long pfn, pgprot_t prot)
-{
- unsigned long base, end;
- int error;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- pfn -= address >> PAGE_SHIFT;
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ unsigned long pfn, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pfn -= addr >> PAGE_SHIFT;
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t *pmd = pmd_alloc(mm, pud, base+address);
- error = -ENOMEM;
- if (!pmd)
- break;
- error = remap_pmd_range(mm, pmd, base + address, end - address,
- (address >> PAGE_SHIFT) + pfn, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return error;
+ next = pud_addr_end(addr, end);
+ if (remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
}
/* Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
- int error = 0;
pgd_t *pgd;
- unsigned long beg = from;
- unsigned long end = from + size;
unsigned long next;
+ unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
- int i;
-
- pfn -= from >> PAGE_SHIFT;
- pgd = pgd_offset(mm, from);
- flush_cache_range(vma, beg, end);
- BUG_ON(from >= end);
+ int err;
/*
* Physically remapped pages are special. Tell the
@@ -1191,31 +1168,21 @@ int remap_pfn_range(struct vm_area_struc
*/
vma->vm_flags |= VM_IO | VM_RESERVED;
+ BUG_ON(addr >= end);
+ pfn -= addr >> PAGE_SHIFT;
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, from);
- error = -ENOMEM;
- if (!pud)
- break;
- next = (from + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= from)
- next = end;
- error = remap_pud_range(mm, pud, from, end - from,
- pfn + (from >> PAGE_SHIFT), prot);
- if (error)
+ do {
+ next = pgd_addr_end(addr, end);
+ err = remap_pud_range(mm, pgd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
break;
- from = next;
- pgd++;
- }
- /*
- * Why flush? remap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
-
- return error;
+ return err;
}
-
EXPORT_SYMBOL(remap_pfn_range);
/*
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 8/15] ptwalk: zeromap_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (6 preceding siblings ...)
2005-03-09 22:10 ` [PATCH 7/15] ptwalk: remap_pfn_range Hugh Dickins
@ 2005-03-09 22:11 ` Hugh Dickins
2005-03-09 22:12 ` [PATCH 9/15] ptwalk: unmap_page_range Hugh Dickins
` (7 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:11 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert zeromap_page_range pagetable walkers to loops using p?d_addr_end.
Remove the redundant flush_tlb_range from afterwards: as its comment
noted, there's already a BUG_ON(!pte_none).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 143 ++++++++++++++++++++++--------------------------------------
1 files changed, 54 insertions(+), 89 deletions(-)
--- ptwalk7/mm/memory.c 2005-03-09 01:37:02.000000000 +0000
+++ ptwalk8/mm/memory.c 2005-03-09 01:37:15.000000000 +0000
@@ -975,113 +975,78 @@ out:
EXPORT_SYMBOL(get_user_pages);
-static void zeromap_pte_range(struct mm_struct *mm, pte_t * pte,
- unsigned long address,
- unsigned long size, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PMD_MASK;
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = pte_alloc_map(mm, pmd, addr);
+ if (!pte)
+ return -ENOMEM;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(base+address), prot));
+ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
BUG_ON(!pte_none(*pte));
- set_pte_at(mm, base+address, pte, zero_pte);
- address += PAGE_SIZE;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
- unsigned long address, unsigned long size, pgprot_t prot)
-{
- unsigned long base, end;
-
- base = address & PUD_MASK;
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
+ set_pte_at(mm, addr, pte, zero_pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
+ return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
do {
- pte_t * pte = pte_alloc_map(mm, pmd, base + address);
- if (!pte)
+ next = pmd_addr_end(addr, end);
+ if (zeromap_pte_range(mm, pmd, addr, next, prot))
return -ENOMEM;
- zeromap_pte_range(mm, pte, base + address, end - address, prot);
- pte_unmap(pte);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ } while (pmd++, addr = next, addr != end);
return 0;
}
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
- unsigned long address,
- unsigned long size, pgprot_t prot)
-{
- unsigned long base, end;
- int error = 0;
-
- base = address & PGDIR_MASK;
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
do {
- pmd_t * pmd = pmd_alloc(mm, pud, base + address);
- error = -ENOMEM;
- if (!pmd)
- break;
- error = zeromap_pmd_range(mm, pmd, base + address,
- end - address, prot);
- if (error)
- break;
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
+ next = pud_addr_end(addr, end);
+ if (zeromap_pmd_range(mm, pud, addr, next, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
return 0;
}
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
- unsigned long size, pgprot_t prot)
+int zeromap_page_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgprot_t prot)
{
- int i;
- int error = 0;
- pgd_t * pgd;
- unsigned long beg = address;
- unsigned long end = address + size;
+ pgd_t *pgd;
unsigned long next;
+ unsigned long end = addr + size;
struct mm_struct *mm = vma->vm_mm;
+ int err;
- pgd = pgd_offset(mm, address);
- flush_cache_range(vma, beg, end);
- BUG_ON(address >= end);
- BUG_ON(end > vma->vm_end);
-
+ BUG_ON(addr >= end);
+ pgd = pgd_offset(mm, addr);
+ flush_cache_range(vma, addr, end);
spin_lock(&mm->page_table_lock);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- pud_t *pud = pud_alloc(mm, pgd, address);
- error = -ENOMEM;
- if (!pud)
- break;
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= beg || next > end)
- next = end;
- error = zeromap_pud_range(mm, pud, address,
- next - address, prot);
- if (error)
+ do {
+ next = pgd_addr_end(addr, end);
+ err = zeromap_pud_range(mm, pgd, addr, next, prot);
+ if (err)
break;
- address = next;
- pgd++;
- }
- /*
- * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
- */
- flush_tlb_range(vma, beg, end);
+ } while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
- return error;
+ return err;
}
/*
^ permalink raw reply [flat|nested] 20+ messages in thread* [PATCH 9/15] ptwalk: unmap_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (7 preceding siblings ...)
2005-03-09 22:11 ` [PATCH 8/15] ptwalk: zeromap_page_range Hugh Dickins
@ 2005-03-09 22:12 ` Hugh Dickins
2005-03-09 22:12 ` [PATCH 10/15] ptwalk: copy_page_range Hugh Dickins
` (6 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:12 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert unmap_page_range pagetable walkers to loops using p?d_addr_end.
Move blanking of irrelevant details up to unmap_page_range as Nick did.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 119 ++++++++++++++++++++++++++----------------------------------
1 files changed, 53 insertions(+), 66 deletions(-)
--- ptwalk8/mm/memory.c 2005-03-09 01:37:15.000000000 +0000
+++ ptwalk9/mm/memory.c 2005-03-09 01:38:00.000000000 +0000
@@ -454,29 +454,22 @@ next_pgd:
return err;
}
-static void zap_pte_range(struct mmu_gather *tlb,
- pmd_t *pmd, unsigned long address,
- unsigned long size, struct zap_details *details)
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- unsigned long offset;
- pte_t *ptep;
+ pte_t *pte;
if (pmd_none_or_clear_bad(pmd))
return;
- ptep = pte_offset_map(pmd, address);
- offset = address & ~PMD_MASK;
- if (offset + size > PMD_SIZE)
- size = PMD_SIZE - offset;
- size &= PAGE_MASK;
- if (details && !details->check_mapping && !details->nonlinear_vma)
- details = NULL;
- for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
- pte_t pte = *ptep;
- if (pte_none(pte))
+ pte = pte_offset_map(pmd, addr);
+ do {
+ pte_t ptent = *pte;
+ if (pte_none(ptent))
continue;
- if (pte_present(pte)) {
+ if (pte_present(ptent)) {
struct page *page = NULL;
- unsigned long pfn = pte_pfn(pte);
+ unsigned long pfn = pte_pfn(ptent);
if (pfn_valid(pfn)) {
page = pfn_to_page(pfn);
if (PageReserved(page))
@@ -500,20 +493,20 @@ static void zap_pte_range(struct mmu_gat
page->index > details->last_index))
continue;
}
- pte = ptep_get_and_clear(tlb->mm, address+offset, ptep);
- tlb_remove_tlb_entry(tlb, ptep, address+offset);
+ ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+ tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
- address+offset) != page->index)
- set_pte_at(tlb->mm, address+offset,
- ptep, pgoff_to_pte(page->index));
- if (pte_dirty(pte))
+ addr) != page->index)
+ set_pte_at(tlb->mm, addr, pte,
+ pgoff_to_pte(page->index));
+ if (pte_dirty(ptent))
set_page_dirty(page);
if (PageAnon(page))
tlb->mm->anon_rss--;
- else if (pte_young(pte))
+ else if (pte_young(ptent))
mark_page_accessed(page);
tlb->freed++;
page_remove_rmap(page);
@@ -526,68 +519,62 @@ static void zap_pte_range(struct mmu_gat
*/
if (unlikely(details))
continue;
- if (!pte_file(pte))
- free_swap_and_cache(pte_to_swp_entry(pte));
- pte_clear(tlb->mm, address+offset, ptep);
- }
- pte_unmap(ptep-1);
+ if (!pte_file(ptent))
+ free_swap_and_cache(pte_to_swp_entry(ptent));
+ pte_clear(tlb->mm, addr, pte);
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(pte - 1);
}
-static void zap_pmd_range(struct mmu_gather *tlb,
- pud_t *pud, unsigned long address,
- unsigned long size, struct zap_details *details)
+static void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- pmd_t * pmd;
- unsigned long end;
+ pmd_t *pmd;
+ unsigned long next;
if (pud_none_or_clear_bad(pud))
return;
- pmd = pmd_offset(pud, address);
- end = address + size;
- if (end > ((address + PUD_SIZE) & PUD_MASK))
- end = ((address + PUD_SIZE) & PUD_MASK);
+ pmd = pmd_offset(pud, addr);
do {
- zap_pte_range(tlb, pmd, address, end - address, details);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
+ next = pmd_addr_end(addr, end);
+ zap_pte_range(tlb, pmd, addr, next, details);
+ } while (pmd++, addr = next, addr != end);
}
-static void zap_pud_range(struct mmu_gather *tlb,
- pgd_t * pgd, unsigned long address,
- unsigned long end, struct zap_details *details)
+static void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- pud_t * pud;
+ pud_t *pud;
+ unsigned long next;
if (pgd_none_or_clear_bad(pgd))
return;
- pud = pud_offset(pgd, address);
+ pud = pud_offset(pgd, addr);
do {
- zap_pmd_range(tlb, pud, address, end - address, details);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
+ next = pud_addr_end(addr, end);
+ zap_pmd_range(tlb, pud, addr, next, details);
+ } while (pud++, addr = next, addr != end);
}
-static void unmap_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, unsigned long address,
- unsigned long end, struct zap_details *details)
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
- unsigned long next;
pgd_t *pgd;
- int i;
+ unsigned long next;
- BUG_ON(address >= end);
- pgd = pgd_offset(vma->vm_mm, address);
+ if (details && !details->check_mapping && !details->nonlinear_vma)
+ details = NULL;
+
+ BUG_ON(addr >= end);
tlb_start_vma(tlb, vma);
- for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
- next = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (next <= address || next > end)
- next = end;
- zap_pud_range(tlb, pgd, address, next, details);
- address = next;
- pgd++;
- }
+ pgd = pgd_offset(vma->vm_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ zap_pud_range(tlb, pgd, addr, next, details);
+ } while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
}
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 10/15] ptwalk: copy_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (8 preceding siblings ...)
2005-03-09 22:12 ` [PATCH 9/15] ptwalk: unmap_page_range Hugh Dickins
@ 2005-03-09 22:12 ` Hugh Dickins
2005-03-09 22:13 ` [PATCH 11/15] ptwalk: copy_pte_range hang Hugh Dickins
` (5 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:12 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert copy_page_range pagetable walkers to loops using p?d_addr_end.
Merge copy_swap_pte into copy_one_pte, make a few minor tidyups.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 141 ++++++++++++++++++++++++------------------------------------
1 files changed, 57 insertions(+), 84 deletions(-)
--- ptwalk9/mm/memory.c 2005-03-09 01:38:00.000000000 +0000
+++ ptwalk10/mm/memory.c 2005-03-09 01:38:12.000000000 +0000
@@ -260,20 +260,7 @@ out:
*/
static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
- if (pte_file(pte))
- return;
- swap_duplicate(pte_to_swp_entry(pte));
- if (list_empty(&dst_mm->mmlist)) {
- spin_lock(&mmlist_lock);
- list_add(&dst_mm->mmlist, &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
unsigned long addr)
{
@@ -281,12 +268,21 @@ copy_one_pte(struct mm_struct *dst_mm,
struct page *page;
unsigned long pfn;
- /* pte contains position in swap, so copy. */
- if (!pte_present(pte)) {
- copy_swap_pte(dst_mm, src_mm, pte);
+ /* pte contains position in swap or file, so copy. */
+ if (unlikely(!pte_present(pte))) {
+ if (!pte_file(pte)) {
+ swap_duplicate(pte_to_swp_entry(pte));
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ list_add(&dst_mm->mmlist, &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ }
set_pte_at(dst_mm, addr, dst_pte, pte);
return;
}
+
pfn = pte_pfn(pte);
/* the pte points outside of valid memory, the
* mapping is assumed to be good, meaningful
@@ -326,25 +322,21 @@ copy_one_pte(struct mm_struct *dst_mm,
page_dup_rmap(page);
}
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pte_t *src_pte, *dst_pte;
- pte_t *s, *d;
unsigned long vm_flags = vma->vm_flags;
again:
- d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+ dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
if (!dst_pte)
return -ENOMEM;
+ src_pte = pte_offset_map_nested(src_pmd, addr);
spin_lock(&src_mm->page_table_lock);
- s = src_pte = pte_offset_map_nested(src_pmd, addr);
- for (; addr < end; s++, d++) {
- if (!pte_none(*s))
- copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
- addr += PAGE_SIZE;
+ do {
/*
* We are holding two locks at this point - either of them
* could generate latencies in another task on another CPU.
@@ -353,105 +345,86 @@ again:
need_lockbreak(&src_mm->page_table_lock) ||
need_lockbreak(&dst_mm->page_table_lock))
break;
- }
- pte_unmap_nested(src_pte);
- pte_unmap(dst_pte);
+ if (pte_none(*src_pte))
+ continue;
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
spin_unlock(&src_mm->page_table_lock);
+ pte_unmap_nested(src_pte - 1);
+ pte_unmap(dst_pte - 1);
cond_resched_lock(&dst_mm->page_table_lock);
- if (addr < end)
+ if (addr != end)
goto again;
return 0;
}
-static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pmd_t *src_pmd, *dst_pmd;
- int err = 0;
unsigned long next;
- src_pmd = pmd_offset(src_pud, addr);
dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
if (!dst_pmd)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
+ src_pmd = pmd_offset(src_pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(src_pmd))
continue;
- err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
+ if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+ return 0;
}
-static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
pud_t *src_pud, *dst_pud;
- int err = 0;
unsigned long next;
- src_pud = pud_offset(src_pgd, addr);
dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
if (!dst_pud)
return -ENOMEM;
-
- for (; addr < end; addr = next, src_pud++, dst_pud++) {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
+ src_pud = pud_offset(src_pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(src_pud))
continue;
- err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
- vma, addr, next);
- if (err)
- break;
- }
- return err;
+ if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+ return 0;
}
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
struct vm_area_struct *vma)
{
pgd_t *src_pgd, *dst_pgd;
- unsigned long addr, start, end, next;
- int err = 0;
+ unsigned long next;
+ unsigned long addr = vma->vm_start;
+ unsigned long end = vma->vm_end;
if (is_vm_hugetlb_page(vma))
- return copy_hugetlb_page_range(dst, src, vma);
+ return copy_hugetlb_page_range(dst_mm, src_mm, vma);
- start = vma->vm_start;
- src_pgd = pgd_offset(src, start);
- dst_pgd = pgd_offset(dst, start);
-
- end = vma->vm_end;
- addr = start;
- while (addr && (addr < end-1)) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(src_pgd))
- goto next_pgd;
- err = copy_pud_range(dst, src, dst_pgd, src_pgd,
- vma, addr, next);
- if (err)
- break;
-
-next_pgd:
- src_pgd++;
- dst_pgd++;
- addr = next;
- }
-
- return err;
+ continue;
+ if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+ vma, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+ return 0;
}
static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 11/15] ptwalk: copy_pte_range hang
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (9 preceding siblings ...)
2005-03-09 22:12 ` [PATCH 10/15] ptwalk: copy_page_range Hugh Dickins
@ 2005-03-09 22:13 ` Hugh Dickins
2005-03-09 23:25 ` Nick Piggin
2005-03-09 22:14 ` [PATCH 12/15] ptwalk: clear_page_range Hugh Dickins
` (4 subsequent siblings)
15 siblings, 1 reply; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:13 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
This patch is the odd-one-out of the sequence. The one before adjusted
copy_pte_range from a for loop to a do while loop, and it was therefore
simplest to check for lockbreak before copying pte: possibility that it
might keep getting preempted without making progress under some loads.
Some loads such as startup: 2*HT*P4 with preemption cannot even reach
multiuser login. Suspect needs_lockbreak is broken, can get in a state
when it remains forever true. Investigate that later: for now, and for
all time, it makes sense to aim for a little progress before breaking
out; and we can manage more pte_nones than copies.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 11 ++++++++---
1 files changed, 8 insertions(+), 3 deletions(-)
--- ptwalk10/mm/memory.c 2005-03-09 01:38:12.000000000 +0000
+++ ptwalk11/mm/memory.c 2005-03-09 01:38:54.000000000 +0000
@@ -328,6 +328,7 @@ static int copy_pte_range(struct mm_stru
{
pte_t *src_pte, *dst_pte;
unsigned long vm_flags = vma->vm_flags;
+ int progress;
again:
dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
@@ -335,19 +336,23 @@ again:
return -ENOMEM;
src_pte = pte_offset_map_nested(src_pmd, addr);
+ progress = 0;
spin_lock(&src_mm->page_table_lock);
do {
/*
* We are holding two locks at this point - either of them
* could generate latencies in another task on another CPU.
*/
- if (need_resched() ||
+ if (progress >= 32 && (need_resched() ||
need_lockbreak(&src_mm->page_table_lock) ||
- need_lockbreak(&dst_mm->page_table_lock))
+ need_lockbreak(&dst_mm->page_table_lock)))
break;
- if (pte_none(*src_pte))
+ if (pte_none(*src_pte)) {
+ progress++;
continue;
+ }
copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
spin_unlock(&src_mm->page_table_lock);
^ permalink raw reply [flat|nested] 20+ messages in thread

* Re: [PATCH 11/15] ptwalk: copy_pte_range hang
2005-03-09 22:13 ` [PATCH 11/15] ptwalk: copy_pte_range hang Hugh Dickins
@ 2005-03-09 23:25 ` Nick Piggin
0 siblings, 0 replies; 20+ messages in thread
From: Nick Piggin @ 2005-03-09 23:25 UTC (permalink / raw)
To: Hugh Dickins; +Cc: Andrew Morton, linux-kernel
Hugh Dickins wrote:
> This patch is the odd-one-out of the sequence. The one before adjusted
> copy_pte_range from a for loop to a do while loop, and it was therefore
> simplest to check for lockbreak before copying pte: possibility that it
> might keep getting preempted without making progress under some loads.
>
> Some loads such as startup: 2*HT*P4 with preemption cannot even reach
> multiuser login. Suspect needs_lockbreak is broken, can get in a state
> when it remains forever true. Investigate that later: for now, and for
> all time, it makes sense to aim for a little progress before breaking
> out; and we can manage more pte_nones than copies.
>
(Just to reiterate a private mail sent to Hugh earlier)
Yeah I think lockbreak is broken. Because the inner spinlock never
has a cond_resched_lock performed on it, so its break_lock is
never set to 0, but need_lockbreak still always returns 1 for it.
IMO, spin_lock should set break_lock to 0, then cond_resched_lock
need not bother with it.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 12/15] ptwalk: clear_page_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (10 preceding siblings ...)
2005-03-09 22:13 ` [PATCH 11/15] ptwalk: copy_pte_range hang Hugh Dickins
@ 2005-03-09 22:14 ` Hugh Dickins
2005-03-09 22:14 ` [PATCH 13/15] ptwalk: move p?d_none_or_clear_bad Hugh Dickins
` (3 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:14 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Convert clear_page_range pagetable walkers to loops using p?d_addr_end.
These are exceptional in that some out-of-tree memory layouts might pass
end 0, so the macros need to handle that (though previous code did not).
The naming here was out of step: now we usually pass pmd_t *pmd down to
action_on_pte_range, not action_on_pmd_range, etc: made like the others.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 98 ++++++++++++++++++++++++++++--------------------------------
1 files changed, 46 insertions(+), 52 deletions(-)
--- ptwalk11/mm/memory.c 2005-03-09 01:38:54.000000000 +0000
+++ ptwalk12/mm/memory.c 2005-03-09 01:39:06.000000000 +0000
@@ -110,15 +110,14 @@ void pmd_clear_bad(pmd_t *pmd)
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
+static inline void clear_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long addr, unsigned long end)
{
- struct page *page;
-
if (pmd_none_or_clear_bad(pmd))
return;
- if (!((start | end) & ~PMD_MASK)) {
- /* Only clear full, aligned ranges */
- page = pmd_page(*pmd);
+ if (!((addr | end) & ~PMD_MASK)) {
+ /* Only free fully aligned ranges */
+ struct page *page = pmd_page(*pmd);
pmd_clear(pmd);
dec_page_state(nr_page_table_pages);
tlb->mm->nr_ptes--;
@@ -126,77 +125,72 @@ static inline void clear_pmd_range(struc
}
}
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
+static inline void clear_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long addr, unsigned long end)
{
- unsigned long addr = start, next;
- pmd_t *pmd, *__pmd;
+ pmd_t *pmd;
+ unsigned long next;
+ pmd_t *empty_pmd = NULL;
if (pud_none_or_clear_bad(pud))
return;
- pmd = __pmd = pmd_offset(pud, start);
+ pmd = pmd_offset(pud, addr);
+
+ /* Only free fully aligned ranges */
+ if (!((addr | end) & ~PUD_MASK))
+ empty_pmd = pmd;
do {
- next = (addr + PMD_SIZE) & PMD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pmd_range(tlb, pmd, addr, next);
- pmd++;
- addr = next;
- } while (addr && (addr < end));
+ next = pmd_addr_end(addr, end);
+ clear_pte_range(tlb, pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
- if (!((start | end) & ~PUD_MASK)) {
- /* Only clear full, aligned ranges */
+ if (empty_pmd) {
pud_clear(pud);
- pmd_free_tlb(tlb, __pmd);
+ pmd_free_tlb(tlb, empty_pmd);
}
}
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
+static inline void clear_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
{
- unsigned long addr = start, next;
- pud_t *pud, *__pud;
+ pud_t *pud;
+ unsigned long next;
+ pud_t *empty_pud = NULL;
if (pgd_none_or_clear_bad(pgd))
return;
- pud = __pud = pud_offset(pgd, start);
+ pud = pud_offset(pgd, addr);
+
+ /* Only free fully aligned ranges */
+ if (!((addr | end) & ~PGDIR_MASK))
+ empty_pud = pud;
do {
- next = (addr + PUD_SIZE) & PUD_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pud_range(tlb, pud, addr, next);
- pud++;
- addr = next;
- } while (addr && (addr < end));
+ next = pud_addr_end(addr, end);
+ clear_pmd_range(tlb, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
- if (!((start | end) & ~PGDIR_MASK)) {
- /* Only clear full, aligned ranges */
+ if (empty_pud) {
pgd_clear(pgd);
- pud_free_tlb(tlb, __pud);
+ pud_free_tlb(tlb, empty_pud);
}
}
/*
* This function clears user-level page tables of a process.
- *
+ * Unlike other pagetable walks, some memory layouts might give end 0.
* Must be called with pagetable lock held.
*/
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void clear_page_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end)
{
- unsigned long addr = start, next;
- pgd_t * pgd = pgd_offset(tlb->mm, start);
- unsigned long i;
-
- for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
- next = (addr + PGDIR_SIZE) & PGDIR_MASK;
- if (next > end || next <= addr)
- next = end;
-
- clear_pgd_range(tlb, pgd, addr, next);
- pgd++;
- addr = next;
- }
+ pgd_t *pgd;
+ unsigned long next;
+
+ pgd = pgd_offset(tlb->mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ clear_pud_range(tlb, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
}
pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 13/15] ptwalk: move p?d_none_or_clear_bad
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (11 preceding siblings ...)
2005-03-09 22:14 ` [PATCH 12/15] ptwalk: clear_page_range Hugh Dickins
@ 2005-03-09 22:14 ` Hugh Dickins
2005-03-09 22:15 ` [PATCH 14/15] ptwalk: inline pmd_range and pud_range Hugh Dickins
` (2 subsequent siblings)
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:14 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
To handle large sparse areas a little more efficiently, follow Nick and
move the p?d_none_or_clear_bad tests up from the start of each function
to its callsite.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 24 ++++++++++++------------
mm/mprotect.c | 12 ++++++------
mm/msync.c | 12 ++++++------
mm/swapfile.c | 12 ++++++------
mm/vmalloc.c | 15 ++++++---------
5 files changed, 36 insertions(+), 39 deletions(-)
--- ptwalk12/mm/memory.c 2005-03-09 01:39:06.000000000 +0000
+++ ptwalk13/mm/memory.c 2005-03-09 01:39:18.000000000 +0000
@@ -113,8 +113,6 @@ void pmd_clear_bad(pmd_t *pmd)
static inline void clear_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long addr, unsigned long end)
{
- if (pmd_none_or_clear_bad(pmd))
- return;
if (!((addr | end) & ~PMD_MASK)) {
/* Only free fully aligned ranges */
struct page *page = pmd_page(*pmd);
@@ -132,8 +130,6 @@ static inline void clear_pmd_range(struc
unsigned long next;
pmd_t *empty_pmd = NULL;
- if (pud_none_or_clear_bad(pud))
- return;
pmd = pmd_offset(pud, addr);
/* Only free fully aligned ranges */
@@ -141,6 +137,8 @@ static inline void clear_pmd_range(struc
empty_pmd = pmd;
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
clear_pte_range(tlb, pmd, addr, next);
} while (pmd++, addr = next, addr != end);
@@ -157,8 +155,6 @@ static inline void clear_pud_range(struc
unsigned long next;
pud_t *empty_pud = NULL;
- if (pgd_none_or_clear_bad(pgd))
- return;
pud = pud_offset(pgd, addr);
/* Only free fully aligned ranges */
@@ -166,6 +162,8 @@ static inline void clear_pud_range(struc
empty_pud = pud;
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
clear_pmd_range(tlb, pud, addr, next);
} while (pud++, addr = next, addr != end);
@@ -189,6 +187,8 @@ void clear_page_range(struct mmu_gather
pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
clear_pud_range(tlb, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -432,8 +432,6 @@ static void zap_pte_range(struct mmu_gat
{
pte_t *pte;
- if (pmd_none_or_clear_bad(pmd))
- return;
pte = pte_offset_map(pmd, addr);
do {
pte_t ptent = *pte;
@@ -505,11 +503,11 @@ static void zap_pmd_range(struct mmu_gat
pmd_t *pmd;
unsigned long next;
- if (pud_none_or_clear_bad(pud))
- return;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
zap_pte_range(tlb, pmd, addr, next, details);
} while (pmd++, addr = next, addr != end);
}
@@ -521,11 +519,11 @@ static void zap_pud_range(struct mmu_gat
pud_t *pud;
unsigned long next;
- if (pgd_none_or_clear_bad(pgd))
- return;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
zap_pmd_range(tlb, pud, addr, next, details);
} while (pud++, addr = next, addr != end);
}
@@ -545,6 +543,8 @@ static void unmap_page_range(struct mmu_
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
zap_pud_range(tlb, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
--- ptwalk12/mm/mprotect.c 2005-03-09 01:36:01.000000000 +0000
+++ ptwalk13/mm/mprotect.c 2005-03-09 01:39:18.000000000 +0000
@@ -30,8 +30,6 @@ static inline void change_pte_range(stru
{
pte_t *pte;
- if (pmd_none_or_clear_bad(pmd))
- return;
pte = pte_offset_map(pmd, addr);
do {
if (pte_present(*pte)) {
@@ -54,11 +52,11 @@ static inline void change_pmd_range(stru
pmd_t *pmd;
unsigned long next;
- if (pud_none_or_clear_bad(pud))
- return;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
change_pte_range(mm, pmd, addr, next, newprot);
} while (pmd++, addr = next, addr != end);
}
@@ -69,11 +67,11 @@ static inline void change_pud_range(stru
pud_t *pud;
unsigned long next;
- if (pgd_none_or_clear_bad(pgd))
- return;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
change_pmd_range(mm, pud, addr, next, newprot);
} while (pud++, addr = next, addr != end);
}
@@ -92,6 +90,8 @@ static void change_protection(struct vm_
spin_lock(&mm->page_table_lock);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
change_pud_range(mm, pgd, addr, next, newprot);
} while (pgd++, addr = next, addr != end);
flush_tlb_range(vma, start, end);
--- ptwalk12/mm/msync.c 2005-03-09 01:36:14.000000000 +0000
+++ ptwalk13/mm/msync.c 2005-03-09 01:39:18.000000000 +0000
@@ -27,8 +27,6 @@ static void sync_pte_range(struct vm_are
{
pte_t *pte;
- if (pmd_none_or_clear_bad(pmd))
- return;
pte = pte_offset_map(pmd, addr);
do {
unsigned long pfn;
@@ -56,11 +54,11 @@ static inline void sync_pmd_range(struct
pmd_t *pmd;
unsigned long next;
- if (pud_none_or_clear_bad(pud))
- return;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
sync_pte_range(vma, pmd, addr, next);
} while (pmd++, addr = next, addr != end);
}
@@ -71,11 +69,11 @@ static inline void sync_pud_range(struct
pud_t *pud;
unsigned long next;
- if (pgd_none_or_clear_bad(pgd))
- return;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
sync_pmd_range(vma, pud, addr, next);
} while (pud++, addr = next, addr != end);
}
@@ -99,6 +97,8 @@ static void sync_page_range(struct vm_ar
spin_lock(&mm->page_table_lock);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
sync_pud_range(vma, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
spin_unlock(&mm->page_table_lock);
--- ptwalk12/mm/swapfile.c 2005-03-09 01:36:25.000000000 +0000
+++ ptwalk13/mm/swapfile.c 2005-03-09 01:39:18.000000000 +0000
@@ -442,8 +442,6 @@ static int unuse_pte_range(struct vm_are
pte_t *pte;
pte_t swp_pte = swp_entry_to_pte(entry);
- if (pmd_none_or_clear_bad(pmd))
- return 0;
pte = pte_offset_map(pmd, addr);
do {
/*
@@ -467,11 +465,11 @@ static int unuse_pmd_range(struct vm_are
pmd_t *pmd;
unsigned long next;
- if (pud_none_or_clear_bad(pud))
- return 0;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
if (unuse_pte_range(vma, pmd, addr, next, entry, page))
return 1;
} while (pmd++, addr = next, addr != end);
@@ -485,11 +483,11 @@ static int unuse_pud_range(struct vm_are
pud_t *pud;
unsigned long next;
- if (pgd_none_or_clear_bad(pgd))
- return 0;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
if (unuse_pmd_range(vma, pud, addr, next, entry, page))
return 1;
} while (pud++, addr = next, addr != end);
@@ -516,6 +514,8 @@ static int unuse_vma(struct vm_area_stru
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
if (unuse_pud_range(vma, pgd, addr, next, entry, page))
return 1;
} while (pgd++, addr = next, addr != end);
--- ptwalk12/mm/vmalloc.c 2005-03-09 01:36:38.000000000 +0000
+++ ptwalk13/mm/vmalloc.c 2005-03-09 01:39:18.000000000 +0000
@@ -27,9 +27,6 @@ static void vunmap_pte_range(pmd_t *pmd,
{
pte_t *pte;
- if (pmd_none_or_clear_bad(pmd))
- return;
-
pte = pte_offset_kernel(pmd, addr);
do {
pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
@@ -42,12 +39,11 @@ static void vunmap_pmd_range(pud_t *pud,
pmd_t *pmd;
unsigned long next;
- if (pud_none_or_clear_bad(pud))
- return;
-
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
vunmap_pte_range(pmd, addr, next);
} while (pmd++, addr = next, addr != end);
}
@@ -57,12 +53,11 @@ static void vunmap_pud_range(pgd_t *pgd,
pud_t *pud;
unsigned long next;
- if (pgd_none_or_clear_bad(pgd))
- return;
-
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
vunmap_pmd_range(pud, addr, next);
} while (pud++, addr = next, addr != end);
}
@@ -79,6 +74,8 @@ void unmap_vm_area(struct vm_struct *are
flush_cache_vunmap(addr, end);
do {
next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
vunmap_pud_range(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
flush_tlb_kernel_range((unsigned long) area->addr, end);
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 14/15] ptwalk: inline pmd_range and pud_range
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (12 preceding siblings ...)
2005-03-09 22:14 ` [PATCH 13/15] ptwalk: move p?d_none_or_clear_bad Hugh Dickins
@ 2005-03-09 22:15 ` Hugh Dickins
2005-03-09 22:16 ` [PATCH 15/15] ptwalk: pud and pmd folded Hugh Dickins
2005-03-10 0:39 ` [PATCH 0/15] ptwalk: pagetable walker cleanup Benjamin Herrenschmidt
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
As a general rule, ask the compiler to inline action_on_pmd_range and
action_on_pud_range: they're not very interesting, and it has a better
chance of eliding them that way. But conversely, it helps debug traces
if action_on_pte_range and top action_on_page_range remain uninlined.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
mm/memory.c | 10 +++++-----
mm/mprotect.c | 2 +-
mm/msync.c | 4 ++--
mm/swapfile.c | 4 ++--
mm/vmalloc.c | 18 ++++++++++--------
5 files changed, 20 insertions(+), 18 deletions(-)
--- ptwalk13/mm/memory.c 2005-03-09 01:39:18.000000000 +0000
+++ ptwalk14/mm/memory.c 2005-03-09 01:39:31.000000000 +0000
@@ -358,7 +358,7 @@ again:
return 0;
}
-static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -380,7 +380,7 @@ static int copy_pmd_range(struct mm_stru
return 0;
}
-static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -496,7 +496,7 @@ static void zap_pte_range(struct mmu_gat
pte_unmap(pte - 1);
}
-static void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
@@ -512,7 +512,7 @@ static void zap_pmd_range(struct mmu_gat
} while (pmd++, addr = next, addr != end);
}
-static void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long addr, unsigned long end,
struct zap_details *details)
{
@@ -1013,7 +1013,7 @@ int zeromap_page_range(struct vm_area_st
* mappings are removed. any references to nonexistent pages results
* in null mappings (currently treated as "copy-on-access")
*/
-static inline int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
--- ptwalk13/mm/mprotect.c 2005-03-09 01:39:18.000000000 +0000
+++ ptwalk14/mm/mprotect.c 2005-03-09 01:39:31.000000000 +0000
@@ -25,7 +25,7 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-static inline void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot)
{
pte_t *pte;
--- ptwalk13/mm/msync.c 2005-03-09 01:39:18.000000000 +0000
+++ ptwalk14/mm/msync.c 2005-03-09 01:39:31.000000000 +0000
@@ -105,7 +105,7 @@ static void sync_page_range(struct vm_ar
}
#ifdef CONFIG_PREEMPT
-static void filemap_sync(struct vm_area_struct *vma,
+static inline void filemap_sync(struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
const size_t chunk = 64 * 1024; /* bytes */
@@ -120,7 +120,7 @@ static void filemap_sync(struct vm_area_
} while (addr = next, addr != end);
}
#else
-static void filemap_sync(struct vm_area_struct *vma,
+static inline void filemap_sync(struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
sync_page_range(vma, addr, end);
--- ptwalk13/mm/swapfile.c 2005-03-09 01:39:18.000000000 +0000
+++ ptwalk14/mm/swapfile.c 2005-03-09 01:39:31.000000000 +0000
@@ -458,7 +458,7 @@ static int unuse_pte_range(struct vm_are
return 0;
}
-static int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
swp_entry_t entry, struct page *page)
{
@@ -476,7 +476,7 @@ static int unuse_pmd_range(struct vm_are
return 0;
}
-static int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
swp_entry_t entry, struct page *page)
{
--- ptwalk13/mm/vmalloc.c 2005-03-09 01:39:18.000000000 +0000
+++ ptwalk14/mm/vmalloc.c 2005-03-09 01:39:31.000000000 +0000
@@ -34,7 +34,8 @@ static void vunmap_pte_range(pmd_t *pmd,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end)
{
pmd_t *pmd;
unsigned long next;
@@ -48,7 +49,8 @@ static void vunmap_pmd_range(pud_t *pud,
} while (pmd++, addr = next, addr != end);
}
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
{
pud_t *pud;
unsigned long next;
@@ -81,8 +83,8 @@ void unmap_vm_area(struct vm_struct *are
flush_tlb_kernel_range((unsigned long) area->addr, end);
}
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- pgprot_t prot, struct page ***pages)
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
{
pte_t *pte;
@@ -100,8 +102,8 @@ static int vmap_pte_range(pmd_t *pmd, un
return 0;
}
-static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
- pgprot_t prot, struct page ***pages)
+static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
{
pmd_t *pmd;
unsigned long next;
@@ -117,8 +119,8 @@ static int vmap_pmd_range(pud_t *pud, un
return 0;
}
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
- pgprot_t prot, struct page ***pages)
+static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, pgprot_t prot, struct page ***pages)
{
pud_t *pud;
unsigned long next;
^ permalink raw reply [flat|nested] 20+ messages in thread

* [PATCH 15/15] ptwalk: pud and pmd folded
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (13 preceding siblings ...)
2005-03-09 22:15 ` [PATCH 14/15] ptwalk: inline pmd_range and pud_range Hugh Dickins
@ 2005-03-09 22:16 ` Hugh Dickins
2005-03-10 0:39 ` [PATCH 0/15] ptwalk: pagetable walker cleanup Benjamin Herrenschmidt
15 siblings, 0 replies; 20+ messages in thread
From: Hugh Dickins @ 2005-03-09 22:16 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
[PATCH 15/15] ptwalk: pud and pmd folded
Nick Piggin's patch to fold away most of the pud and pmd levels when not
required. Adjusted to define minimal pud_addr_end (in the 4LEVEL_HACK
case too) and pmd_addr_end. Responsible for half of the savings.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
include/asm-generic/4level-fixup.h | 4 ++++
include/asm-generic/pgtable-nopmd.h | 5 +++++
include/asm-generic/pgtable-nopud.h | 5 +++++
include/asm-generic/pgtable.h | 4 ++++
mm/memory.c | 34 ++++++++--------------------------
5 files changed, 26 insertions(+), 26 deletions(-)
--- ptwalk14/include/asm-generic/4level-fixup.h 2005-03-02 07:39:19.000000000 +0000
+++ ptwalk15/include/asm-generic/4level-fixup.h 2005-03-09 01:39:43.000000000 +0000
@@ -2,6 +2,7 @@
#define _4LEVEL_FIXUP_H
#define __ARCH_HAS_4LEVEL_HACK
+#define __PAGETABLE_PUD_FOLDED
#define PUD_SIZE PGDIR_SIZE
#define PUD_MASK PGDIR_MASK
@@ -31,4 +32,7 @@
#define pud_free(x) do { } while (0)
#define __pud_free_tlb(tlb, x) do { } while (0)
+#undef pud_addr_end
+#define pud_addr_end(addr, end) (end)
+
#endif
--- ptwalk14/include/asm-generic/pgtable-nopmd.h 2005-03-02 07:39:19.000000000 +0000
+++ ptwalk15/include/asm-generic/pgtable-nopmd.h 2005-03-09 01:39:43.000000000 +0000
@@ -5,6 +5,8 @@
#include <asm-generic/pgtable-nopud.h>
+#define __PAGETABLE_PMD_FOLDED
+
/*
* Having the pmd type consist of a pud gets the size right, and allows
* us to conceptually access the pud entry that this pmd is folded into
@@ -55,6 +57,9 @@ static inline pmd_t * pmd_offset(pud_t *
#define pmd_free(x) do { } while (0)
#define __pmd_free_tlb(tlb, x) do { } while (0)
+#undef pmd_addr_end
+#define pmd_addr_end(addr, end) (end)
+
#endif /* __ASSEMBLY__ */
#endif /* _PGTABLE_NOPMD_H */
--- ptwalk14/include/asm-generic/pgtable-nopud.h 2005-03-02 07:39:27.000000000 +0000
+++ ptwalk15/include/asm-generic/pgtable-nopud.h 2005-03-09 01:39:43.000000000 +0000
@@ -3,6 +3,8 @@
#ifndef __ASSEMBLY__
+#define __PAGETABLE_PUD_FOLDED
+
/*
* Having the pud type consist of a pgd gets the size right, and allows
* us to conceptually access the pgd entry that this pud is folded into
@@ -52,5 +54,8 @@ static inline pud_t * pud_offset(pgd_t *
#define pud_free(x) do { } while (0)
#define __pud_free_tlb(tlb, x) do { } while (0)
+#undef pud_addr_end
+#define pud_addr_end(addr, end) (end)
+
#endif /* __ASSEMBLY__ */
#endif /* _PGTABLE_NOPUD_H */
--- ptwalk14/include/asm-generic/pgtable.h 2005-03-09 01:36:01.000000000 +0000
+++ ptwalk15/include/asm-generic/pgtable.h 2005-03-09 01:39:43.000000000 +0000
@@ -146,15 +146,19 @@ static inline void ptep_set_wrprotect(st
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#ifndef pud_addr_end
#define pud_addr_end(addr, end) \
({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#endif
+#ifndef pmd_addr_end
#define pmd_addr_end(addr, end) \
({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#endif
#ifndef __ASSEMBLY__
/*
--- ptwalk14/mm/memory.c 2005-03-09 01:39:31.000000000 +0000
+++ ptwalk15/mm/memory.c 2005-03-09 01:39:43.000000000 +0000
@@ -1973,15 +1973,12 @@ int handle_mm_fault(struct mm_struct *mm
return VM_FAULT_OOM;
}
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
*/
pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
@@ -2005,15 +2002,14 @@ pud_t fastcall *__pud_alloc(struct mm_st
out:
return pud_offset(pgd, address);
}
+#endif /* __PAGETABLE_PUD_FOLDED */
+#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
*
* We've already handled the fast-path in-line, and we own the
* page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
*/
pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
@@ -2029,38 +2025,24 @@ pmd_t fastcall *__pmd_alloc(struct mm_st
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
+#ifndef __ARCH_HAS_4LEVEL_HACK
if (pud_present(*pud)) {
pmd_free(new);
goto out;
}
pud_populate(mm, pud, new);
- out:
- return pmd_offset(pud, address);
-}
#else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- pmd_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pmd_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
if (pgd_present(*pud)) {
pmd_free(new);
goto out;
}
pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
return pmd_offset(pud, address);
}
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
int make_pages_present(unsigned long addr, unsigned long end)
{
^ permalink raw reply [flat|nested] 20+ messages in thread

* Re: [PATCH 0/15] ptwalk: pagetable walker cleanup
2005-03-09 22:05 [PATCH 0/15] ptwalk: pagetable walker cleanup Hugh Dickins
` (14 preceding siblings ...)
2005-03-09 22:16 ` [PATCH 15/15] ptwalk: pud and pmd folded Hugh Dickins
@ 2005-03-10 0:39 ` Benjamin Herrenschmidt
2005-03-10 1:02 ` David S. Miller
15 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-03-10 0:39 UTC (permalink / raw)
To: Hugh Dickins; +Cc: Andrew Morton, Linux Kernel list, David S. Miller
On Wed, 2005-03-09 at 22:05 +0000, Hugh Dickins wrote:
> Here's a cleanup of the pagetable walkers, in common and i386 code,
> based on 2.6.11-bk5. Mainly to make them all go the same simpler way,
> so they're easier to follow with less room for error; but also to reduce
> the code size and speed it up a little. These are janitorial changes,
> other arches may follow whenever it suits them.
>
> .../...
Do you have them on HTTP somewhere ? Apparently, a few of the 15 patches
didn't make it to me.
There are some other bugs introduced by set_pte_at() caused by latent
bugs in the PTE walkers that 'drop' part of the address along the way,
notably the vmalloc.c ones are bogus, thus breaking ppc/ppc64 in subtle
ways. Before I send patches, I'd rather check if it's not all fixed by
your patches first :)
Ben.
^ permalink raw reply [flat|nested] 20+ messages in thread

* Re: [PATCH 0/15] ptwalk: pagetable walker cleanup
2005-03-10 0:39 ` [PATCH 0/15] ptwalk: pagetable walker cleanup Benjamin Herrenschmidt
@ 2005-03-10 1:02 ` David S. Miller
2005-03-10 1:08 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 20+ messages in thread
From: David S. Miller @ 2005-03-10 1:02 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: hugh, akpm, linux-kernel, davem
On Thu, 10 Mar 2005 11:39:44 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> There are some other bugs introduced by set_pte_at() caused by latent
> bugs in the PTE walkers that 'drop' part of the address along the way,
> notably the vmalloc.c ones are bogus, thus breaking ppc/ppc64 in subtle
> ways. Before I send patches, I'd rather check if it's not all fixed by
> your patches first :)
Ben, I fixed vmalloc and the other cases when I pushed the set_pte_at()
changes to Linus. Here is the changeset that fixes them, and it's certainly
in Linus's tree:
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/26 20:51:23-08:00 davem@nuts.davemloft.net
# [MM]: Pass correct address down to bottom of page table iterators.
#
# Some routines, namely zeromap_pte_range, remap_pte_range,
# change_pte_range, unmap_area_pte, and map_area_pte, were
# using a chopped off address. This causes bogus addresses
# to be passed into set_pte_at() and friends, resulting
# in missed TLB flushes and other nasties.
#
# Signed-off-by: David S. Miller <davem@davemloft.net>
#
# mm/vmalloc.c
# 2005/02/26 20:50:16-08:00 davem@nuts.davemloft.net +13 -9
# [MM]: Pass correct address down to bottom of page table iterators.
#
# mm/mprotect.c
# 2005/02/26 20:50:16-08:00 davem@nuts.davemloft.net +10 -7
# [MM]: Pass correct address down to bottom of page table iterators.
#
# mm/memory.c
# 2005/02/26 20:50:16-08:00 davem@nuts.davemloft.net +7 -5
# [MM]: Pass correct address down to bottom of page table iterators.
#
diff -Nru a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c 2005-03-09 17:09:47 -08:00
+++ b/mm/memory.c 2005-03-09 17:09:47 -08:00
@@ -992,16 +992,17 @@
unsigned long address,
unsigned long size, pgprot_t prot)
{
- unsigned long end;
+ unsigned long base, end;
+ base = address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(base+address), prot));
BUG_ON(!pte_none(*pte));
- set_pte_at(mm, address, pte, zero_pte);
+ set_pte_at(mm, base+address, pte, zero_pte);
address += PAGE_SIZE;
pte++;
} while (address && (address < end));
@@ -1106,8 +1107,9 @@
unsigned long address, unsigned long size,
unsigned long pfn, pgprot_t prot)
{
- unsigned long end;
+ unsigned long base, end;
+ base = address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
@@ -1115,7 +1117,7 @@
do {
BUG_ON(!pte_none(*pte));
if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte_at(mm, address, pte, pfn_pte(pfn, prot));
+ set_pte_at(mm, base+address, pte, pfn_pte(pfn, prot));
address += PAGE_SIZE;
pfn++;
pte++;
diff -Nru a/mm/mprotect.c b/mm/mprotect.c
--- a/mm/mprotect.c 2005-03-09 17:09:47 -08:00
+++ b/mm/mprotect.c 2005-03-09 17:09:47 -08:00
@@ -30,7 +30,7 @@
unsigned long size, pgprot_t newprot)
{
pte_t * pte;
- unsigned long end;
+ unsigned long base, end;
if (pmd_none(*pmd))
return;
@@ -40,6 +40,7 @@
return;
}
pte = pte_offset_map(pmd, address);
+ base = address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
@@ -52,8 +53,8 @@
* bits by wiping the pte and then setting the new pte
* into place.
*/
- entry = ptep_get_and_clear(mm, address, pte);
- set_pte_at(mm, address, pte, pte_modify(entry, newprot));
+ entry = ptep_get_and_clear(mm, base + address, pte);
+ set_pte_at(mm, base + address, pte, pte_modify(entry, newprot));
}
address += PAGE_SIZE;
pte++;
@@ -66,7 +67,7 @@
unsigned long size, pgprot_t newprot)
{
pmd_t * pmd;
- unsigned long end;
+ unsigned long base, end;
if (pud_none(*pud))
return;
@@ -76,12 +77,13 @@
return;
}
pmd = pmd_offset(pud, address);
+ base = address & PUD_MASK;
address &= ~PUD_MASK;
end = address + size;
if (end > PUD_SIZE)
end = PUD_SIZE;
do {
- change_pte_range(mm, pmd, address, end - address, newprot);
+ change_pte_range(mm, pmd, base + address, end - address, newprot);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
@@ -92,7 +94,7 @@
unsigned long size, pgprot_t newprot)
{
pud_t * pud;
- unsigned long end;
+ unsigned long base, end;
if (pgd_none(*pgd))
return;
@@ -102,12 +104,13 @@
return;
}
pud = pud_offset(pgd, address);
+ base = address & PGDIR_MASK;
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- change_pmd_range(mm, pud, address, end - address, newprot);
+ change_pmd_range(mm, pud, base + address, end - address, newprot);
address = (address + PUD_SIZE) & PUD_MASK;
pud++;
} while (address && (address < end));
diff -Nru a/mm/vmalloc.c b/mm/vmalloc.c
--- a/mm/vmalloc.c 2005-03-09 17:09:47 -08:00
+++ b/mm/vmalloc.c 2005-03-09 17:09:47 -08:00
@@ -26,7 +26,7 @@
static void unmap_area_pte(pmd_t *pmd, unsigned long address,
unsigned long size)
{
- unsigned long end;
+ unsigned long base, end;
pte_t *pte;
if (pmd_none(*pmd))
@@ -38,6 +38,7 @@
}
pte = pte_offset_kernel(pmd, address);
+ base = address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
@@ -45,7 +46,7 @@
do {
pte_t page;
- page = ptep_get_and_clear(&init_mm, address, pte);
+ page = ptep_get_and_clear(&init_mm, base + address, pte);
address += PAGE_SIZE;
pte++;
if (pte_none(page))
@@ -59,7 +60,7 @@
static void unmap_area_pmd(pud_t *pud, unsigned long address,
unsigned long size)
{
- unsigned long end;
+ unsigned long base, end;
pmd_t *pmd;
if (pud_none(*pud))
@@ -71,13 +72,14 @@
}
pmd = pmd_offset(pud, address);
+ base = address & PUD_MASK;
address &= ~PUD_MASK;
end = address + size;
if (end > PUD_SIZE)
end = PUD_SIZE;
do {
- unmap_area_pte(pmd, address, end - address);
+ unmap_area_pte(pmd, base + address, end - address);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address < end);
@@ -87,7 +89,7 @@
unsigned long size)
{
pud_t *pud;
- unsigned long end;
+ unsigned long base, end;
if (pgd_none(*pgd))
return;
@@ -98,13 +100,14 @@
}
pud = pud_offset(pgd, address);
+ base = address & PGDIR_MASK;
address &= ~PGDIR_MASK;
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
do {
- unmap_area_pmd(pud, address, end - address);
+ unmap_area_pmd(pud, base + address, end - address);
address = (address + PUD_SIZE) & PUD_MASK;
pud++;
} while (address && (address < end));
@@ -114,8 +117,9 @@
unsigned long size, pgprot_t prot,
struct page ***pages)
{
- unsigned long end;
+ unsigned long base, end;
+ base = address & PMD_MASK;
address &= ~PMD_MASK;
end = address + size;
if (end > PMD_SIZE)
@@ -127,7 +131,7 @@
if (!page)
return -ENOMEM;
- set_pte_at(&init_mm, address, pte, mk_pte(page, prot));
+ set_pte_at(&init_mm, base + address, pte, mk_pte(page, prot));
address += PAGE_SIZE;
pte++;
(*pages)++;
@@ -151,7 +155,7 @@
pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
if (!pte)
return -ENOMEM;
- if (map_area_pte(pte, address, end - address, prot, pages))
+ if (map_area_pte(pte, base + address, end - address, prot, pages))
return -ENOMEM;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
^ permalink raw reply [flat|nested] 20+ messages in thread

* Re: [PATCH 0/15] ptwalk: pagetable walker cleanup
2005-03-10 1:02 ` David S. Miller
@ 2005-03-10 1:08 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-03-10 1:08 UTC (permalink / raw)
To: David S. Miller; +Cc: hugh, Andrew Morton, Linux Kernel list, David S. Miller
On Wed, 2005-03-09 at 17:02 -0800, David S. Miller wrote:
> On Thu, 10 Mar 2005 11:39:44 +1100
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
>
> > There are some other bugs introduced by set_pte_at() caused by latent
> > bugs in the PTE walkers that 'drop' part of the address along the way,
> > notably the vmalloc.c ones are bogus, thus breaking ppc/ppc64 in subtle
> > ways. Before I send patches, I'd rather check if it's not all fixed by
> > your patches first :)
>
> Ben, I fixed vmalloc and the other cases when I pushed the set_pte_at()
> changes to Linus. Here is the changeset that fixes them, and it's certainly
> in Linus's tree:
Yah, but look at the cruft in arch/ppc64/mm/init.c, specifically,
unmap_im_area_{pte,pmd,pud,..} ...
I'll fix it.
Ben.
^ permalink raw reply [flat|nested] 20+ messages in thread