linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <dave@linux.vnet.ibm.com>
To: Hans Rosenfeld <hans.rosenfeld@amd.com>
Cc: Hugh Dickins <hugh@veritas.com>,
	Nishanth Aravamudan <nacc@us.ibm.com>,
	Ingo Molnar <mingo@elte.hu>,
	Jeff Chua <jeff.chua.linux@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Gabriel C <nix.or.die@googlemail.com>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Matt Mackall <mpm@selenic.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH] x86: fix PAE pmd_bad bootup warning
Date: Thu, 08 May 2008 16:15:32 -0700	[thread overview]
Message-ID: <1210288532.7905.89.camel@nimitz.home.sr71.net> (raw)
In-Reply-To: <20080508200239.GJ12654@escobedo.amd.com>

Here's one quick stab at a solution.  I figured that we already pass
that 'private' variable around.  This patch just sticks that variable
*in* the mm_walk and also makes the caller fill in an 'mm' as well.
Then, we just pass the actual mm_walk around.

Maybe we should just stick the VMA in the mm_walk as well, and have the
common code keep it up to date with the addresses currently being
walked.

Sadly, I didn't quite get enough time to flesh this idea out very far
today, and I'll be offline for a couple of days now.  But, if someone
wants to go this route, I thought this might be useful.  

---

 linux-2.6.git-dave/fs/proc/task_mmu.c |   45 +++++++++++++++++++---------------
 linux-2.6.git-dave/include/linux/mm.h |   17 ++++++------
 linux-2.6.git-dave/mm/pagewalk.c      |   41 +++++++++++++++---------------
 3 files changed, 56 insertions(+), 47 deletions(-)

diff -puN mm/pagewalk.c~pass-mm-into-pagewalkers mm/pagewalk.c
--- linux-2.6.git/mm/pagewalk.c~pass-mm-into-pagewalkers	2008-05-08 15:49:47.000000000 -0700
+++ linux-2.6.git-dave/mm/pagewalk.c	2008-05-08 15:49:54.000000000 -0700
@@ -3,14 +3,14 @@
 #include <linux/sched.h>
 
 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pte_t *pte;
 	int err = 0;
 
 	pte = pte_offset_map(pmd, addr);
 	for (;;) {
-		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
+		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
 		if (err)
 		       break;
 		addr += PAGE_SIZE;
@@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, un
 }
 
 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, un
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pmd_entry)
-			err = walk->pmd_entry(pmd, addr, next, private);
+			err = walk->pmd_entry(pmd, addr, next, walk);
 		if (!err && walk->pte_entry)
-			err = walk_pte_range(pmd, addr, next, walk, private);
+			err = walk_pte_range(pmd, addr, next, walk);
 		if (err)
 			break;
 	} while (pmd++, addr = next, addr != end);
@@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, un
 }
 
 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
-			  const struct mm_walk *walk, void *private)
+			  struct mm_walk *walk)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, un
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pud_entry)
-			err = walk->pud_entry(pud, addr, next, private);
+			err = walk->pud_entry(pud, addr, next, walk);
 		if (!err && (walk->pmd_entry || walk->pte_entry))
-			err = walk_pmd_range(pud, addr, next, walk, private);
+			err = walk_pmd_range(pud, addr, next, walk);
 		if (err)
 			break;
 	} while (pud++, addr = next, addr != end);
@@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, un
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
- * @private: private data passed to the callback function
  *
  * Recursively walk the page table for the memory area in a VMA,
  * calling supplied callbacks. Callbacks are called in-order (first
  * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
  * etc.). If lower-level callbacks are omitted, walking depth is reduced.
  *
- * Each callback receives an entry pointer, the start and end of the
- * associated range, and a caller-supplied private data pointer.
+ * Each callback receives an entry pointer, the start and end of the
+ * associated range, and a pointer to the caller's mm_walk for access to
+ * the ->private and ->mm fields.
  *
  * No locks are taken, but the bottom level iterator will map PTE
  * directories from highmem if necessary.
@@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, un
  * If any callback returns a non-zero value, the walk is aborted and
  * the return value is propagated back to the caller. Otherwise 0 is returned.
  */
-int walk_page_range(const struct mm_struct *mm,
-		    unsigned long addr, unsigned long end,
-		    const struct mm_walk *walk, void *private)
+int walk_page_range(unsigned long addr, unsigned long end,
+		    struct mm_walk *walk)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -112,21 +111,23 @@ int walk_page_range(const struct mm_stru
 	if (addr >= end)
 		return err;
 
-	pgd = pgd_offset(mm, addr);
+	if (!walk->mm)
+		return -EINVAL;
+	pgd = pgd_offset(walk->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, private);
+				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 		if (walk->pgd_entry)
-			err = walk->pgd_entry(pgd, addr, next, private);
+			err = walk->pgd_entry(pgd, addr, next, walk);
 		if (!err &&
 		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
-			err = walk_pud_range(pgd, addr, next, walk, private);
+			err = walk_pud_range(pgd, addr, next, walk);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
diff -puN include/linux/mm.h~pass-mm-into-pagewalkers include/linux/mm.h
--- linux-2.6.git/include/linux/mm.h~pass-mm-into-pagewalkers	2008-05-08 15:49:47.000000000 -0700
+++ linux-2.6.git-dave/include/linux/mm.h	2008-05-08 15:49:54.000000000 -0700
@@ -760,16 +760,17 @@ unsigned long unmap_vmas(struct mmu_gath
  * (see walk_page_range for more details)
  */
 struct mm_walk {
-	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
-	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
-	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
-	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
-	int (*pte_hole)(unsigned long, unsigned long, void *);
+	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
+	int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
+	struct mm_struct *mm;
+	void *private;
 };
 
-int walk_page_range(const struct mm_struct *, unsigned long addr,
-		    unsigned long end, const struct mm_walk *walk,
-		    void *private);
+int walk_page_range(unsigned long addr, unsigned long end,
+		struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
diff -puN fs/proc/task_mmu.c~pass-mm-into-pagewalkers fs/proc/task_mmu.c
--- linux-2.6.git/fs/proc/task_mmu.c~pass-mm-into-pagewalkers	2008-05-08 15:49:47.000000000 -0700
+++ linux-2.6.git-dave/fs/proc/task_mmu.c	2008-05-08 15:49:59.000000000 -0700
@@ -317,9 +317,9 @@ struct mem_size_stats {
 };
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			   void *private)
+			   struct mm_walk *walk)
 {
-	struct mem_size_stats *mss = private;
+	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
@@ -367,19 +367,22 @@ static int smaps_pte_range(pmd_t *pmd, u
 	return 0;
 }
 
-static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
-
 static int show_smap(struct seq_file *m, void *v)
 {
 	struct vm_area_struct *vma = v;
 	struct mem_size_stats mss;
 	int ret;
+	struct mm_walk smaps_walk = {
+		.pmd_entry = smaps_pte_range,
+		.mm = vma->vm_mm,
+		.private = &mss,
+	};
+
 
 	memset(&mss, 0, sizeof mss);
 	mss.vma = vma;
 	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
-				&smaps_walk, &mss);
+		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
 	ret = show_map(m, v);
 	if (ret)
@@ -428,9 +431,9 @@ const struct file_operations proc_smaps_
 };
 
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
-				unsigned long end, void *private)
+				unsigned long end, struct mm_walk *walk)
 {
-	struct vm_area_struct *vma = private;
+	struct vm_area_struct *vma = walk->private;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
@@ -454,8 +457,6 @@ static int clear_refs_pte_range(pmd_t *p
 	return 0;
 }
 
-static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
-
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -478,11 +479,17 @@ static ssize_t clear_refs_write(struct f
 		return -ESRCH;
 	mm = get_task_mm(task);
 	if (mm) {
+		static struct mm_walk clear_refs_walk;
+		memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
+		clear_refs_walk.pmd_entry = clear_refs_pte_range;
+		clear_refs_walk.mm = mm;
 		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			clear_refs_walk.private = vma;
 			if (!is_vm_hugetlb_page(vma))
-				walk_page_range(mm, vma->vm_start, vma->vm_end,
-						&clear_refs_walk, vma);
+				walk_page_range(vma->vm_start, vma->vm_end,
+						&clear_refs_walk);
+		}
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
@@ -540,9 +547,9 @@ static int add_to_pagemap(unsigned long 
 }
 
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-				void *private)
+				struct mm_walk *walk)
 {
-	struct pagemapread *pm = private;
+	struct pagemapread *pm = walk->private;
 	unsigned long addr;
 	int err = 0;
 	for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -560,9 +567,9 @@ static u64 swap_pte_to_pagemap_entry(pte
 }
 
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-			     void *private)
+			     struct mm_walk *walk)
 {
-	struct pagemapread *pm = private;
+	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
 
@@ -687,8 +694,8 @@ static ssize_t pagemap_read(struct file 
 		 * user buffer is tracked in "pm", and the walk
 		 * will stop when we hit the end of the buffer.
 		 */
-		ret = walk_page_range(mm, start_vaddr, end_vaddr,
-					&pagemap_walk, &pm);
+		ret = walk_page_range(start_vaddr, end_vaddr,
+					&pagemap_walk);
 		if (ret == PM_END_OF_BUFFER)
 			ret = 0;
 		/* don't need mmap_sem for these, but this looks cleaner */
_


-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-05-08 23:15 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <b6a2187b0805051806v25fa1272xb08e0b70b9c3408@mail.gmail.com>
     [not found] ` <20080506124946.GA2146@elte.hu>
     [not found]   ` <Pine.LNX.4.64.0805061435510.32567@blonde.site>
     [not found]     ` <alpine.LFD.1.10.0805061138580.32269@woody.linux-foundation.org>
2008-05-06 19:49       ` [PATCH] x86: fix PAE pmd_bad bootup warning Hugh Dickins
2008-05-06 20:06         ` Linus Torvalds
2008-05-06 20:30           ` Hugh Dickins
2008-05-08 16:07             ` Nishanth Aravamudan
2008-05-06 20:22         ` Hans Rosenfeld
2008-05-06 20:36           ` Hugh Dickins
2008-05-07 23:39             ` Nishanth Aravamudan
2008-05-06 20:42           ` Dave Hansen
2008-05-08 14:34             ` Hans Rosenfeld
2008-05-08 14:39               ` Hans Rosenfeld
2008-05-08 14:52               ` Dave Hansen
2008-05-08 15:11                 ` Hans Rosenfeld
2008-05-08 15:51                   ` Dave Hansen
2008-05-08 16:19                     ` Hans Rosenfeld
2008-05-08 16:33                       ` Nishanth Aravamudan
2008-05-08 16:51                         ` Hans Rosenfeld
2008-05-08 17:16                           ` Nishanth Aravamudan
2008-05-08 18:42                             ` Dave Hansen
2008-05-08 18:58                               ` Hugh Dickins
2008-05-08 19:06                                 ` Dave Hansen
2008-05-08 18:48                             ` Hugh Dickins
2008-05-08 19:49                               ` Matt Mackall
2008-05-08 20:08                                 ` Dave Hansen
2008-05-08 20:02                               ` Hans Rosenfeld
2008-05-08 20:16                                 ` Dave Hansen
2008-05-08 23:15                                 ` Dave Hansen [this message]
2008-05-14 19:01                                   ` Matt Mackall
2008-05-09  9:03                                 ` Paul Mundt
2008-05-08 16:42                       ` Dave Hansen
2008-05-08 15:44                 ` Nishanth Aravamudan
2008-05-07  4:40         ` Jeff Chua
2008-05-07  5:30           ` Hugh Dickins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1210288532.7905.89.camel@nimitz.home.sr71.net \
    --to=dave@linux.vnet.ibm.com \
    --cc=arjan@linux.intel.com \
    --cc=hans.rosenfeld@amd.com \
    --cc=hpa@zytor.com \
    --cc=hugh@veritas.com \
    --cc=jeff.chua.linux@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=mpm@selenic.com \
    --cc=nacc@us.ibm.com \
    --cc=nix.or.die@googlemail.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).