public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Matt Mackall <mpm@selenic.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 12/13] maps: Add /proc/pid/pagemap interface
Date: Tue, 03 Apr 2007 21:43:43 -0500	[thread overview]
Message-ID: <13.486631555@selenic.com> (raw)
In-Reply-To: <1.486631555@selenic.com>

Add /proc/pid/pagemap interface

This interface provides a mapping for each page in an address space to
its physical page frame number, allowing precise determination of what
pages are mapped and what pages are shared between processes.

Signed-off-by: Matt Mackall <mpm@selenic.com>

Index: mm/fs/proc/base.c
===================================================================
--- mm.orig/fs/proc/base.c	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/base.c	2007-04-03 14:50:33.000000000 -0500
@@ -664,7 +664,7 @@ out_no_task:
 }
 #endif
 
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file * file, loff_t offset, int orig)
 {
 	switch (orig) {
 	case 0:
@@ -2006,6 +2006,9 @@ static const struct pid_entry tgid_base_
 #ifdef CONFIG_PROC_SMAPS
 	REG("smaps",      S_IRUGO, smaps),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	REG("pagemap",    S_IRUSR, pagemap),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
@@ -2293,6 +2296,9 @@ static const struct pid_entry tid_base_s
 #ifdef CONFIG_PROC_SMAPS
 	REG("smaps",     S_IRUGO, smaps),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	REG("pagemap",    S_IRUSR, pagemap),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
Index: mm/fs/proc/internal.h
===================================================================
--- mm.orig/fs/proc/internal.h	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/internal.h	2007-04-03 14:50:33.000000000 -0500
@@ -45,11 +45,13 @@ extern int proc_tid_stat(struct task_str
 extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
 
 extern const struct file_operations proc_maps_operations;
 extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
Index: mm/fs/proc/task_mmu.c
===================================================================
--- mm.orig/fs/proc/task_mmu.c	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/task_mmu.c	2007-04-03 18:02:47.000000000 -0500
@@ -530,3 +530,242 @@ const struct file_operations proc_numa_m
 };
 #endif
 
+#ifdef CONFIG_PROC_PAGEMAP
+/* State carried through a single read() of /proc/pid/pagemap. */
+struct pagemapread {
+	struct mm_struct *mm;	/* address space being examined */
+	unsigned long next;	/* virtual address of the next entry to emit */
+	unsigned long *buf;	/* kernel staging buffer, one page */
+	unsigned long pos;	/* file offset of the data copied out so far */
+	size_t count;		/* bytes still owed to the reader */
+	int index;		/* number of entries staged in buf */
+	char __user *out;	/* next free byte of the reader's buffer */
+};
+
+/*
+ * Copy the staged entries to user space (never more than the reader
+ * still wants) and empty the staging buffer.
+ *
+ * This can sleep in copy_to_user() and cond_resched(), so it must not
+ * be reached with a pte mapping held.  Returns 0 or -EFAULT.
+ */
+static int flush_pagemap(struct pagemapread *pm)
+{
+	size_t n = min(pm->count, pm->index * sizeof(unsigned long));
+
+	if (copy_to_user(pm->out, pm->buf, n))
+		return -EFAULT;
+	pm->out += n;
+	pm->pos += n;
+	pm->count -= n;
+	pm->index = 0;
+	cond_resched();
+	return 0;
+}
+
+/*
+ * Stage the entry for the page at addr and advance pm->next past it.
+ * The buffer is flushed once it holds a page worth of entries or
+ * enough to satisfy the rest of the read.
+ */
+static int add_to_pagemap(unsigned long addr, unsigned long pfn,
+			  struct pagemapread *pm)
+{
+	pm->buf[pm->index++] = pfn;
+	pm->next = addr + PAGE_SIZE;
+	if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
+	    pm->index * sizeof(unsigned long) >= pm->count)
+		return flush_pagemap(pm);
+	return 0;
+}
+
+/*
+ * Emit "not present" (-1) entries from pm->next up to end.
+ *
+ * The loop tests for inequality because end wraps to 0 when the read
+ * reaches the top of the address space; callers must therefore only
+ * pass an end that lies at or ahead of pm->next.
+ */
+static int pagemap_fill(struct pagemapread *pm, unsigned long end)
+{
+	int ret;
+
+	while (pm->next != end) {
+		ret = add_to_pagemap(pm->next, -1UL, pm);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * pmd_entry callback: emit one entry per page in [addr, end).
+ *
+ * Each pte is mapped only long enough to read it, because
+ * add_to_pagemap() may flush to user space and sleep.  This also
+ * guarantees the mapping is dropped on every exit path.
+ */
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
+{
+	struct pagemapread *pm = private;
+	unsigned long pfn;
+	pte_t *pte;
+	int err;
+
+	/*
+	 * If the walker skipped a range (presumably because no page
+	 * table is populated there), pad it so that later entries stay
+	 * at the file offset that matches their address.
+	 */
+	if (addr > pm->next) {
+		err = pagemap_fill(pm, addr);
+		if (err)
+			return err;
+	}
+
+	for (; addr != end; addr += PAGE_SIZE) {
+		/* the walk starts at vm_start; skip what precedes the read */
+		if (addr < pm->next)
+			continue;
+		pte = pte_offset_map(pmd, addr);
+		pfn = pte_present(*pte) ? pte_pfn(*pte) : -1UL;
+		pte_unmap(pte);
+		err = add_to_pagemap(addr, pfn, pm);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
+
+/* /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one long
+ * representing the corresponding physical page frame number (PFN) or
+ * -1 if the page isn't present. This allows determining precisely
+ * which pages are mapped and comparing mapped pages between
+ * processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ *
+ * The file offset must be a multiple of sizeof(unsigned long), else
+ * -EIO is returned.  Returns the number of bytes copied, 0 at or past
+ * the end of the address space or if the task has no mm, or a
+ * negative errno.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	unsigned long max_vpfn = ((~0UL) >> PAGE_SHIFT) + 1;
+	unsigned long src = *ppos;
+	unsigned long *page;
+	unsigned long addr, end, vend, svpfn, evpfn;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct pagemapread pm;
+	int ret = -ESRCH;
+
+	if (!task)
+		goto out_no_task;
+
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out;
+
+	ret = -EIO;
+	svpfn = src / sizeof(unsigned long);
+	if (svpfn * sizeof(unsigned long) != src)
+		goto out;
+
+	/*
+	 * Offsets past the last virtual page read as EOF.  Without this
+	 * check evpfn could end up below svpfn and count would underflow.
+	 */
+	ret = 0;
+	if ((unsigned long long)*ppos >=
+	    (unsigned long long)max_vpfn * sizeof(unsigned long))
+		goto out;
+
+	addr = PAGE_SIZE * svpfn;
+	evpfn = min((src + count) / sizeof(unsigned long), max_vpfn);
+	count = (evpfn - svpfn) * sizeof(unsigned long);
+	end = PAGE_SIZE * evpfn;	/* 0 if the read reaches the top */
+
+	ret = -ENOMEM;
+	page = kzalloc(PAGE_SIZE, GFP_USER);
+	if (!page)
+		goto out;
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_free;
+
+	pm.mm = mm;
+	pm.next = addr;
+	pm.buf = page;
+	pm.pos = src;
+	pm.count = count;
+	pm.index = 0;
+	pm.out = buf;
+
+	/*
+	 * NOTE(review): entries are copied to user space while mmap_sem
+	 * is held for reading; confirm that a fault on the reader's
+	 * buffer cannot deadlock against a waiting writer.
+	 */
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, pm.next);
+	while (pm.count > 0 && vma) {
+		/* leave through the unlock path so nothing is leaked */
+		if (!ptrace_may_attach(task)) {
+			ret = -EIO;
+			break;
+		}
+		/*
+		 * Pad the hole in front of this vma.  Skip this when the
+		 * read starts inside the vma: pm.next is already past
+		 * vm_start and pagemap_fill() would never reach it.
+		 */
+		if (vma->vm_start > pm.next) {
+			vend = min(vma->vm_start - 1, end - 1) + 1;
+			ret = pagemap_fill(&pm, vend);
+			if (ret || !pm.count)
+				break;
+		}
+		vend = min(vma->vm_end - 1, end - 1) + 1;
+		ret = walk_page_range(mm, vma->vm_start, vend,
+				      &pagemap_walk, &pm);
+		if (ret)
+			break;
+		vma = vma->vm_next;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* pad whatever lies beyond the last vma, unless we already failed */
+	if (!ret)
+		ret = pagemap_fill(&pm, end);
+
+	*ppos = pm.pos;
+	if (!ret)
+		ret = pm.pos - src;
+
+	mmput(mm);
+out_free:
+	kfree(page);
+out:
+	put_task_struct(task);
+out_no_task:
+	return ret;
+}
+
+const struct file_operations proc_pagemap_operations = {
+	.llseek		= mem_lseek, /* borrow this */
+	.read		= pagemap_read,
+};
+#endif
Index: mm/init/Kconfig
===================================================================
--- mm.orig/init/Kconfig	2007-04-03 14:50:33.000000000 -0500
+++ mm/init/Kconfig	2007-04-03 17:57:29.000000000 -0500
@@ -602,6 +602,16 @@ config PROC_CLEAR_REFS
           working set size. Disabling this interface will reduce
           the size of the kernel for small machines.
 
+config PROC_PAGEMAP
+	default y
+	bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
+	help
+	  The /proc/pid/pagemap interface allows reading the kernel's
+	  virtual memory to page frame mapping to determine which
+	  individual pages a process has mapped and which pages it shares
+	  with other processes. Disabling this interface will reduce the
+	  size of the kernel for small machines.
+
 endmenu		# General setup
 
 config RT_MUTEXES

  parent reply	other threads:[~2007-04-04  2:43 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-04  2:43 [PATCH 0/13] maps: pagemap, kpagemap, and related cleanups Matt Mackall
2007-04-04  2:43 ` [PATCH 1/13] maps: Uninline some functions in the page walker Matt Mackall
2007-04-04  2:43 ` [PATCH 2/13] maps: Eliminate the pmd_walker struct " Matt Mackall
2007-04-04  2:43 ` [PATCH 3/13] maps: Remove vma from args " Matt Mackall
2007-04-04  2:43 ` [PATCH 4/13] maps: Propagate errors from callback in " Matt Mackall
2007-04-04  2:43 ` [PATCH 5/13] maps: Add callbacks for each level to " Matt Mackall
2007-04-04  2:43 ` [PATCH 6/13] maps: Move the page walker code to lib/ Matt Mackall
2007-04-04  3:51   ` Nick Piggin
2007-04-04  5:08     ` Matt Mackall
2007-04-04  5:50       ` Nick Piggin
2007-04-04 21:48         ` Matt Mackall
2007-04-05  1:32           ` Nick Piggin
2007-04-05  1:50             ` Nick Piggin
2007-04-04  2:43 ` [PATCH 7/13] maps: Simplify interdependence of /proc/pid/maps and smaps Matt Mackall
2007-04-04  2:43 ` [PATCH 8/13] maps: Move clear_refs code to task_mmu.c Matt Mackall
2007-04-04  2:43 ` [PATCH 9/13] maps: Regroup task_mmu by interface Matt Mackall
2007-04-04  2:43 ` [PATCH 10/13] maps: Make /proc/pid/smaps optional under CONFIG_EMBEDDED Matt Mackall
2007-04-04  2:43 ` [PATCH 11/13] maps: Make /proc/pid/clear_refs option " Matt Mackall
2007-04-04  6:22   ` David Rientjes
2007-04-04  2:43 ` Matt Mackall [this message]
2007-04-04 11:18   ` [PATCH 12/13] maps: Add /proc/pid/pagemap interface Nikita Danilov
2007-04-04 16:32     ` Matt Mackall
2007-04-04 18:03       ` Nikita Danilov
2007-04-04 21:59         ` Matt Mackall
2007-04-04  2:43 ` [PATCH 13/13] maps: Add /proc/kpagemap interface Matt Mackall
2007-04-12 23:10 ` [PATCH 0/13] maps: pagemap, kpagemap, and related cleanups William Lee Irwin III
2007-04-12 23:32   ` Andrew Morton
2007-04-12 23:42     ` William Lee Irwin III
2007-04-13  0:25       ` Nick Piggin
2007-04-13  0:15     ` Nick Piggin
2007-04-13  0:25       ` Matt Mackall
2007-04-13  1:01         ` Nick Piggin
2007-04-13  1:38           ` Matt Mackall
2007-04-13  2:11             ` Nick Piggin
2007-04-13  0:42       ` Andrew Morton
2007-04-13  1:14         ` Nick Piggin
2007-04-13  1:22           ` Andrew Morton
2007-04-13  1:42             ` Nick Piggin
2007-04-13  1:57               ` Matt Mackall
2007-04-13  2:21                 ` Nick Piggin
2007-04-13  2:23                   ` Matt Mackall
2007-04-13  2:54                     ` Nick Piggin
2007-04-13 12:24                       ` Ananth N Mavinakayanahalli
2007-04-14  8:13                     ` Maneesh Soni
2007-04-13  1:57               ` Andrew Morton
2007-04-13  2:05                 ` Matt Mackall
2007-04-13  2:29                   ` Nick Piggin
2007-04-13  2:18                 ` Nick Piggin
2007-04-13  2:32                   ` Andrew Morton
2007-04-13  2:50                     ` Nick Piggin
2007-04-13  3:10                       ` Nick Piggin
2007-04-13  6:53                       ` William Lee Irwin III
2007-04-13  7:05                         ` Nick Piggin
2007-04-13  7:51                           ` Christoph Hellwig
2007-04-13  8:03                             ` Nick Piggin
2007-04-13  8:13                               ` Christoph Hellwig
2007-04-13  8:25                                 ` Nick Piggin
2007-04-13  9:46                                   ` Christoph Hellwig
2007-04-13 21:17                                 ` Frank Ch. Eigler
2007-04-16 10:59                                   ` Christoph Hellwig
2007-04-16 21:36                                 ` Andi Kleen
2007-04-16 21:01                                   ` Frank Ch. Eigler
2007-04-13  8:15                             ` William Lee Irwin III
2007-04-13 12:13                       ` Ananth N Mavinakayanahalli
2007-04-13 12:46                         ` Nick Piggin
2007-04-13  3:40                     ` Nick Piggin
2007-04-13  6:55                       ` William Lee Irwin III
2007-04-13  7:03                         ` Nick Piggin
2007-04-13  7:08                           ` William Lee Irwin III
2007-04-13 14:08                       ` Theodore Tso
2007-04-16 11:00                         ` Christoph Hellwig
2007-04-13 17:13                   ` Matt Mackall
2007-04-13 16:24         ` Matt Mackall
2007-04-13 17:03           ` Andrew Morton
2007-04-13 17:24             ` Matt Mackall
2007-04-13 17:58               ` Andrew Morton
2007-04-13  0:15     ` Matt Mackall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=13.486631555@selenic.com \
    --to=mpm@selenic.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox