All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matt Mackall <mpm@selenic.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Subject: [PATCH 12/13] maps: Add /proc/pid/pagemap interface
Date: Tue, 03 Apr 2007 21:43:43 -0500	[thread overview]
Message-ID: <13.486631555@selenic.com> (raw)
In-Reply-To: <1.486631555@selenic.com>

Add /proc/pid/pagemap interface

This interface provides a mapping for each page in an address space to
its physical page frame number, allowing precise determination of what
pages are mapped and what pages are shared between processes.

Signed-off-by: Matt Mackall <mpm@selenic.com>

Index: mm/fs/proc/base.c
===================================================================
--- mm.orig/fs/proc/base.c	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/base.c	2007-04-03 14:50:33.000000000 -0500
@@ -664,7 +664,7 @@ out_no_task:
 }
 #endif
 
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file * file, loff_t offset, int orig)
 {
 	switch (orig) {
 	case 0:
@@ -2006,6 +2006,9 @@ static const struct pid_entry tgid_base_
 #ifdef CONFIG_PROC_SMAPS
 	REG("smaps",      S_IRUGO, smaps),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	REG("pagemap",    S_IRUSR, pagemap),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
@@ -2293,6 +2296,9 @@ static const struct pid_entry tid_base_s
 #ifdef CONFIG_PROC_SMAPS
 	REG("smaps",     S_IRUGO, smaps),
 #endif
+#ifdef CONFIG_PROC_PAGEMAP
+	REG("pagemap",    S_IRUSR, pagemap),
+#endif
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
Index: mm/fs/proc/internal.h
===================================================================
--- mm.orig/fs/proc/internal.h	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/internal.h	2007-04-03 14:50:33.000000000 -0500
@@ -45,11 +45,13 @@ extern int proc_tid_stat(struct task_str
 extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
 
 extern const struct file_operations proc_maps_operations;
 extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
 extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
Index: mm/fs/proc/task_mmu.c
===================================================================
--- mm.orig/fs/proc/task_mmu.c	2007-04-03 14:50:33.000000000 -0500
+++ mm/fs/proc/task_mmu.c	2007-04-03 18:02:47.000000000 -0500
@@ -530,3 +530,171 @@ const struct file_operations proc_numa_m
 };
 #endif
 
+#ifdef CONFIG_PROC_PAGEMAP
+/* Cursor and staging state for one read of /proc/pid/pagemap. */
+struct pagemapread {
+	struct mm_struct *mm;	/* address space being read; set by pagemap_read() */
+	unsigned long next;	/* next virtual address that still needs an entry */
+	unsigned long *buf;	/* staging buffer of entries not yet copied out */
+	unsigned long pos;	/* file offset, advanced as bytes are copied out */
+	size_t count;		/* bytes still to be copied to user space */
+	int index;		/* number of entries currently staged in buf */
+	char __user *out;	/* user-space destination of the next flush */
+};
+
+/*
+ * Copy the entries staged in pm->buf out to user space.
+ *
+ * At most pm->count bytes are copied.  On success the output pointer and
+ * file position are advanced and the remaining byte count is reduced.
+ * The staging buffer is emptied on every path.
+ *
+ * Returns 0 on success or -EFAULT if the copy to user space fails.
+ */
+static int flush_pagemap(struct pagemapread *pm)
+{
+	size_t n = min(pm->count, pm->index * sizeof(unsigned long));
+
+	/*
+	 * Empty the staging buffer whether or not the copy succeeds:
+	 * add_to_pagemap() stores an entry before it calls us, so leaving
+	 * index at a full buffer after a failed copy would make the next
+	 * store land past the end of pm->buf.
+	 */
+	pm->index = 0;
+	if (copy_to_user(pm->out, pm->buf, n))
+		return -EFAULT;
+	pm->out += n;
+	pm->pos += n;
+	pm->count -= n;
+	cond_resched();
+	return 0;
+}
+
+/*
+ * Stage one entry (pfn) for the page at addr and advance the cursor to
+ * the following page.  The staged entries are flushed to user space once
+ * they amount to a full page or to everything the reader still wants.
+ *
+ * Returns 0, or the error reported by flush_pagemap().
+ */
+static int add_to_pagemap(unsigned long addr, unsigned long pfn,
+			  struct pagemapread *pm)
+{
+	size_t staged;
+
+	pm->buf[pm->index] = pfn;
+	pm->index++;
+	pm->next = addr + PAGE_SIZE;
+
+	staged = pm->index * sizeof(unsigned long);
+	if (staged < PAGE_SIZE && staged < pm->count)
+		return 0;
+	return flush_pagemap(pm);
+}
+
+/*
+ * pmd-level callback for the page walker: emit one pagemap entry for every
+ * page in [addr, end).
+ *
+ * @pmd:     pmd entry covering the range
+ * @addr:    first virtual address of the range
+ * @end:     address just past the range
+ * @private: the struct pagemapread of the read in progress
+ *
+ * Pages below pm->next already have an entry and are skipped.  A page
+ * whose pte is not present is reported as -1, otherwise its pfn is
+ * reported.
+ *
+ * Returns 0, or the error from add_to_pagemap().  The pte mapping is
+ * released on every path, including the error path.
+ *
+ * NOTE(review): add_to_pagemap() may copy to user space and reschedule
+ * while the pte is still mapped; if pte_offset_map() is an atomic mapping
+ * on some configurations this needs restructuring - confirm.
+ */
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
+{
+	struct pagemapread *pm = private;
+	pte_t *start_pte = pte_offset_map(pmd, addr);
+	pte_t *pte = start_pte;
+	int err = 0;
+
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		if (addr < pm->next)
+			continue;
+		if (!pte_present(*pte))
+			err = add_to_pagemap(addr, -1UL, pm);
+		else
+			err = add_to_pagemap(addr, pte_pfn(*pte), pm);
+		if (err)
+			break;
+	}
+	/* Unmap via the pointer we were handed so early exits stay balanced. */
+	pte_unmap(start_pte);
+	return err;
+}
+
+/*
+ * Emit "not present" (-1) entries from pm->next up to, but not including,
+ * end.  Stops at the first error from add_to_pagemap() and returns it;
+ * returns 0 once pm->next has reached end.
+ */
+static int pagemap_fill(struct pagemapread *pm, unsigned long end)
+{
+	int err = 0;
+
+	while (!err && pm->next != end)
+		err = add_to_pagemap(pm->next, -1UL, pm);
+	return err;
+}
+
+/* Page walker callbacks for pagemap: only the pmd-level hook is used. */
+static struct mm_walk pagemap_walk = {
+	.pmd_entry = pagemap_pte_range,
+};
+
+/* /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one long
+ * representing the corresponding physical page frame number (PFN) or
+ * -1 if the page isn't present. This allows determining precisely
+ * which pages are mapped and comparing mapped pages between
+ * processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ *
+ * The file offset must be a multiple of sizeof(unsigned long), otherwise
+ * the read fails with -EIO.  A count that is not a multiple of
+ * sizeof(unsigned long) is rounded down.
+ *
+ * Returns the number of bytes copied, 0 when the request covers no whole
+ * entry or the task has no mm, or a negative errno: -ESRCH if the task is
+ * gone, -EACCES if the caller may not inspect it, -EIO for a misaligned
+ * offset or if access is lost during the read, -ENOMEM if the staging
+ * page cannot be allocated, or the error from copying to user space.
+ *
+ * NOTE(review): entries are copied to user space while mmap_sem is held
+ * for read; confirm a fault on the destination buffer cannot deadlock.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	unsigned long src = *ppos;
+	unsigned long *page;
+	unsigned long addr, end, vend, svpfn, evpfn;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	struct pagemapread pm;
+	ssize_t ret = -ESRCH;
+
+	if (!task)
+		goto out_no_task;
+
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out;
+
+	/* Translate the byte range of the file into a range of virtual pages. */
+	ret = -EIO;
+	svpfn = src / sizeof(unsigned long);
+	addr = PAGE_SIZE * svpfn;
+	if (svpfn * sizeof(unsigned long) != src)
+		goto out;
+	evpfn = min((src + count) / sizeof(unsigned long),
+		    ((~0UL) >> PAGE_SHIFT) + 1);
+
+	/*
+	 * Nothing to report if the request holds no whole entry or starts
+	 * beyond the last page; without this check the subtraction below
+	 * would wrap around to an enormous count.
+	 */
+	ret = 0;
+	if (evpfn <= svpfn)
+		goto out;
+	count = (evpfn - svpfn) * sizeof(unsigned long);
+	/* end may wrap to 0 at the top of the address space, hence the -1/+1 below */
+	end = PAGE_SIZE * evpfn;
+
+	ret = -ENOMEM;
+	page = kzalloc(PAGE_SIZE, GFP_USER);
+	if (!page)
+		goto out;
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_free;
+
+	pm.mm = mm;
+	pm.next = addr;
+	pm.buf = page;
+	pm.pos = src;
+	pm.count = count;
+	pm.index = 0;
+	pm.out = buf;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, pm.next);
+	while (pm.count > 0 && vma) {
+		if (!ptrace_may_attach(task)) {
+			/* Leave through the common path so the lock is dropped. */
+			ret = -EIO;
+			break;
+		}
+		/*
+		 * Report the hole in front of this vma.  Skip this when the
+		 * cursor already lies inside the vma: pagemap_fill() only
+		 * stops on equality and would run past its target.
+		 */
+		if (pm.next < vma->vm_start) {
+			vend = min(vma->vm_start - 1, end - 1) + 1;
+			ret = pagemap_fill(&pm, vend);
+			if (ret || !pm.count)
+				break;
+		}
+		vend = min(vma->vm_end - 1, end - 1) + 1;
+		ret = walk_page_range(mm, vma->vm_start, vend,
+				      &pagemap_walk, &pm);
+		if (ret)
+			break;
+		vma = vma->vm_next;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* Report whatever lies beyond the last vma, unless we already failed. */
+	if (!ret)
+		ret = pagemap_fill(&pm, end);
+
+	*ppos = pm.pos;
+	if (!ret)
+		ret = pm.pos - src;
+
+	mmput(mm);
+out_free:
+	kfree(page);
+out:
+	put_task_struct(task);
+out_no_task:
+	return ret;
+}
+
+/* File operations for /proc/pid/pagemap; seeking reuses mem_lseek(). */
+const struct file_operations proc_pagemap_operations = {
+	.read		= pagemap_read,
+	.llseek		= mem_lseek,
+};
+#endif
Index: mm/init/Kconfig
===================================================================
--- mm.orig/init/Kconfig	2007-04-03 14:50:33.000000000 -0500
+++ mm/init/Kconfig	2007-04-03 17:57:29.000000000 -0500
@@ -602,6 +602,16 @@ config PROC_CLEAR_REFS
           working set size. Disabling this interface will reduce
           the size of the kernel for small machines.
 
+config PROC_PAGEMAP
+	default y
+	bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
+	help
+	  The /proc/pid/pagemap interface allows reading the
+	  kernel's virtual memory to page frame mapping to determine which
+	  individual pages a process has mapped and which pages it shares
+	  with other processes. Disabling this interface will reduce the
+	  size of the kernel for small machines.
+
 endmenu		# General setup
 
 config RT_MUTEXES

  parent reply	other threads:[~2007-04-04  2:43 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-04  2:43 [PATCH 0/13] maps: pagemap, kpagemap, and related cleanups Matt Mackall
2007-04-04  2:43 ` [PATCH 1/13] maps: Uninline some functions in the page walker Matt Mackall
2007-04-04  2:43 ` [PATCH 2/13] maps: Eliminate the pmd_walker struct " Matt Mackall
2007-04-04  2:43 ` [PATCH 3/13] maps: Remove vma from args " Matt Mackall
2007-04-04  2:43 ` [PATCH 4/13] maps: Propagate errors from callback in " Matt Mackall
2007-04-04  2:43 ` [PATCH 5/13] maps: Add callbacks for each level to " Matt Mackall
2007-04-04  2:43 ` [PATCH 6/13] maps: Move the page walker code to lib/ Matt Mackall
2007-04-04  3:51   ` Nick Piggin
2007-04-04  5:08     ` Matt Mackall
2007-04-04  5:50       ` Nick Piggin
2007-04-04 21:48         ` Matt Mackall
2007-04-05  1:32           ` Nick Piggin
2007-04-05  1:50             ` Nick Piggin
2007-04-04  2:43 ` [PATCH 7/13] maps: Simplify interdependence of /proc/pid/maps and smaps Matt Mackall
2007-04-04  2:43 ` [PATCH 8/13] maps: Move clear_refs code to task_mmu.c Matt Mackall
2007-04-04  2:43 ` [PATCH 9/13] maps: Regroup task_mmu by interface Matt Mackall
2007-04-04  2:43 ` [PATCH 10/13] maps: Make /proc/pid/smaps optional under CONFIG_EMBEDDED Matt Mackall
2007-04-04  2:43 ` [PATCH 11/13] maps: Make /proc/pid/clear_refs option " Matt Mackall
2007-04-04  6:22   ` David Rientjes
2007-04-04  2:43 ` Matt Mackall [this message]
2007-04-04 11:18   ` [PATCH 12/13] maps: Add /proc/pid/pagemap interface Nikita Danilov
2007-04-04 16:32     ` Matt Mackall
2007-04-04 18:03       ` Nikita Danilov
2007-04-04 21:59         ` Matt Mackall
2007-04-04  2:43 ` [PATCH 13/13] maps: Add /proc/kpagemap interface Matt Mackall
2007-04-12 23:10 ` [PATCH 0/13] maps: pagemap, kpagemap, and related cleanups William Lee Irwin III
2007-04-12 23:32   ` Andrew Morton
2007-04-12 23:42     ` William Lee Irwin III
2007-04-13  0:25       ` Nick Piggin
2007-04-13  0:15     ` Nick Piggin
2007-04-13  0:25       ` Matt Mackall
2007-04-13  1:01         ` Nick Piggin
2007-04-13  1:38           ` Matt Mackall
2007-04-13  2:11             ` Nick Piggin
2007-04-13  0:42       ` Andrew Morton
2007-04-13  1:14         ` Nick Piggin
2007-04-13  1:22           ` Andrew Morton
2007-04-13  1:42             ` Nick Piggin
2007-04-13  1:57               ` Matt Mackall
2007-04-13  2:21                 ` Nick Piggin
2007-04-13  2:23                   ` Matt Mackall
2007-04-13  2:54                     ` Nick Piggin
2007-04-13 12:24                       ` Ananth N Mavinakayanahalli
2007-04-14  8:13                     ` Maneesh Soni
2007-04-13  1:57               ` Andrew Morton
2007-04-13  2:05                 ` Matt Mackall
2007-04-13  2:29                   ` Nick Piggin
2007-04-13  2:18                 ` Nick Piggin
2007-04-13  2:32                   ` Andrew Morton
2007-04-13  2:50                     ` Nick Piggin
2007-04-13  3:10                       ` Nick Piggin
2007-04-13  6:53                       ` William Lee Irwin III
2007-04-13  7:05                         ` Nick Piggin
2007-04-13  7:51                           ` Christoph Hellwig
2007-04-13  8:03                             ` Nick Piggin
2007-04-13  8:13                               ` Christoph Hellwig
2007-04-13  8:25                                 ` Nick Piggin
2007-04-13  9:46                                   ` Christoph Hellwig
2007-04-13 21:17                                 ` Frank Ch. Eigler
2007-04-16 10:59                                   ` Christoph Hellwig
2007-04-16 21:36                                 ` Andi Kleen
2007-04-16 21:01                                   ` Frank Ch. Eigler
2007-04-13  8:15                             ` William Lee Irwin III
2007-04-13 12:13                       ` Ananth N Mavinakayanahalli
2007-04-13 12:46                         ` Nick Piggin
2007-04-13  3:40                     ` Nick Piggin
2007-04-13  6:55                       ` William Lee Irwin III
2007-04-13  7:03                         ` Nick Piggin
2007-04-13  7:08                           ` William Lee Irwin III
2007-04-13 14:08                       ` Theodore Tso
2007-04-16 11:00                         ` Christoph Hellwig
2007-04-13 17:13                   ` Matt Mackall
2007-04-13 16:24         ` Matt Mackall
2007-04-13 17:03           ` Andrew Morton
2007-04-13 17:24             ` Matt Mackall
2007-04-13 17:58               ` Andrew Morton
2007-04-13  0:15     ` Matt Mackall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=13.486631555@selenic.com \
    --to=mpm@selenic.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.