* [PATCH 1/9] /proc/pid/pagemap update (v2)
@ 2007-08-22 23:18 Dave Hansen
2007-08-22 23:18 ` [PATCH 2/9] pagemap: remove file header Dave Hansen
` (7 more replies)
0 siblings, 8 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
This is a series of patches to update /proc/pid/pagemap,
to simplify the code, and fix bugs which caused userspace
memory corruption.
Since it is just in -mm, we should probably roll all of
these together and just update it all at once, or send
a simple drop-in replacement patch. These patches are
all here mostly to help with review.
Matt, if you're OK with these, do you mind if I send
the update into -mm, or would you like to do it?
--
From: Matt Mackall <mpm@selenic.com>
On Mon, Aug 06, 2007 at 01:44:19AM -0500, Dave Boutcher wrote:
>
> Matt, this patch set replaces the two patches I sent earlier and
> contains additional fixes. I've done some reasonably rigorous testing
> on x86_64, but not on a 32 bit arch. I'm pretty sure this isn't worse
> than what's in mm right now, which has some user-space corruption and
> a nasty infinite kernel loop. YMMV.
Dave, here's my current work-in-progress patch to deal with a couple
locking issues, primarily a possible deadlock on the mm semaphore that
can occur if someone unmaps the target buffer while we're walking the
tree. It currently hangs on my box and I haven't had any free cycles
to finish debugging it, but you might want to take a peek at it.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 121 ++++++++++++++++++++------------------------
1 file changed, 55 insertions(+), 66 deletions(-)
diff -puN fs/proc/task_mmu.c~Re-_PATCH_0_3_proc_pid_pagemap_fixes fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~Re-_PATCH_0_3_proc_pid_pagemap_fixes 2007-08-22 16:16:50.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:50.000000000 -0700
@@ -501,37 +501,21 @@ const struct file_operations proc_clear_
};
struct pagemapread {
- struct mm_struct *mm;
unsigned long next;
- unsigned long *buf;
- pte_t *ptebuf;
unsigned long pos;
size_t count;
int index;
- char __user *out;
+ unsigned long __user *out;
};
-static int flush_pagemap(struct pagemapread *pm)
-{
- int n = min(pm->count, pm->index * sizeof(unsigned long));
- if (copy_to_user(pm->out, pm->buf, n))
- return -EFAULT;
- pm->out += n;
- pm->pos += n;
- pm->count -= n;
- pm->index = 0;
- cond_resched();
- return 0;
-}
-
static int add_to_pagemap(unsigned long addr, unsigned long pfn,
struct pagemapread *pm)
{
- pm->buf[pm->index++] = pfn;
+ __put_user(pfn, pm->out);
+ pm->out++;
+ pm->pos += sizeof(unsigned long);
+ pm->count -= sizeof(unsigned long);
pm->next = addr + PAGE_SIZE;
- if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
- pm->index * sizeof(unsigned long) >= pm->count)
- return flush_pagemap(pm);
return 0;
}
@@ -543,14 +527,6 @@ static int pagemap_pte_range(pmd_t *pmd,
int err;
pte = pte_offset_map(pmd, addr);
-
-#ifdef CONFIG_HIGHPTE
- /* copy PTE directory to temporary buffer and unmap it */
- memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte);
- pte_unmap(pte);
- pte = pm->ptebuf;
-#endif
-
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (addr < pm->next)
continue;
@@ -560,11 +536,12 @@ static int pagemap_pte_range(pmd_t *pmd,
err = add_to_pagemap(addr, pte_pfn(*pte), pm);
if (err)
return err;
+ if (pm->count == 0)
+ break;
}
-
-#ifndef CONFIG_HIGHPTE
pte_unmap(pte - 1);
-#endif
+
+ cond_resched();
return 0;
}
@@ -573,7 +550,7 @@ static int pagemap_fill(struct pagemapre
{
int ret;
- while (pm->next != end) {
+ while (pm->next != end && pm->count > 0) {
ret = add_to_pagemap(pm->next, -1UL, pm);
if (ret)
return ret;
@@ -608,15 +585,16 @@ static ssize_t pagemap_read(struct file
{
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
unsigned long src = *ppos;
- unsigned long *page;
- unsigned long addr, end, vend, svpfn, evpfn;
+ struct page **pages, *page;
+ unsigned long addr, end, vend, svpfn, evpfn, uaddr, uend;
struct mm_struct *mm;
struct vm_area_struct *vma;
struct pagemapread pm;
+ int pagecount;
int ret = -ESRCH;
if (!task)
- goto out_no_task;
+ goto out;
ret = -EACCES;
if (!ptrace_may_attach(task))
@@ -628,39 +606,43 @@ static ssize_t pagemap_read(struct file
if ((svpfn + 1) * sizeof(unsigned long) != src)
goto out;
evpfn = min((src + count) / sizeof(unsigned long) - 1,
- ((~0UL) >> PAGE_SHIFT) + 1);
+ ((~0UL) >> PAGE_SHIFT) + 1) - 1;
count = (evpfn - svpfn) * sizeof(unsigned long);
end = PAGE_SIZE * evpfn;
-
- ret = -ENOMEM;
- page = kzalloc(PAGE_SIZE, GFP_USER);
- if (!page)
- goto out;
-
-#ifdef CONFIG_HIGHPTE
- pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
- if (!pm.ptebuf)
- goto out_free;
-#endif
+ //printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
ret = 0;
mm = get_task_mm(task);
if (!mm)
- goto out_freepte;
+ goto out;
+
+ ret = -ENOMEM;
+ uaddr = (unsigned long)buf & ~(PAGE_SIZE-1);
+ uend = (unsigned long)(buf + count);
+ pagecount = (uend - uaddr + PAGE_SIZE-1) / PAGE_SIZE;
+ pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_task;
+
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(current, current->mm, uaddr, pagecount,
+ 1, 0, pages, NULL);
+ up_read(&current->mm->mmap_sem);
+
+ //printk("%x(%x):%x %d@%ld (%d pages) -> %d\n", uaddr, buf, uend, count, src, pagecount, ret);
+ if (ret < 0)
+ goto out_free;
- pm.mm = mm;
pm.next = addr;
- pm.buf = page;
pm.pos = src;
pm.count = count;
- pm.index = 0;
- pm.out = buf;
+ pm.out = (unsigned long __user *)buf;
if (svpfn == -1) {
- ((char *)page)[0] = (ntohl(1) != 1);
- ((char *)page)[1] = PAGE_SHIFT;
- ((char *)page)[2] = sizeof(unsigned long);
- ((char *)page)[3] = sizeof(unsigned long);
+ put_user((char)(ntohl(1) != 1), buf);
+ put_user((char)PAGE_SHIFT, buf + 1);
+ put_user((char)sizeof(unsigned long), buf + 2);
+ put_user((char)sizeof(unsigned long), buf + 3);
add_to_pagemap(pm.next, page[0], &pm);
}
@@ -669,7 +651,8 @@ static ssize_t pagemap_read(struct file
while (pm.count > 0 && vma) {
if (!ptrace_may_attach(task)) {
ret = -EIO;
- goto out_mm;
+ up_read(&mm->mmap_sem);
+ goto out_release;
}
vend = min(vma->vm_end - 1, end - 1) + 1;
ret = pagemap_fill(&pm, vend);
@@ -682,23 +665,29 @@ static ssize_t pagemap_read(struct file
}
up_read(&mm->mmap_sem);
+ //printk("before fill at %ld\n", pm.pos);
ret = pagemap_fill(&pm, end);
+ printk("after fill at %ld\n", pm.pos);
*ppos = pm.pos;
if (!ret)
ret = pm.pos - src;
-out_mm:
+out_release:
+ printk("releasing pages\n");
+ for (; pagecount; pagecount--) {
+ page = pages[pagecount-1];
+ if (!PageReserved(page))
+ SetPageDirty(page);
+ page_cache_release(page);
+ }
mmput(mm);
-out_freepte:
-#ifdef CONFIG_HIGHPTE
- kfree(pm.ptebuf);
out_free:
-#endif
- kfree(page);
-out:
+ kfree(pages);
+out_task:
put_task_struct(task);
-out_no_task:
+out:
+ printk("returning\n");
return ret;
}
_
* [PATCH 2/9] pagemap: remove file header
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN() Dave Hansen
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
The /proc/<pid>/pagemap file has a header containing:
* first byte: 0 for big endian, 1 for little
* second byte: page shift (eg 12 for 4096 byte pages)
* third byte: entry size in bytes (currently either 4 or 8)
* fourth byte: header size
The endianness is only useful when examining a raw dump of
pagemap from a different machine when you don't know the
source of the file. This is pretty rare, and the programs
or scripts doing the copying off-machine can certainly be
made to hold this information.
The page size is available in userspace at least with libc's
getpagesize(). This will also never vary across processes,
so putting it in a per-process file doesn't make any difference.
If we need a "kernel's page size" exported to userspace,
perhaps we can put it in /proc/meminfo.
The entry size is the really tricky one. This can't just
be sizeof(unsigned long) from userspace because we can have
32-bit processes on 64-bit kernels. But, userspace can
certainly derive this value if it lseek()s to the end of
the file and divides the resulting file size by the number
of pages in its virtual address space.
In any case, I believe this information is redundant, and
can be removed.
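For illustration, here is a rough userspace sketch of that
derivation. It is not part of this patch: the address-space
size below is an assumption (nothing here exports it), and it
relies on the file reporting a meaningful size to lseek().

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Assumed size of the user virtual address space (e.g. x86_64). */
        unsigned long long vspace = 1ULL << 47;
        long page_size = sysconf(_SC_PAGESIZE);
        int fd = open("/proc/self/pagemap", O_RDONLY);
        off_t end;

        if (fd < 0)
                return 1;
        end = lseek(fd, 0, SEEK_END);   /* total size of the file */
        if (end > 0)
                printf("entry size: %llu bytes\n",
                       (unsigned long long)end / (vspace / page_size));
        close(fd);
        return 0;
}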
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff -puN fs/proc/task_mmu.c~pagemap-no-header fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~pagemap-no-header 2007-08-22 16:16:51.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:51.000000000 -0700
@@ -601,12 +601,12 @@ static ssize_t pagemap_read(struct file
goto out;
ret = -EIO;
- svpfn = src / sizeof(unsigned long) - 1;
+ svpfn = src / sizeof(unsigned long);
addr = PAGE_SIZE * svpfn;
- if ((svpfn + 1) * sizeof(unsigned long) != src)
+ if (svpfn * sizeof(unsigned long) != src)
goto out;
evpfn = min((src + count) / sizeof(unsigned long) - 1,
- ((~0UL) >> PAGE_SHIFT) + 1) - 1;
+ ((~0UL) >> PAGE_SHIFT) + 1);
count = (evpfn - svpfn) * sizeof(unsigned long);
end = PAGE_SIZE * evpfn;
//printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
@@ -638,14 +638,6 @@ static ssize_t pagemap_read(struct file
pm.count = count;
pm.out = (unsigned long __user *)buf;
- if (svpfn == -1) {
- put_user((char)(ntohl(1) != 1), buf);
- put_user((char)PAGE_SHIFT, buf + 1);
- put_user((char)sizeof(unsigned long), buf + 2);
- put_user((char)sizeof(unsigned long), buf + 3);
- add_to_pagemap(pm.next, page[0], &pm);
- }
-
down_read(&mm->mmap_sem);
vma = find_vma(mm, pm.next);
while (pm.count > 0 && vma) {
_
* [PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN()
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
2007-08-22 23:18 ` [PATCH 2/9] pagemap: remove file header Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long) Dave Hansen
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
Use existing macros (PAGE_MASK/PAGE_ALIGN()) instead of
open-coding them.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff -puN fs/proc/task_mmu.c~pagemap-use-PAGE_MASK fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~pagemap-use-PAGE_MASK 2007-08-22 16:16:51.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:51.000000000 -0700
@@ -617,9 +617,9 @@ static ssize_t pagemap_read(struct file
goto out;
ret = -ENOMEM;
- uaddr = (unsigned long)buf & ~(PAGE_SIZE-1);
+ uaddr = (unsigned long)buf & PAGE_MASK;
uend = (unsigned long)(buf + count);
- pagecount = (uend - uaddr + PAGE_SIZE-1) / PAGE_SIZE;
+ pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_task;
_
* [PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long)
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
2007-08-22 23:18 ` [PATCH 2/9] pagemap: remove file header Dave Hansen
2007-08-22 23:18 ` [PATCH 3/9] pagemap: use PAGE_MASK/PAGE_ALIGN() Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 5/9] introduce TASK_SIZE_OF() for all arches Dave Hansen
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
I think the code gets easier to read when we give symbolic names
to some of the operations we're performing. I was sure we needed
this when I saw the header being built like this:
...
buf[2] = sizeof(unsigned long)
buf[3] = sizeof(unsigned long)
I really couldn't remember what either field did ;(
This particular use is gone (because of removing the header),
but this patch is still a really good idea to clarify the code.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff -puN fs/proc/task_mmu.c~pagemap-use-ENTRY_SIZE fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~pagemap-use-ENTRY_SIZE 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:52.000000000 -0700
@@ -508,14 +508,16 @@ struct pagemapread {
unsigned long __user *out;
};
+#define PM_ENTRY_BYTES sizeof(unsigned long)
+
static int add_to_pagemap(unsigned long addr, unsigned long pfn,
struct pagemapread *pm)
{
__put_user(pfn, pm->out);
pm->out++;
- pm->pos += sizeof(unsigned long);
- pm->count -= sizeof(unsigned long);
pm->next = addr + PAGE_SIZE;
+ pm->pos += PM_ENTRY_BYTES;
+ pm->count -= PM_ENTRY_BYTES;
return 0;
}
@@ -601,13 +603,13 @@ static ssize_t pagemap_read(struct file
goto out;
ret = -EIO;
- svpfn = src / sizeof(unsigned long);
+ svpfn = src / PM_ENTRY_BYTES;
addr = PAGE_SIZE * svpfn;
- if (svpfn * sizeof(unsigned long) != src)
+ if (svpfn * PM_ENTRY_BYTES != src)
goto out;
evpfn = min((src + count) / sizeof(unsigned long) - 1,
((~0UL) >> PAGE_SHIFT) + 1);
- count = (evpfn - svpfn) * sizeof(unsigned long);
+ count = (evpfn - svpfn) * PM_ENTRY_BYTES;
end = PAGE_SIZE * evpfn;
//printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
_
* [PATCH 5/9] introduce TASK_SIZE_OF() for all arches
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
` (2 preceding siblings ...)
2007-08-22 23:18 ` [PATCH 4/9] pagemap: remove open-coded sizeof(unsigned long) Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 6/9] pagemap: give -1's a name Dave Hansen
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
(already sent to linux-arch, just repeating here in case someone
wants to test these in their entirety)
For the /proc/<pid>/pagemap code[1], we need to be able to query how
much virtual address space a particular task has. The trick is
that we do it through /proc and can't use TASK_SIZE since it
references "current" on some arches. The process opening the
/proc file might be a 32-bit process opening a 64-bit process's
pagemap file.
x86_64 already has a TASK_SIZE_OF() macro:
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
I'd like to have that for other architectures. So, add it
for all the architectures that actually use "current" in
their TASK_SIZE. For the others, just add a quick #define
in sched.h to use plain old TASK_SIZE.
1. http://www.linuxworld.com/news/2007/042407-kernel.html
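To show the intended use (a sketch only; it mirrors how the pagemap
patch later in this series ends up using the macro):

        /* e.g. in pagemap_read(), walking the *target* task's space: */
        unsigned long end_vaddr = TASK_SIZE_OF(task);

        /*
         * Plain TASK_SIZE would be evaluated against "current" on the
         * arches above, which is the wrong answer when a 32-bit reader
         * examines a 64-bit target's pagemap.
         */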
- MIPS portion from Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
lxc-dave/include/asm-ia64/processor.h | 3 ++-
lxc-dave/include/asm-mips/processor.h | 4 ++++
lxc-dave/include/asm-parisc/processor.h | 3 ++-
lxc-dave/include/asm-powerpc/processor.h | 4 +++-
lxc-dave/include/asm-s390/processor.h | 2 ++
lxc-dave/include/linux/sched.h | 4 ++++
6 files changed, 17 insertions(+), 3 deletions(-)
diff -puN include/asm-ia64/processor.h~task_size_of include/asm-ia64/processor.h
--- lxc/include/asm-ia64/processor.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/asm-ia64/processor.h 2007-08-22 16:16:52.000000000 -0700
@@ -31,7 +31,8 @@
* each (assuming 8KB page size), for a total of 8TB of user virtual
* address space.
*/
-#define TASK_SIZE (current->thread.task_size)
+#define TASK_SIZE_OF(tsk) ((tsk)->thread.task_size)
+#define TASK_SIZE TASK_SIZE_OF(current)
/*
* This decides where the kernel will search for a free chunk of vm
diff -puN include/asm-mips/processor.h~task_size_of include/asm-mips/processor.h
--- lxc/include/asm-mips/processor.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/asm-mips/processor.h 2007-08-22 16:16:53.000000000 -0700
@@ -45,6 +45,8 @@ extern unsigned int vced_count, vcei_cou
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_SIZE_OF(tsk) \
+ (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE)
#endif
#ifdef CONFIG_64BIT
@@ -65,6 +67,8 @@ extern unsigned int vced_count, vcei_cou
#define TASK_UNMAPPED_BASE \
(test_thread_flag(TIF_32BIT_ADDR) ? \
PAGE_ALIGN(TASK_SIZE32 / 3) : PAGE_ALIGN(TASK_SIZE / 3))
+#define TASK_SIZE_OF(tsk) \
+ (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE)
#endif
#define NUM_FPU_REGS 32
diff -puN include/asm-parisc/processor.h~task_size_of include/asm-parisc/processor.h
--- lxc/include/asm-parisc/processor.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/asm-parisc/processor.h 2007-08-22 16:16:52.000000000 -0700
@@ -32,7 +32,8 @@
#endif
#define current_text_addr() ({ void *pc; current_ia(pc); pc; })
-#define TASK_SIZE (current->thread.task_size)
+#define TASK_SIZE_OF(tsk) ((tsk)->thread.task_size)
+#define TASK_SIZE (current->thread.task_size)
#define TASK_UNMAPPED_BASE (current->thread.map_base)
#define DEFAULT_TASK_SIZE32 (0xFFF00000UL)
diff -puN include/asm-powerpc/processor.h~task_size_of include/asm-powerpc/processor.h
--- lxc/include/asm-powerpc/processor.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/asm-powerpc/processor.h 2007-08-22 16:16:52.000000000 -0700
@@ -99,7 +99,9 @@ extern struct task_struct *last_task_use
*/
#define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
-#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_USER32 : TASK_SIZE_USER64)
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
TASK_SIZE_USER32 : TASK_SIZE_USER64)
/* This decides where the kernel will search for a free chunk of vm
diff -puN include/asm-s390/processor.h~task_size_of include/asm-s390/processor.h
--- lxc/include/asm-s390/processor.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/asm-s390/processor.h 2007-08-22 16:16:52.000000000 -0700
@@ -75,6 +75,8 @@ extern struct task_struct *last_task_use
# define TASK_SIZE (test_thread_flag(TIF_31BIT) ? \
(0x80000000UL) : (0x40000000000UL))
+# define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+ (0x80000000UL) : (0x40000000000UL))
# define TASK_UNMAPPED_BASE (TASK_SIZE / 2)
# define DEFAULT_TASK_SIZE (0x40000000000UL)
diff -puN include/linux/sched.h~task_size_of include/linux/sched.h
--- lxc/include/linux/sched.h~task_size_of 2007-08-22 16:16:52.000000000 -0700
+++ lxc-dave/include/linux/sched.h 2007-08-22 16:16:52.000000000 -0700
@@ -1810,6 +1810,10 @@ static inline void inc_syscw(struct task
}
#endif
+#ifndef TASK_SIZE_OF
+#define TASK_SIZE_OF(tsk) TASK_SIZE
+#endif
+
#endif /* __KERNEL__ */
#endif
_
* [PATCH 6/9] pagemap: give -1's a name
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
` (3 preceding siblings ...)
2007-08-22 23:18 ` [PATCH 5/9] introduce TASK_SIZE_OF() for all arches Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 7/9] pagewalk: add handler for empty ranges Dave Hansen
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
-1 is a magic number in /proc/$pid/pagemap. It means that
there was no pte present for a particular page. We're
going to be refining that a bit shortly, so give this a
real name for now.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff -puN fs/proc/task_mmu.c~give_-1s_a_name fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~give_-1s_a_name 2007-08-22 16:16:53.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:53.000000000 -0700
@@ -509,6 +509,7 @@ struct pagemapread {
};
#define PM_ENTRY_BYTES sizeof(unsigned long)
+#define PM_NOT_PRESENT ((unsigned long)-1)
static int add_to_pagemap(unsigned long addr, unsigned long pfn,
struct pagemapread *pm)
@@ -533,7 +534,7 @@ static int pagemap_pte_range(pmd_t *pmd,
if (addr < pm->next)
continue;
if (!pte_present(*pte))
- err = add_to_pagemap(addr, -1, pm);
+ err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
else
err = add_to_pagemap(addr, pte_pfn(*pte), pm);
if (err)
_
* [PATCH 7/9] pagewalk: add handler for empty ranges
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
` (4 preceding siblings ...)
2007-08-22 23:18 ` [PATCH 6/9] pagemap: give -1's a name Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:18 ` [PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
2007-08-22 23:18 ` [PATCH 9/9] pagemap: export swap ptes Dave Hansen
7 siblings, 0 replies; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
There's a good deal of complexity in dealing with a sparse
address space in the /proc/<pid>/pagemap code. We have the
pm->next pointer to help indicate how far we've walked in
the pagetables, and we also manually attempt to fill areas
that have no VMAs.
This code adds an extension to the mm_walk code: a new handler
for "empty" pte ranges. Those are areas where there is no
pte page present. This allows us to get rid of the code that
inspects VMAs or that tries to keep track of how much of the
pagemap we have filled.
Note that this truly does walk pte *holes*. That isn't just
places where we have a pte_none(). It includes places where
there are any missing higher-level pagetable entries like
puds.
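A rough example of hooking the new callback (the counting handler
here is made up purely for illustration; the struct field and the
walk_page_range() arguments are the ones this patch uses):

static int count_hole_pages(unsigned long start, unsigned long end,
                            void *private)
{
        /* tally pages in ranges that have no pte page at all */
        *(unsigned long *)private += (end - start) >> PAGE_SHIFT;
        return 0;
}

static struct mm_walk hole_walk = {
        .pte_hole = count_hole_pages,
};

/*
 * Caller (assumed to already hold mm->mmap_sem for read):
 *
 *      unsigned long holes = 0;
 *      walk_page_range(mm, start, end, &hole_walk, &holes);
 */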
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/include/linux/mm.h | 1
lxc-dave/lib/pagewalk.c | 67 +++++++++++++++++++-------------------------
2 files changed, 30 insertions(+), 38 deletions(-)
diff -puN include/linux/mm.h~pagewalk-empty-ranges include/linux/mm.h
--- lxc/include/linux/mm.h~pagewalk-empty-ranges 2007-08-22 16:16:54.000000000 -0700
+++ lxc-dave/include/linux/mm.h 2007-08-22 16:16:54.000000000 -0700
@@ -699,6 +699,7 @@ struct mm_walk {
int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
+ int (*pte_hole) (unsigned long, unsigned long, void *);
};
int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end,
diff -puN lib/pagewalk.c~pagewalk-empty-ranges lib/pagewalk.c
--- lxc/lib/pagewalk.c~pagewalk-empty-ranges 2007-08-22 16:16:54.000000000 -0700
+++ lxc-dave/lib/pagewalk.c 2007-08-22 16:16:54.000000000 -0700
@@ -6,17 +6,13 @@ static int walk_pte_range(pmd_t *pmd, un
struct mm_walk *walk, void *private)
{
pte_t *pte;
- int err;
+ int err = 0;
for (pte = pte_offset_map(pmd, addr); addr != end;
addr += PAGE_SIZE, pte++) {
- if (pte_none(*pte))
- continue;
err = walk->pte_entry(pte, addr, addr, private);
- if (err) {
- pte_unmap(pte);
- return err;
- }
+ if (err)
+ break;
}
pte_unmap(pte);
return 0;
@@ -27,25 +23,23 @@ static int walk_pmd_range(pud_t *pud, un
{
pmd_t *pmd;
unsigned long next;
- int err;
+ int err = 0;
for (pmd = pmd_offset(pud, addr); addr != end;
pmd++, addr = next) {
next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
+ if (pmd_none(*pmd)) {
+ err = walk->pte_hole(addr, next, private);
+ } else if (pmd_none_or_clear_bad(pmd))
continue;
- if (walk->pmd_entry) {
+ if (!err && walk->pmd_entry)
err = walk->pmd_entry(pmd, addr, next, private);
- if (err)
- return err;
- }
- if (walk->pte_entry) {
+ if (!err && walk->pte_entry)
err = walk_pte_range(pmd, addr, next, walk, private);
- if (err)
- return err;
- }
+ if (err)
+ break;
}
- return 0;
+ return err;
}
static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
@@ -53,23 +47,21 @@ static int walk_pud_range(pgd_t *pgd, un
{
pud_t *pud;
unsigned long next;
- int err;
+ int err = 0;
for (pud = pud_offset(pgd, addr); addr != end;
pud++, addr = next) {
next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
+ if (pud_none(*pud)) {
+ err = walk->pte_hole(addr, next, private);
+ } else if (pud_none_or_clear_bad(pud))
continue;
- if (walk->pud_entry) {
+ if (!err && walk->pud_entry)
err = walk->pud_entry(pud, addr, next, private);
- if (err)
- return err;
- }
- if (walk->pmd_entry || walk->pte_entry) {
+ if (!err && (walk->pmd_entry || walk->pte_entry))
err = walk_pmd_range(pud, addr, next, walk, private);
- if (err)
- return err;
- }
+ if (err)
+ return err;
}
return 0;
}
@@ -91,23 +83,22 @@ int walk_page_range(struct mm_struct *mm
{
pgd_t *pgd;
unsigned long next;
- int err;
+ int err = 0;
for (pgd = pgd_offset(mm, addr); addr != end;
pgd++, addr = next) {
next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
+ if (pgd_none(*pgd)) {
+ err = walk->pte_hole(addr, next, private);
+ } else if (pgd_none_or_clear_bad(pgd))
continue;
- if (walk->pgd_entry) {
+ if (!err && walk->pgd_entry)
err = walk->pgd_entry(pgd, addr, next, private);
- if (err)
- return err;
- }
- if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) {
+ if (!err &&
+ (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
err = walk_pud_range(pgd, addr, next, walk, private);
- if (err)
- return err;
- }
+ if (err)
+ return err;
}
return 0;
}
_
* [PATCH 8/9] pagemap: use page walker pte_hole() helper
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
` (5 preceding siblings ...)
2007-08-22 23:18 ` [PATCH 7/9] pagewalk: add handler for empty ranges Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-22 23:54 ` Matt Mackall
2007-08-22 23:18 ` [PATCH 9/9] pagemap: export swap ptes Dave Hansen
7 siblings, 1 reply; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
I tried to do this a bit more incrementally, but it ended
up just looking like an even worse mess. So, this does
a couple of different things.
1. use page walker pte_hole() helper, which
2. gets rid of the "next" value in "struct pagemapread"
3. allow 1-3 byte reads from pagemap. This at least
ensures that we don't write over user memory if they
ask us for 1 byte and we try to write 4.
4. Instead of trying to calculate what ranges of pages
we are going to walk, simply start walking them,
then return PAGEMAP_END_OF_BUFFER at the end of the
buffer, error out, and stop walking.
5. enforce that reads must be aligned to PM_ENTRY_BYTES
Note that, despite these functional additions, and some
nice new comments, this patch still removes more code
than it adds.
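For what it's worth, here's a rough userspace sketch of a read that
follows items 3 and 5 above. It's an assumption-laden example of my
own rather than anything from the patch, and it assumes a 64-bit
kernel where one entry is 8 bytes:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        uint64_t entry; /* one entry, assuming a 64-bit kernel */
        long page_size = sysconf(_SC_PAGESIZE);
        unsigned long vpfn = (unsigned long)&entry / page_size;
        int fd = open("/proc/self/pagemap", O_RDONLY);

        if (fd < 0)
                return 1;
        /* the offset is a multiple of the entry size, so it is aligned */
        if (pread(fd, &entry, sizeof(entry), vpfn * sizeof(entry))
            == (ssize_t)sizeof(entry))
                printf("vpfn %lu -> %#llx\n", vpfn,
                       (unsigned long long)entry);
        close(fd);
        return 0;
}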
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 129 +++++++++++++++++++++-----------------------
1 file changed, 62 insertions(+), 67 deletions(-)
diff -puN fs/proc/task_mmu.c~bail-instead-of-tracking fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~bail-instead-of-tracking 2007-08-22 16:16:54.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:54.000000000 -0700
@@ -501,7 +501,6 @@ const struct file_operations proc_clear_
};
struct pagemapread {
- unsigned long next;
unsigned long pos;
size_t count;
int index;
@@ -510,58 +509,70 @@ struct pagemapread {
#define PM_ENTRY_BYTES sizeof(unsigned long)
#define PM_NOT_PRESENT ((unsigned long)-1)
+#define PAGEMAP_END_OF_BUFFER 1
static int add_to_pagemap(unsigned long addr, unsigned long pfn,
struct pagemapread *pm)
{
- __put_user(pfn, pm->out);
- pm->out++;
- pm->next = addr + PAGE_SIZE;
+ /*
+ * Make sure there's room in the buffer for an
+ * entire entry. Otherwise, only copy part of
+ * the pfn.
+ */
+ if (pm->count >= PM_ENTRY_BYTES)
+ __put_user(pfn pm->out);
+ else
+ copy_to_user(pm->out, &pfn, pm->count);
+
pm->pos += PM_ENTRY_BYTES;
pm->count -= PM_ENTRY_BYTES;
+ if (pm->count <= 0)
+ return PAGEMAP_END_OF_BUFFER;
return 0;
}
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+ void *private)
+{
+ struct pagemapread *pm = private;
+ unsigned long addr;
+ int err = 0;
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+ if (err)
+ break;
+ }
+ return err;
+}
+
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
void *private)
{
struct pagemapread *pm = private;
pte_t *pte;
- int err;
+ int err = 0;
pte = pte_offset_map(pmd, addr);
for (; addr != end; pte++, addr += PAGE_SIZE) {
- if (addr < pm->next)
- continue;
- if (!pte_present(*pte))
- err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
- else
- err = add_to_pagemap(addr, pte_pfn(*pte), pm);
+ unsigned long pfn = PM_NOT_PRESENT;
+ if (pte_present(*pte))
+ pfn = pte_pfn(*pte);
+ err = add_to_pagemap(addr, pfn, pm);
if (err)
return err;
- if (pm->count == 0)
- break;
}
pte_unmap(pte - 1);
cond_resched();
- return 0;
+ return err;
}
-static int pagemap_fill(struct pagemapread *pm, unsigned long end)
+static struct mm_walk pagemap_walk =
{
- int ret;
-
- while (pm->next != end && pm->count > 0) {
- ret = add_to_pagemap(pm->next, -1UL, pm);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
+ .pmd_entry = pagemap_pte_range,
+ .pte_hole = pagemap_pte_hole
+};
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
@@ -589,9 +600,8 @@ static ssize_t pagemap_read(struct file
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
unsigned long src = *ppos;
struct page **pages, *page;
- unsigned long addr, end, vend, svpfn, evpfn, uaddr, uend;
+ unsigned long uaddr, uend;
struct mm_struct *mm;
- struct vm_area_struct *vma;
struct pagemapread pm;
int pagecount;
int ret = -ESRCH;
@@ -603,16 +613,10 @@ static ssize_t pagemap_read(struct file
if (!ptrace_may_attach(task))
goto out;
- ret = -EIO;
- svpfn = src / PM_ENTRY_BYTES;
- addr = PAGE_SIZE * svpfn;
- if (svpfn * PM_ENTRY_BYTES != src)
+ ret = -EINVAL;
+ /* file position must be aligned */
+ if (*ppos % PM_ENTRY_BYTES)
goto out;
- evpfn = min((src + count) / sizeof(unsigned long) - 1,
- ((~0UL) >> PAGE_SHIFT) + 1);
- count = (evpfn - svpfn) * PM_ENTRY_BYTES;
- end = PAGE_SIZE * evpfn;
- //printk("src %ld svpfn %d evpfn %d count %d\n", src, svpfn, evpfn, count);
ret = 0;
mm = get_task_mm(task);
@@ -632,44 +636,36 @@ static ssize_t pagemap_read(struct file
1, 0, pages, NULL);
up_read(&current->mm->mmap_sem);
- //printk("%x(%x):%x %d@%ld (%d pages) -> %d\n", uaddr, buf, uend, count, src, pagecount, ret);
if (ret < 0)
goto out_free;
- pm.next = addr;
pm.pos = src;
pm.count = count;
pm.out = (unsigned long __user *)buf;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, pm.next);
- while (pm.count > 0 && vma) {
- if (!ptrace_may_attach(task)) {
- ret = -EIO;
- up_read(&mm->mmap_sem);
- goto out_release;
- }
- vend = min(vma->vm_end - 1, end - 1) + 1;
- ret = pagemap_fill(&pm, vend);
- if (ret || !pm.count)
- break;
- vend = min(vma->vm_end - 1, end - 1) + 1;
- ret = walk_page_range(mm, vma->vm_start, vend,
- &pagemap_walk, &pm);
- vma = vma->vm_next;
+ if (!ptrace_may_attach(task)) {
+ ret = -EIO;
+ } else {
+ unsigned long src = *ppos;
+ unsigned long svpfn = src / PM_ENTRY_BYTES;
+ unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+ unsigned long end_vaddr = TASK_SIZE_OF(task);
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(mm, start_vaddr, end_vaddr,
+ &pagemap_walk, &pm);
+ if (ret == PAGEMAP_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos = pm.pos;
+ if (!ret)
+ ret = pm.pos - src;
}
- up_read(&mm->mmap_sem);
-
- //printk("before fill at %ld\n", pm.pos);
- ret = pagemap_fill(&pm, end);
-
- printk("after fill at %ld\n", pm.pos);
- *ppos = pm.pos;
- if (!ret)
- ret = pm.pos - src;
-out_release:
- printk("releasing pages\n");
for (; pagecount; pagecount--) {
page = pages[pagecount-1];
if (!PageReserved(page))
@@ -682,7 +678,6 @@ out_free:
out_task:
put_task_struct(task);
out:
- printk("returning\n");
return ret;
}
_
* [PATCH 9/9] pagemap: export swap ptes
2007-08-22 23:18 [PATCH 1/9] /proc/pid/pagemap update (v2) Dave Hansen
` (6 preceding siblings ...)
2007-08-22 23:18 ` [PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
@ 2007-08-22 23:18 ` Dave Hansen
2007-08-24 0:29 ` Matt Mackall
7 siblings, 1 reply; 13+ messages in thread
From: Dave Hansen @ 2007-08-22 23:18 UTC (permalink / raw)
To: mpm; +Cc: linux-mm, Dave Hansen
In addition to understanding which physical pages are
used by a process, it would also be very nice to
enumerate how much swap space a process is using.
This patch enables /proc/<pid>/pagemap to display
swap ptes. In the process, it also changes the
constant that we used to indicate non-present ptes
before.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
lxc-dave/fs/proc/task_mmu.c | 38 +++++++++++++++++++++++++++++++-------
1 file changed, 31 insertions(+), 7 deletions(-)
diff -puN fs/proc/task_mmu.c~pagemap-export-swap-ptes fs/proc/task_mmu.c
--- lxc/fs/proc/task_mmu.c~pagemap-export-swap-ptes 2007-08-22 16:16:55.000000000 -0700
+++ lxc-dave/fs/proc/task_mmu.c 2007-08-22 16:16:55.000000000 -0700
@@ -7,6 +7,8 @@
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -506,9 +508,13 @@ struct pagemapread {
int index;
unsigned long __user *out;
};
-
#define PM_ENTRY_BYTES sizeof(unsigned long)
-#define PM_NOT_PRESENT ((unsigned long)-1)
+#define PM_RESERVED_BITS 3
+#define PM_RESERVED_OFFSET (BITS_PER_LONG-PM_RESERVED_BITS)
+#define PM_RESERVED_MASK (((1<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT PM_SPECIAL(1)
+#define PM_SWAP PM_SPECIAL(2)
#define PAGEMAP_END_OF_BUFFER 1
static int add_to_pagemap(unsigned long addr, unsigned long pfn,
@@ -545,6 +551,19 @@ static int pagemap_pte_hole(unsigned lon
return err;
}
+unsigned long swap_pte_to_pagemap_entry(pte_t pte)
+{
+ swp_entry_t entry = pte_to_swp_entry(pte);
+ unsigned long offset;
+ unsigned long swap_file_nr;
+
+ offset = swp_offset(entry);
+ swap_file_nr = swp_type(entry);
+ return PM_SWAP | swap_file_nr | (offset << MAX_SWAPFILES_SHIFT);
+}
+
+
+
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
void *private)
{
@@ -555,7 +574,9 @@ static int pagemap_pte_range(pmd_t *pmd,
pte = pte_offset_map(pmd, addr);
for (; addr != end; pte++, addr += PAGE_SIZE) {
unsigned long pfn = PM_NOT_PRESENT;
- if (pte_present(*pte))
+ if (is_swap_pte(*pte))
+ pfn = swap_pte_to_pagemap_entry(*pte);
+ else if (pte_present(*pte))
pfn = pte_pfn(*pte);
err = add_to_pagemap(addr, pfn, pm);
if (err)
@@ -578,10 +599,13 @@ static struct mm_walk pagemap_walk =
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
* For each page in the address space, this file contains one long
- * representing the corresponding physical page frame number (PFN) or
- * -1 if the page isn't present. This allows determining precisely
- * which pages are mapped and comparing mapped pages between
- * processes.
+ * representing the corresponding physical page frame number (PFN)
+ * if the page is present. If there is a swap entry for the
+ * physical page, then an encoding of the swap file number and the
+ * page's offset into the swap file are returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows
+ * determining precisely which pages are mapped (or in swap) and
+ * comparing mapped pages between processes.
*
* Efficient users of this interface will use /proc/pid/maps to
* determine which areas of memory are actually mapped and llseek to
_
* Re: [PATCH 8/9] pagemap: use page walker pte_hole() helper
2007-08-22 23:18 ` [PATCH 8/9] pagemap: use page walker pte_hole() helper Dave Hansen
@ 2007-08-22 23:54 ` Matt Mackall
0 siblings, 0 replies; 13+ messages in thread
From: Matt Mackall @ 2007-08-22 23:54 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-mm
On Wed, Aug 22, 2007 at 04:18:13PM -0700, Dave Hansen wrote:
>
> I tried to do this a bit more incrementally, but it ended
> up just looking like an even worse mess. So, this does
> a couple of different things.
>
> 1. use page walker pte_hole() helper, which
> 2. gets rid of the "next" value in "struct pagemapread"
> 3. allow 1-3 byte reads from pagemap. This at least
> ensures that we don't write over user memory if they
> ask us for 1 byte and we try to write 4.
> 4. Instead of trying to calculate what ranges of pages
> we are going to walk, simply start walking them,
> then return PAGEMAP_END_OF_BUFFER at the end of the
> buffer, error out, and stop walking.
> 5. enforce that reads must be aligned to PM_ENTRY_BYTES
>
> Note that, despite these functional additions, and some
> nice new comments, this patch still removes more code
> than it adds.
>
> Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
> + if (pm->count >= PM_ENTRY_BYTES)
> + __put_user(pfn pm->out);
I suppose I should have mentioned this typo when I spotted it
yesterday.. Will fix it on my end.
--
Mathematics is the supreme nostalgia of our time.
* Re: [PATCH 9/9] pagemap: export swap ptes
2007-08-22 23:18 ` [PATCH 9/9] pagemap: export swap ptes Dave Hansen
@ 2007-08-24 0:29 ` Matt Mackall
2007-08-24 16:19 ` Dave Hansen
0 siblings, 1 reply; 13+ messages in thread
From: Matt Mackall @ 2007-08-24 0:29 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-mm
On Wed, Aug 22, 2007 at 04:18:14PM -0700, Dave Hansen wrote:
>
> In addition to understanding which physical pages are
> used by a process, it would also be very nice to
> enumerate how much swap space a process is using.
>
> This patch enables /proc/<pid>/pagemap to display
> swap ptes. In the process, it also changes the
> constant that we used to indicate non-present ptes
> before.
>
> Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
I suspect you missed a quilt add here, as is_swap_pte is not in any
header file and is thus implicitly declared.
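For reference, the helper currently looks something like this in
mm/migrate.c:

static inline int is_swap_pte(pte_t pte)
{
        return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
}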
--
Mathematics is the supreme nostalgia of our time.
* Re: [PATCH 9/9] pagemap: export swap ptes
2007-08-24 0:29 ` Matt Mackall
@ 2007-08-24 16:19 ` Dave Hansen
2007-08-24 16:56 ` Matt Mackall
0 siblings, 1 reply; 13+ messages in thread
From: Dave Hansen @ 2007-08-24 16:19 UTC (permalink / raw)
To: Matt Mackall; +Cc: linux-mm
On Thu, 2007-08-23 at 19:29 -0500, Matt Mackall wrote:
> On Wed, Aug 22, 2007 at 04:18:14PM -0700, Dave Hansen wrote:
> >
> > In addition to understanding which physical pages are
> > used by a process, it would also be very nice to
> > enumerate how much swap space a process is using.
> >
> > This patch enables /proc/<pid>/pagemap to display
> > swap ptes. In the process, it also changes the
> > constant that we used to indicate non-present ptes
> > before.
> >
> > Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
>
> I suspect you missed a quilt add here, as is_swap_pte is not in any
> header file and is thus implicitly declared.
Yeah, I have another patch that was declared waaaaaaay earlier in my
series that does this. I'm not completely confident in the way that I
formatted the swap pte, so let's hold off on just this patch for now.
I'll rework it and send it your way again in a few days.
-- Dave
* Re: [PATCH 9/9] pagemap: export swap ptes
2007-08-24 16:19 ` Dave Hansen
@ 2007-08-24 16:56 ` Matt Mackall
0 siblings, 0 replies; 13+ messages in thread
From: Matt Mackall @ 2007-08-24 16:56 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-mm
On Fri, Aug 24, 2007 at 09:19:22AM -0700, Dave Hansen wrote:
> On Thu, 2007-08-23 at 19:29 -0500, Matt Mackall wrote:
> > On Wed, Aug 22, 2007 at 04:18:14PM -0700, Dave Hansen wrote:
> > >
> > > In addition to understanding which physical pages are
> > > used by a process, it would also be very nice to
> > > enumerate how much swap space a process is using.
> > >
> > > This patch enables /proc/<pid>/pagemap to display
> > > swap ptes. In the process, it also changes the
> > > constant that we used to indicate non-present ptes
> > > before.
> > >
> > > Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
> >
> > I suspect you missed a quilt add here, as is_swap_pte is not in any
> > header file and is thus implicitly declared.
>
> Yeah, I have another patch that was declared waaaaaaay earlier in my
> series that does this.
I moved it from mm/migrate.c to include/linux/swapops.h here.
> I'm not completely confident in the way that I
> formatted the swap pte, so let's hold off on just this patch for now.
> I'll rework it and send it your way again in a few days.
Yeah, that bit is a little tricky. I'm going to be off the net for the
next week or so, so I can pick this up in September. I should just
need a resend of this patch.
--
Mathematics is the supreme nostalgia of our time.