* [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing
@ 2009-07-24 3:57 Moussa A. Ba
2009-07-24 8:39 ` Amerigo Wang
0 siblings, 1 reply; 21+ messages in thread
From: Moussa A. Ba @ 2009-07-24 3:57 UTC (permalink / raw)
To: linux-kernel
Cc: akpm, adobriyan, mpm, mel, yinghan, npiggin, jaredeh, moussa.a.ba
Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com>
Signed-off-by: Moussa Ba <moussa.a.ba@gmail.com>
--- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700
+++ b/fs/proc/task_mmu.c 2009-07-21 14:32:56.000000000 -0700
@@ -461,6 +461,33 @@
cond_resched();
return 0;
}
+static void walk_vma_area(struct mm_walk *this_walk, struct
vm_area_struct *vma,
+ int type)
+{
+ switch (type) {
+ /* Clear Anon VMAs only */
+ case 1:
+ if (!is_vm_hugetlb_page(vma) && !vma->vm_file)
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ /* Clear Mapped VMAs only */
+ case 2:
+ if (!is_vm_hugetlb_page(vma) && vma->vm_file)
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ /* Clear All VMAs */
+ default:
+ if (!is_vm_hugetlb_page(vma))
+ walk_page_range(vma->vm_start,
+ vma->vm_end,
+ this_walk);
+ break;
+ }
+}
static ssize_t clear_refs_write(struct file *file, const char __user *
buf,
size_t count, loff_t * ppos)
@@ -469,13 +496,15 @@
char buffer[PROC_NUMBUF], *end;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ int type;
memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- if (!simple_strtol(buffer, &end, 0))
+ type = simple_strtol(buffer, &end, 0);
+ if (!type)
return -EINVAL;
if (*end == '\n')
end++;
@@ -491,9 +520,7 @@
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
- if (!is_vm_hugetlb_page(vma))
- walk_page_range(vma->vm_start, vma->vm_end,
- &clear_refs_walk);
+ walk_vma_area(&clear_refs_walk, vma, type);
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-24 3:57 [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing Moussa A. Ba @ 2009-07-24 8:39 ` Amerigo Wang 2009-07-24 18:00 ` Moussa Ba 2009-07-27 20:19 ` Moussa A. Ba 0 siblings, 2 replies; 21+ messages in thread From: Amerigo Wang @ 2009-07-24 8:39 UTC (permalink / raw) To: Moussa A. Ba Cc: linux-kernel, akpm, adobriyan, mpm, mel, yinghan, npiggin, jaredeh On Thu, Jul 23, 2009 at 08:57:22PM -0700, Moussa A. Ba wrote: > > Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com> > Signed-off-by: Moussa Ba <moussa.a.ba@gmail.com> > --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > +++ b/fs/proc/task_mmu.c 2009-07-21 14:32:56.000000000 -0700 > @@ -461,6 +461,33 @@ > cond_resched(); > return 0; > } > +static void walk_vma_area(struct mm_walk *this_walk, struct > vm_area_struct *vma, > + int type) > +{ > + switch (type) { > + /* Clear Anon VMAs only */ > + case 1: > + if (!is_vm_hugetlb_page(vma) && !vma->vm_file) > + walk_page_range(vma->vm_start, > + vma->vm_end, > + this_walk); > + break; > + /* Clear Mapped VMAs only */ > + case 2: > + if (!is_vm_hugetlb_page(vma) && vma->vm_file) > + walk_page_range(vma->vm_start, > + vma->vm_end, > + this_walk); > + break; > + /* Clear All VMAs */ > + default: > + if (!is_vm_hugetlb_page(vma)) > + walk_page_range(vma->vm_start, > + vma->vm_end, > + this_walk); > + break; > + } > +} > First, can you make these plain numbers as macros or enum types? Second, I believe the above code can be simiplified, how about below? { if (!is_vm_hugetlb_page(vma)) { if (type == 1 && vma->vm_file) return; if (type == 2 && !vma->vm_file) return; walk_page_range(vma->vm_start, vma->end, this_walk); } } > static ssize_t clear_refs_write(struct file *file, const char __user * > buf, > size_t count, loff_t * ppos) > @@ -469,13 +496,15 @@ > char buffer[PROC_NUMBUF], *end; > struct mm_struct *mm; > struct vm_area_struct *vma; > + int type; > > memset(buffer, 0, sizeof(buffer)); > if (count > sizeof(buffer) - 1) > count = sizeof(buffer) - 1; > if (copy_from_user(buffer, buf, count)) > return -EFAULT; > - if (!simple_strtol(buffer, &end, 0)) > + type = simple_strtol(buffer, &end, 0); > + if (!type) > return -EINVAL; > if (*end == '\n') > end++; > @@ -491,9 +520,7 @@ > down_read(&mm->mmap_sem); > for (vma = mm->mmap; vma; vma = vma->vm_next) { > clear_refs_walk.private = vma; > - if (!is_vm_hugetlb_page(vma)) > - walk_page_range(vma->vm_start, vma->vm_end, > - &clear_refs_walk); > + walk_vma_area(&clear_refs_walk, vma, type); > } > flush_tlb_mm(mm); > up_read(&mm->mmap_sem); > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-24 8:39 ` Amerigo Wang @ 2009-07-24 18:00 ` Moussa Ba 2009-07-27 20:19 ` Moussa A. Ba 1 sibling, 0 replies; 21+ messages in thread From: Moussa Ba @ 2009-07-24 18:00 UTC (permalink / raw) To: Amerigo Wang Cc: linux-kernel, akpm, adobriyan, mpm, mel, yinghan, npiggin, jaredeh On Fri, Jul 24, 2009 at 1:39 AM, Amerigo Wang <xiyou.wangcong@gmail.com> wrote: > > On Thu, Jul 23, 2009 at 08:57:22PM -0700, Moussa A. Ba wrote: > > > > Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com> > > Signed-off-by: Moussa Ba <moussa.a.ba@gmail.com> > > --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > > +++ b/fs/proc/task_mmu.c 2009-07-21 14:32:56.000000000 -0700 > > @@ -461,6 +461,33 @@ > > cond_resched(); > > return 0; > > } > > +static void walk_vma_area(struct mm_walk *this_walk, struct > > vm_area_struct *vma, > > + int type) > > +{ > > + switch (type) { > > + /* Clear Anon VMAs only */ > > + case 1: > > + if (!is_vm_hugetlb_page(vma) && !vma->vm_file) > > + walk_page_range(vma->vm_start, > > + vma->vm_end, > > + this_walk); > > + break; > > + /* Clear Mapped VMAs only */ > > + case 2: > > + if (!is_vm_hugetlb_page(vma) && vma->vm_file) > > + walk_page_range(vma->vm_start, > > + vma->vm_end, > > + this_walk); > > + break; > > + /* Clear All VMAs */ > > + default: > > + if (!is_vm_hugetlb_page(vma)) > > + walk_page_range(vma->vm_start, > > + vma->vm_end, > > + this_walk); > > + break; > > + } > > +} > > > > > First, can you make these plain numbers as macros or enum types? > > Second, I believe the above code can be simiplified, how about below? > > { > if (!is_vm_hugetlb_page(vma)) { > if (type == 1 && vma->vm_file) > return; > if (type == 2 && !vma->vm_file) > return; > walk_page_range(vma->vm_start, vma->end, this_walk); > } > } > It would indeed work, it can probably be written as: { if (is_vm_hugetlb_page(vma)) return; if (type == 2 && vma->vm_file) return; if (type == 3 && !vma->vm_file) return; walk_page_range(vma->vm_start, vma->vm_end, this_walk); } The change in numbers also addresses Matt's concern that writing a 1 is the default behavior where all vmas are cleared, hence it is better to not break that and make the selective clearing options 2 and 3. There is no documentation for clear_refs beyond a 1 liner that states "Clears page referenced bits shown in smaps output". I will also update the Documentation and add it to the patch. > > > > static ssize_t clear_refs_write(struct file *file, const char __user * > > buf, > > size_t count, loff_t * ppos) > > @@ -469,13 +496,15 @@ > > char buffer[PROC_NUMBUF], *end; > > struct mm_struct *mm; > > struct vm_area_struct *vma; > > + int type; > > > > memset(buffer, 0, sizeof(buffer)); > > if (count > sizeof(buffer) - 1) > > count = sizeof(buffer) - 1; > > if (copy_from_user(buffer, buf, count)) > > return -EFAULT; > > - if (!simple_strtol(buffer, &end, 0)) > > + type = simple_strtol(buffer, &end, 0); > > + if (!type) > > return -EINVAL; > > if (*end == '\n') > > end++; > > @@ -491,9 +520,7 @@ > > down_read(&mm->mmap_sem); > > for (vma = mm->mmap; vma; vma = vma->vm_next) { > > clear_refs_walk.private = vma; > > - if (!is_vm_hugetlb_page(vma)) > > - walk_page_range(vma->vm_start, vma->vm_end, > > - &clear_refs_walk); > > + walk_vma_area(&clear_refs_walk, vma, type); > > } > > flush_tlb_mm(mm); > > up_read(&mm->mmap_sem); > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-24 8:39 ` Amerigo Wang 2009-07-24 18:00 ` Moussa Ba @ 2009-07-27 20:19 ` Moussa A. Ba 2009-07-27 20:30 ` Matt Mackall 2009-07-27 20:57 ` David Rientjes 1 sibling, 2 replies; 21+ messages in thread From: Moussa A. Ba @ 2009-07-27 20:19 UTC (permalink / raw) To: linux-kernel Cc: Amerigo Wang, akpm, adobriyan, mpm, mel, yinghan, npiggin, jaredeh This patch makes the clear_refs proc interface a bit more versatile. It adds support for clearing anonymous pages, file mapped pages or both. The clear_refs entry is used to reset the Referenced bits on virtual and physical pages associated with a process. echo 1 > /proc/PID/clear_refs clears all pages associated with the process echo 2 > /proc/PID/clear_refs clears anonymous pages only echo 3 > /proc/PID/clear_refs clears file mapped pages only Any other value written to the proc entry will clear all pages. Selective clearing the pages has a measurable impact on performance as it limits the number of page walks. We have been using this interface and this adds flexibility to the user user space application implementing the reference clearing. Signed-off-by: Jared Hulbert (jaredeh@gmail.com) Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) ------- Documentation/filesystems/proc.txt | 7 +++++++ fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 @@ -462,6 +462,27 @@ return 0; } +static void walk_vma_area(struct mm_walk *this_walk, + struct vm_area_struct *vma, int type) +{ + + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous + * pages. + * + * Writing 3 to /proc/pid/clear_refs will clear all file mapped + * pages. + * + * Writing any other value including 1 will clear all pages + */ + if (is_vm_hugetlb_page(vma)) + return; + if (type == 2 && vma->vm_file) + return; + if (type == 3 && !vma->vm_file) + return; + walk_page_range(vma->vm_start, vma->vm_end, this_walk); +} + static ssize_t clear_refs_write(struct file *file, const char __user * buf, size_t count, loff_t * ppos) { @@ -469,13 +490,15 @@ char buffer[PROC_NUMBUF], *end; struct mm_struct *mm; struct vm_area_struct *vma; + int type; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) + type = strict_strtol(buffer, &end, 0); + if (!type) return -EINVAL; if (*end == '\n') end++; @@ -491,9 +514,7 @@ down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; - if (!is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); + walk_vma_area(&clear_refs_walk, vma, type); } flush_tlb_mm(mm); up_read(&mm->mmap_sem); --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700 +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 -0700 @@ -375,6 +375,13 @@ This file is only present if the CONFIG_MMU kernel configuration option is enabled. +The clear_refs entry is used to reset the Referenced bits on virtual and physical +pages associated with a process. +echo 1 > /proc/PID/clear_refs clears all pages associated with the process +echo 2 > /proc/PID/clear_refs clears anonymous pages only +echo 3 > /proc/PID/clear_refs clears file mapped pages only +Any other value written to the proc entry will clear all pages. + 1.2 Kernel data --------------- ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 20:19 ` Moussa A. Ba @ 2009-07-27 20:30 ` Matt Mackall 2009-07-27 20:57 ` David Rientjes 1 sibling, 0 replies; 21+ messages in thread From: Matt Mackall @ 2009-07-27 20:30 UTC (permalink / raw) To: Moussa A. Ba Cc: linux-kernel, Amerigo Wang, akpm, adobriyan, mel, yinghan, npiggin, jaredeh On Mon, 2009-07-27 at 13:19 -0700, Moussa A. Ba wrote: > This patch makes the clear_refs proc interface a bit more versatile. > It adds support for clearing anonymous pages, file mapped pages or both. > > The clear_refs entry is used to reset the Referenced bits on virtual and > physical pages associated with a process. > echo 1 > /proc/PID/clear_refs clears all pages associated with the process > echo 2 > /proc/PID/clear_refs clears anonymous pages only > echo 3 > /proc/PID/clear_refs clears file mapped pages only > Any other value written to the proc entry will clear all pages. > > Selective clearing the pages has a measurable impact on performance as it > limits the number of page walks. We have been using this interface and this > adds flexibility to the user user space application implementing the reference > clearing. Looks ok to me. Acked-by: Matt Mackall <mpm@selenic.com> -- http://selenic.com : development and support for Mercurial and Linux ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 20:19 ` Moussa A. Ba 2009-07-27 20:30 ` Matt Mackall @ 2009-07-27 20:57 ` David Rientjes 2009-07-27 22:14 ` Moussa Ba 1 sibling, 1 reply; 21+ messages in thread From: David Rientjes @ 2009-07-27 20:57 UTC (permalink / raw) To: Moussa A. Ba Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, 27 Jul 2009, Moussa A. Ba wrote: > This patch makes the clear_refs proc interface a bit more versatile. > It adds support for clearing anonymous pages, file mapped pages or both. > It already has support for clearing both, so you're only adding anonymous and file-backed filters. > The clear_refs entry is used to reset the Referenced bits on virtual and > physical pages associated with a process. > echo 1 > /proc/PID/clear_refs clears all pages associated with the process > echo 2 > /proc/PID/clear_refs clears anonymous pages only > echo 3 > /proc/PID/clear_refs clears file mapped pages only > Any other value written to the proc entry will clear all pages. > clear_refs currently accepts any non-zero value, so it's possible that this will break user scripts that, for whatever reason, write '2' or '3'. I think that's acceptable, but it would be helpful to make all other values a no-op similar to drop_caches at this point to avoid the potential for breakage if this is ever extended any further. > Selective clearing the pages has a measurable impact on performance as it > limits the number of page walks. We have been using this interface and this > adds flexibility to the user user space application implementing the reference > clearing. > > Signed-off-by: Jared Hulbert (jaredeh@gmail.com) > Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) Email addresses in < > braces, please. The first sign-off line normally indicates who wrote the patch, but your submission lacks a From: line, so git would indicate you wrote it. If that's incorrect, please add a From: line as described in Documentation/SubmittingPatches. If it's correct, please reorder your sign-off lines. > ------- > Documentation/filesystems/proc.txt | 7 +++++++ > fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- > 2 files changed, 32 insertions(+), 4 deletions(-) > > --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 > @@ -462,6 +462,27 @@ > return 0; > } > > +static void walk_vma_area(struct mm_walk *this_walk, > + struct vm_area_struct *vma, int type) > +{ This is a very generic name for something that is only applicable to clear_refs, so please name it accordingly. This will also avoid having to pass the struct mm_walk * in since its only user is clear_refs_walk. > + > + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous > + * pages. > + * > + * Writing 3 to /proc/pid/clear_refs will clear all file mapped > + * pages. > + * > + * Writing any other value including 1 will clear all pages > + */ Documentation/CodingStyle would suggest this format: /* * Multi-line kernel comments always start .. * with an empty first line. */ > + if (is_vm_hugetlb_page(vma)) > + return; > + if (type == 2 && vma->vm_file) > + return; > + if (type == 3 && !vma->vm_file) > + return; > + walk_page_range(vma->vm_start, vma->vm_end, this_walk); > +} K&R would suggest #define's (or enums) for those hard-coded values. I think that's already been suggested for this patch, actually. > + > static ssize_t clear_refs_write(struct file *file, const char __user * buf, > size_t count, loff_t * ppos) > { > @@ -469,13 +490,15 @@ > char buffer[PROC_NUMBUF], *end; > struct mm_struct *mm; > struct vm_area_struct *vma; > + int type; > > memset(buffer, 0, sizeof(buffer)); > if (count > sizeof(buffer) - 1) > count = sizeof(buffer) - 1; > if (copy_from_user(buffer, buf, count)) > return -EFAULT; > - if (!simple_strtol(buffer, &end, 0)) > + type = strict_strtol(buffer, &end, 0); > + if (!type) > return -EINVAL; > if (*end == '\n') > end++; > @@ -491,9 +514,7 @@ > down_read(&mm->mmap_sem); > for (vma = mm->mmap; vma; vma = vma->vm_next) { > clear_refs_walk.private = vma; > - if (!is_vm_hugetlb_page(vma)) > - walk_page_range(vma->vm_start, vma->vm_end, > - &clear_refs_walk); > + walk_vma_area(&clear_refs_walk, vma, type); > } > flush_tlb_mm(mm); > up_read(&mm->mmap_sem); > --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 > -0700 > +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 > -0700 > @@ -375,6 +375,13 @@ > This file is only present if the CONFIG_MMU kernel configuration option is > enabled. > > +The clear_refs entry is used to reset the Referenced bits on virtual and physical > +pages associated with a process. > +echo 1 > /proc/PID/clear_refs clears all pages associated with the process > +echo 2 > /proc/PID/clear_refs clears anonymous pages only > +echo 3 > /proc/PID/clear_refs clears file mapped pages only > +Any other value written to the proc entry will clear all pages. > + Please follow the format in this document for how other /proc/PID/* entries are described. That format could really be improved here, perhaps you could clean proc.txt up a little bit while you're here? Also, as the author of clear_refs, please cc me on future revisions of this patch. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 20:57 ` David Rientjes @ 2009-07-27 22:14 ` Moussa Ba 2009-07-27 22:20 ` Matt Mackall 2009-07-27 22:49 ` David Rientjes 0 siblings, 2 replies; 21+ messages in thread From: Moussa Ba @ 2009-07-27 22:14 UTC (permalink / raw) To: David Rientjes Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, Jul 27, 2009 at 1:57 PM, David Rientjes<rientjes@google.com> wrote: > On Mon, 27 Jul 2009, Moussa A. Ba wrote: > >> This patch makes the clear_refs proc interface a bit more versatile. >> It adds support for clearing anonymous pages, file mapped pages or both. >> > > It already has support for clearing both, so you're only adding anonymous > and file-backed filters. > >> The clear_refs entry is used to reset the Referenced bits on virtual and >> physical pages associated with a process. >> echo 1 > /proc/PID/clear_refs clears all pages associated with the process >> echo 2 > /proc/PID/clear_refs clears anonymous pages only >> echo 3 > /proc/PID/clear_refs clears file mapped pages only >> Any other value written to the proc entry will clear all pages. >> > > clear_refs currently accepts any non-zero value, so it's possible that > this will break user scripts that, for whatever reason, write '2' or '3'. > I think that's acceptable, but it would be helpful to make all other > values a no-op similar to drop_caches at this point to avoid the potential > for breakage if this is ever extended any further. > >> Selective clearing the pages has a measurable impact on performance as it >> limits the number of page walks. We have been using this interface and this >> adds flexibility to the user user space application implementing the reference >> clearing. >> >> Signed-off-by: Jared Hulbert (jaredeh@gmail.com) >> Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) > > Email addresses in < > braces, please. > > The first sign-off line normally indicates who wrote the patch, but your > submission lacks a From: line, so git would indicate you wrote it. If > that's incorrect, please add a From: line as described in > Documentation/SubmittingPatches. If it's correct, please reorder your > sign-off lines. > I will reorder the sign-off lines >> ------- >> Documentation/filesystems/proc.txt | 7 +++++++ >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- >> 2 files changed, 32 insertions(+), 4 deletions(-) >> >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 >> @@ -462,6 +462,27 @@ >> return 0; >> } >> >> +static void walk_vma_area(struct mm_walk *this_walk, >> + struct vm_area_struct *vma, int type) >> +{ > > This is a very generic name for something that is only applicable to > clear_refs, so please name it accordingly. This will also avoid having to > pass the struct mm_walk * in since its only user is clear_refs_walk. > Done. >> + >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous >> + * pages. >> + * >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped >> + * pages. >> + * >> + * Writing any other value including 1 will clear all pages >> + */ > > Documentation/CodingStyle would suggest this format: > > /* > * Multi-line kernel comments always start .. > * with an empty first line. > */ > Done. >> + if (is_vm_hugetlb_page(vma)) >> + return; >> + if (type == 2 && vma->vm_file) >> + return; >> + if (type == 3 && !vma->vm_file) >> + return; >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk); >> +} > > K&R would suggest #define's (or enums) for those hard-coded values. I > think that's already been suggested for this patch, actually. > Would this be acceptable? enum clear_refs_walk_type { CLEAR_REFS_ALL = 1, CLEAR_REFS_ANON = 2, CLEAR_REFS_MAPPED = 3 }; static void clear_refs_walk_vma_area(struct mm_walk *this_walk, struct vm_area_struct *vma, enum clear_refs_walk_type type) { /* * Writing 1 to /proc/pid/clear_refs clears all pages. * Writing 2 to /proc/pid/clear_refs clears Anonymous pages. * Writing 3 to /proc/pid/clear_refs clears file mapped pages. */ if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return; if (is_vm_hugetlb_page(vma)) return; if (type == CLEAR_REFS_ANON && vma->vm_file) return; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) return; walk_page_range(vma->vm_start, vma->vm_end, this_walk); } >> + >> static ssize_t clear_refs_write(struct file *file, const char __user * buf, >> size_t count, loff_t * ppos) >> { >> @@ -469,13 +490,15 @@ >> char buffer[PROC_NUMBUF], *end; >> struct mm_struct *mm; >> struct vm_area_struct *vma; >> + int type; >> >> memset(buffer, 0, sizeof(buffer)); >> if (count > sizeof(buffer) - 1) >> count = sizeof(buffer) - 1; >> if (copy_from_user(buffer, buf, count)) >> return -EFAULT; >> - if (!simple_strtol(buffer, &end, 0)) >> + type = strict_strtol(buffer, &end, 0); >> + if (!type) >> return -EINVAL; >> if (*end == '\n') >> end++; >> @@ -491,9 +514,7 @@ >> down_read(&mm->mmap_sem); >> for (vma = mm->mmap; vma; vma = vma->vm_next) { >> clear_refs_walk.private = vma; >> - if (!is_vm_hugetlb_page(vma)) >> - walk_page_range(vma->vm_start, vma->vm_end, >> - &clear_refs_walk); >> + walk_vma_area(&clear_refs_walk, vma, type); >> } >> flush_tlb_mm(mm); >> up_read(&mm->mmap_sem); >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 >> -0700 >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 >> -0700 >> @@ -375,6 +375,13 @@ >> This file is only present if the CONFIG_MMU kernel configuration option is >> enabled. >> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical >> +pages associated with a process. >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only >> +Any other value written to the proc entry will clear all pages. >> + > > Please follow the format in this document for how other /proc/PID/* > entries are described. > > That format could really be improved here, perhaps you could clean > proc.txt up a little bit while you're here? > > I am not sure what you mean by "clean" proc.txt, I did not detect much formatting in the PID proc enries description, beyond what I rewrote below: The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and physical pages associated with a process. To clear all pages associated with the process > echo 1 > /proc/PID/clear_refs To clear all anonymous pages associated with the process > echo 2 > /proc/PID/clear_refs To clear all file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs Any other value written to /proc/PID/clear_refs will have no effect. > Also, as the author of clear_refs, please cc me on future revisions of > this patch. > I shall. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 22:14 ` Moussa Ba @ 2009-07-27 22:20 ` Matt Mackall 2009-07-27 22:49 ` David Rientjes 1 sibling, 0 replies; 21+ messages in thread From: Matt Mackall @ 2009-07-27 22:20 UTC (permalink / raw) To: Moussa Ba Cc: David Rientjes, linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, 2009-07-27 at 15:14 -0700, Moussa Ba wrote: > On Mon, Jul 27, 2009 at 1:57 PM, David Rientjes<rientjes@google.com> wrote: > > On Mon, 27 Jul 2009, Moussa A. Ba wrote: > > > >> This patch makes the clear_refs proc interface a bit more versatile. > >> It adds support for clearing anonymous pages, file mapped pages or both. > >> > > > > It already has support for clearing both, so you're only adding anonymous > > and file-backed filters. > > > >> The clear_refs entry is used to reset the Referenced bits on virtual and > >> physical pages associated with a process. > >> echo 1 > /proc/PID/clear_refs clears all pages associated with the process > >> echo 2 > /proc/PID/clear_refs clears anonymous pages only > >> echo 3 > /proc/PID/clear_refs clears file mapped pages only > >> Any other value written to the proc entry will clear all pages. > >> > > > > clear_refs currently accepts any non-zero value, so it's possible that > > this will break user scripts that, for whatever reason, write '2' or '3'. > > I think that's acceptable, but it would be helpful to make all other > > values a no-op similar to drop_caches at this point to avoid the potential > > for breakage if this is ever extended any further. > > > >> Selective clearing the pages has a measurable impact on performance as it > >> limits the number of page walks. We have been using this interface and this > >> adds flexibility to the user user space application implementing the reference > >> clearing. > >> > >> Signed-off-by: Jared Hulbert (jaredeh@gmail.com) > >> Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) > > > > Email addresses in < > braces, please. > > > > The first sign-off line normally indicates who wrote the patch, but your > > submission lacks a From: line, so git would indicate you wrote it. If > > that's incorrect, please add a From: line as described in > > Documentation/SubmittingPatches. If it's correct, please reorder your > > sign-off lines. > > > I will reorder the sign-off lines > >> ------- > >> Documentation/filesystems/proc.txt | 7 +++++++ > >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- > >> 2 files changed, 32 insertions(+), 4 deletions(-) > >> > >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 > >> @@ -462,6 +462,27 @@ > >> return 0; > >> } > >> > >> +static void walk_vma_area(struct mm_walk *this_walk, > >> + struct vm_area_struct *vma, int type) > >> +{ > > > > This is a very generic name for something that is only applicable to > > clear_refs, so please name it accordingly. This will also avoid having to > > pass the struct mm_walk * in since its only user is clear_refs_walk. > > > Done. > >> + > >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous > >> + * pages. > >> + * > >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped > >> + * pages. > >> + * > >> + * Writing any other value including 1 will clear all pages > >> + */ > > > > Documentation/CodingStyle would suggest this format: > > > > /* > > * Multi-line kernel comments always start .. > > * with an empty first line. > > */ > > > Done. > >> + if (is_vm_hugetlb_page(vma)) > >> + return; > >> + if (type == 2 && vma->vm_file) > >> + return; > >> + if (type == 3 && !vma->vm_file) > >> + return; > >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk); > >> +} > > > > K&R would suggest #define's (or enums) for those hard-coded values. I > > think that's already been suggested for this patch, actually. > > > > Would this be acceptable? > > enum clear_refs_walk_type { > CLEAR_REFS_ALL = 1, > CLEAR_REFS_ANON = 2, > CLEAR_REFS_MAPPED = 3 > }; I don't see a scenario where we use these values in more than one place, so I don't really see this as much of an improvement given that the magic numbers are already documented clearly in situ. But I think we tend to lean towards #defines rather than enums in any case. > static void clear_refs_walk_vma_area(struct mm_walk *this_walk, > struct vm_area_struct *vma, enum clear_refs_walk_type type) > { > > /* > * Writing 1 to /proc/pid/clear_refs clears all pages. > * Writing 2 to /proc/pid/clear_refs clears Anonymous pages. > * Writing 3 to /proc/pid/clear_refs clears file mapped pages. > */ > if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) > return; > if (is_vm_hugetlb_page(vma)) > return; > if (type == CLEAR_REFS_ANON && vma->vm_file) > return; > if (type == CLEAR_REFS_MAPPED && !vma->vm_file) > return; > walk_page_range(vma->vm_start, vma->vm_end, this_walk); > } > > > >> + > >> static ssize_t clear_refs_write(struct file *file, const char __user * buf, > >> size_t count, loff_t * ppos) > >> { > >> @@ -469,13 +490,15 @@ > >> char buffer[PROC_NUMBUF], *end; > >> struct mm_struct *mm; > >> struct vm_area_struct *vma; > >> + int type; > >> > >> memset(buffer, 0, sizeof(buffer)); > >> if (count > sizeof(buffer) - 1) > >> count = sizeof(buffer) - 1; > >> if (copy_from_user(buffer, buf, count)) > >> return -EFAULT; > >> - if (!simple_strtol(buffer, &end, 0)) > >> + type = strict_strtol(buffer, &end, 0); > >> + if (!type) > >> return -EINVAL; > >> if (*end == '\n') > >> end++; > >> @@ -491,9 +514,7 @@ > >> down_read(&mm->mmap_sem); > >> for (vma = mm->mmap; vma; vma = vma->vm_next) { > >> clear_refs_walk.private = vma; > >> - if (!is_vm_hugetlb_page(vma)) > >> - walk_page_range(vma->vm_start, vma->vm_end, > >> - &clear_refs_walk); > >> + walk_vma_area(&clear_refs_walk, vma, type); > >> } > >> flush_tlb_mm(mm); > >> up_read(&mm->mmap_sem); > >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 > >> -0700 > >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 > >> -0700 > >> @@ -375,6 +375,13 @@ > >> This file is only present if the CONFIG_MMU kernel configuration option is > >> enabled. > >> > >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical > >> +pages associated with a process. > >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process > >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only > >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only > >> +Any other value written to the proc entry will clear all pages. > >> + > > > > Please follow the format in this document for how other /proc/PID/* > > entries are described. > > > > That format could really be improved here, perhaps you could clean > > proc.txt up a little bit while you're here? > > > > > > I am not sure what you mean by "clean" proc.txt, I did not detect much > formatting in the PID proc enries description, beyond what I rewrote > below: > > > The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and > physical pages associated with a process. > To clear all pages associated with the process > > echo 1 > /proc/PID/clear_refs > > To clear all anonymous pages associated with the process > > echo 2 > /proc/PID/clear_refs > > To clear all file mapped pages associated with the process > > echo 3 > /proc/PID/clear_refs > Any other value written to /proc/PID/clear_refs will have no effect. > > > > Also, as the author of clear_refs, please cc me on future revisions of > > this patch. > > > > I shall. -- http://selenic.com : development and support for Mercurial and Linux ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 22:14 ` Moussa Ba 2009-07-27 22:20 ` Matt Mackall @ 2009-07-27 22:49 ` David Rientjes 2009-07-27 23:38 ` Moussa Ba 1 sibling, 1 reply; 21+ messages in thread From: David Rientjes @ 2009-07-27 22:49 UTC (permalink / raw) To: Moussa Ba Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh [-- Attachment #1: Type: TEXT/PLAIN, Size: 7611 bytes --] On Mon, 27 Jul 2009, Moussa Ba wrote: > > clear_refs currently accepts any non-zero value, so it's possible that > > this will break user scripts that, for whatever reason, write '2' or '3'. > > I think that's acceptable, but it would be helpful to make all other > > values a no-op similar to drop_caches at this point to avoid the potential > > for breakage if this is ever extended any further. > > In your latest post, I see you implemented this in clear_refs_walk_vma_area() by checking for CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. Thanks! Don't forget to update Documentation/filesystems/proc.txt to specify that. > >> Selective clearing the pages has a measurable impact on performance as it > >> limits the number of page walks. We have been using this interface and this > >> adds flexibility to the user user space application implementing the reference > >> clearing. > >> > >> Signed-off-by: Jared Hulbert (jaredeh@gmail.com) > >> Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) > > > > Email addresses in < > braces, please. > > > > The first sign-off line normally indicates who wrote the patch, but your > > submission lacks a From: line, so git would indicate you wrote it. If > > that's incorrect, please add a From: line as described in > > Documentation/SubmittingPatches. If it's correct, please reorder your > > sign-off lines. > > > I will reorder the sign-off lines Ok, that removes the ambiguity concerning authorship, thanks. > >> ------- > >> Documentation/filesystems/proc.txt | 7 +++++++ > >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- > >> 2 files changed, 32 insertions(+), 4 deletions(-) > >> > >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 > >> @@ -462,6 +462,27 @@ > >> return 0; > >> } > >> > >> +static void walk_vma_area(struct mm_walk *this_walk, > >> + struct vm_area_struct *vma, int type) > >> +{ > > > > This is a very generic name for something that is only applicable to > > clear_refs, so please name it accordingly. This will also avoid having to > > pass the struct mm_walk * in since its only user is clear_refs_walk. > > > Done. > >> + > >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous > >> + * pages. > >> + * > >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped > >> + * pages. > >> + * > >> + * Writing any other value including 1 will clear all pages > >> + */ > > > > Documentation/CodingStyle would suggest this format: > > > > /* > > * Multi-line kernel comments always start .. > > * with an empty first line. > > */ > > > Done. > >> + if (is_vm_hugetlb_page(vma)) > >> + return; > >> + if (type == 2 && vma->vm_file) > >> + return; > >> + if (type == 3 && !vma->vm_file) > >> + return; > >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk); > >> +} > > > > K&R would suggest #define's (or enums) for those hard-coded values. I > > think that's already been suggested for this patch, actually. > > > > Would this be acceptable? > > enum clear_refs_walk_type { > CLEAR_REFS_ALL = 1, > CLEAR_REFS_ANON = 2, > CLEAR_REFS_MAPPED = 3 > }; > #define seems more appropriate for this particular use case. We simply try to avoid hard-coded integers in source code (rationale: K&R). > static void clear_refs_walk_vma_area(struct mm_walk *this_walk, > struct vm_area_struct *vma, enum clear_refs_walk_type type) > { Ddi you consider my suggestion of dropping the struct mm_walk * formal since this is now a clear_refs-specific function? walk_page_range() will always take clear_refs_walk, there's no need to pass it in. > > /* > * Writing 1 to /proc/pid/clear_refs clears all pages. > * Writing 2 to /proc/pid/clear_refs clears Anonymous pages. > * Writing 3 to /proc/pid/clear_refs clears file mapped pages. > */ > if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) > return; > if (is_vm_hugetlb_page(vma)) > return; > if (type == CLEAR_REFS_ANON && vma->vm_file) > return; > if (type == CLEAR_REFS_MAPPED && !vma->vm_file) > return; > walk_page_range(vma->vm_start, vma->vm_end, this_walk); > } > > > >> + > >> static ssize_t clear_refs_write(struct file *file, const char __user * buf, > >> size_t count, loff_t * ppos) > >> { > >> @@ -469,13 +490,15 @@ > >> char buffer[PROC_NUMBUF], *end; > >> struct mm_struct *mm; > >> struct vm_area_struct *vma; > >> + int type; > >> > >> memset(buffer, 0, sizeof(buffer)); > >> if (count > sizeof(buffer) - 1) > >> count = sizeof(buffer) - 1; > >> if (copy_from_user(buffer, buf, count)) > >> return -EFAULT; > >> - if (!simple_strtol(buffer, &end, 0)) > >> + type = strict_strtol(buffer, &end, 0); > >> + if (!type) > >> return -EINVAL; > >> if (*end == '\n') > >> end++; > >> @@ -491,9 +514,7 @@ > >> down_read(&mm->mmap_sem); > >> for (vma = mm->mmap; vma; vma = vma->vm_next) { > >> clear_refs_walk.private = vma; > >> - if (!is_vm_hugetlb_page(vma)) > >> - walk_page_range(vma->vm_start, vma->vm_end, > >> - &clear_refs_walk); > >> + walk_vma_area(&clear_refs_walk, vma, type); > >> } > >> flush_tlb_mm(mm); > >> up_read(&mm->mmap_sem); > >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 > >> -0700 > >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 > >> -0700 > >> @@ -375,6 +375,13 @@ > >> This file is only present if the CONFIG_MMU kernel configuration option is > >> enabled. > >> > >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical > >> +pages associated with a process. > >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process > >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only > >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only > >> +Any other value written to the proc entry will clear all pages. > >> + > > > > Please follow the format in this document for how other /proc/PID/* > > entries are described. > > > > That format could really be improved here, perhaps you could clean > > proc.txt up a little bit while you're here? > > > > > > I am not sure what you mean by "clean" proc.txt, I did not detect much > formatting in the PID proc enries description, beyond what I rewrote > below: > Right, the file is pretty sloppy, so I was wondering if you wanted to take the time to clean it up a little so there's a more consistent style. > > The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and Probably better to say PG_referenced instead of Referenced. > physical pages associated with a process. > To clear all pages associated with the process > > echo 1 > /proc/PID/clear_refs > > To clear all anonymous pages associated with the process > > echo 2 > /proc/PID/clear_refs > > To clear all file mapped pages associated with the process > > echo 3 > /proc/PID/clear_refs > Any other value written to /proc/PID/clear_refs will have no effect. > You should probably say these all clear the bit instead of saying they "clear pages," which doesn't make a lot of sense. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 22:49 ` David Rientjes @ 2009-07-27 23:38 ` Moussa Ba 2009-07-27 23:42 ` Moussa Ba 2009-07-27 23:58 ` David Rientjes 0 siblings, 2 replies; 21+ messages in thread From: Moussa Ba @ 2009-07-27 23:38 UTC (permalink / raw) To: David Rientjes Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, Jul 27, 2009 at 3:49 PM, David Rientjes<rientjes@google.com> wrote: > On Mon, 27 Jul 2009, Moussa Ba wrote: > >> > clear_refs currently accepts any non-zero value, so it's possible that >> > this will break user scripts that, for whatever reason, write '2' or '3'. >> > I think that's acceptable, but it would be helpful to make all other >> > values a no-op similar to drop_caches at this point to avoid the potential >> > for breakage if this is ever extended any further. >> > > > In your latest post, I see you implemented this in > clear_refs_walk_vma_area() by checking for > CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. Thanks! Don't forget to > update Documentation/filesystems/proc.txt to specify that. > >> >> Selective clearing the pages has a measurable impact on performance as it >> >> limits the number of page walks. We have been using this interface and this >> >> adds flexibility to the user user space application implementing the reference >> >> clearing. >> >> >> >> Signed-off-by: Jared Hulbert (jaredeh@gmail.com) >> >> Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) >> > >> > Email addresses in < > braces, please. >> > >> > The first sign-off line normally indicates who wrote the patch, but your >> > submission lacks a From: line, so git would indicate you wrote it. If >> > that's incorrect, please add a From: line as described in >> > Documentation/SubmittingPatches. If it's correct, please reorder your >> > sign-off lines. >> > >> I will reorder the sign-off lines > > Ok, that removes the ambiguity concerning authorship, thanks. > >> >> ------- >> >> Documentation/filesystems/proc.txt | 7 +++++++ >> >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- >> >> 2 files changed, 32 insertions(+), 4 deletions(-) >> >> >> >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 >> >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 >> >> @@ -462,6 +462,27 @@ >> >> return 0; >> >> } >> >> >> >> +static void walk_vma_area(struct mm_walk *this_walk, >> >> + struct vm_area_struct *vma, int type) >> >> +{ >> > >> > This is a very generic name for something that is only applicable to >> > clear_refs, so please name it accordingly. This will also avoid having to >> > pass the struct mm_walk * in since its only user is clear_refs_walk. >> > >> Done. >> >> + >> >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous >> >> + * pages. >> >> + * >> >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped >> >> + * pages. >> >> + * >> >> + * Writing any other value including 1 will clear all pages >> >> + */ >> > >> > Documentation/CodingStyle would suggest this format: >> > >> > /* >> > * Multi-line kernel comments always start .. >> > * with an empty first line. >> > */ >> > >> Done. >> >> + if (is_vm_hugetlb_page(vma)) >> >> + return; >> >> + if (type == 2 && vma->vm_file) >> >> + return; >> >> + if (type == 3 && !vma->vm_file) >> >> + return; >> >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk); >> >> +} >> > >> > K&R would suggest #define's (or enums) for those hard-coded values. I >> > think that's already been suggested for this patch, actually. >> > >> >> Would this be acceptable? >> >> enum clear_refs_walk_type { >> CLEAR_REFS_ALL = 1, >> CLEAR_REFS_ANON = 2, >> CLEAR_REFS_MAPPED = 3 >> }; >> > > #define seems more appropriate for this particular use case. We simply > try to avoid hard-coded integers in source code (rationale: K&R). > >> static void clear_refs_walk_vma_area(struct mm_walk *this_walk, >> struct vm_area_struct *vma, enum clear_refs_walk_type type) >> { > > Ddi you consider my suggestion of dropping the struct mm_walk * formal > since this is now a clear_refs-specific function? walk_page_range() will > always take clear_refs_walk, there's no need to pass it in. > Well, I did but I would have to either pass clear_refs_walk or mm and build the mm_walk entry inside clear_refs_walk_vma. Simply passing the clear_refs_walk structure seemed simpler and more logical than having to rebuild it inside the function every time it is called. The other alternative would be to just forgo the additional function clear_refs_walk_vma and rewrite the for loop as: for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) continue; if (is_vm_hugetlb_page(vma)) continue; if (type == CLEAR_REFS_ANON && vma->vm_file) continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; walk_page_range(vma->vm_start, vma->vm_end, this_walk); } >> >> /* >> * Writing 1 to /proc/pid/clear_refs clears all pages. >> * Writing 2 to /proc/pid/clear_refs clears Anonymous pages. >> * Writing 3 to /proc/pid/clear_refs clears file mapped pages. >> */ >> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) >> return; >> if (is_vm_hugetlb_page(vma)) >> return; >> if (type == CLEAR_REFS_ANON && vma->vm_file) >> return; >> if (type == CLEAR_REFS_MAPPED && !vma->vm_file) >> return; >> walk_page_range(vma->vm_start, vma->vm_end, this_walk); >> } >> >> >> >> + >> >> static ssize_t clear_refs_write(struct file *file, const char __user * buf, >> >> size_t count, loff_t * ppos) >> >> { >> >> @@ -469,13 +490,15 @@ >> >> char buffer[PROC_NUMBUF], *end; >> >> struct mm_struct *mm; >> >> struct vm_area_struct *vma; >> >> + int type; >> >> >> >> memset(buffer, 0, sizeof(buffer)); >> >> if (count > sizeof(buffer) - 1) >> >> count = sizeof(buffer) - 1; >> >> if (copy_from_user(buffer, buf, count)) >> >> return -EFAULT; >> >> - if (!simple_strtol(buffer, &end, 0)) >> >> + type = strict_strtol(buffer, &end, 0); >> >> + if (!type) >> >> return -EINVAL; >> >> if (*end == '\n') >> >> end++; >> >> @@ -491,9 +514,7 @@ >> >> down_read(&mm->mmap_sem); >> >> for (vma = mm->mmap; vma; vma = vma->vm_next) { >> >> clear_refs_walk.private = vma; >> >> - if (!is_vm_hugetlb_page(vma)) >> >> - walk_page_range(vma->vm_start, vma->vm_end, >> >> - &clear_refs_walk); >> >> + walk_vma_area(&clear_refs_walk, vma, type); >> >> } >> >> flush_tlb_mm(mm); >> >> up_read(&mm->mmap_sem); >> >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 >> >> -0700 >> >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 >> >> -0700 >> >> @@ -375,6 +375,13 @@ >> >> This file is only present if the CONFIG_MMU kernel configuration option is >> >> enabled. >> >> >> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical >> >> +pages associated with a process. >> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process >> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only >> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only >> >> +Any other value written to the proc entry will clear all pages. >> >> + >> > >> > Please follow the format in this document for how other /proc/PID/* >> > entries are described. >> > >> > That format could really be improved here, perhaps you could clean >> > proc.txt up a little bit while you're here? >> > >> > >> >> I am not sure what you mean by "clean" proc.txt, I did not detect much >> formatting in the PID proc enries description, beyond what I rewrote >> below: >> > > Right, the file is pretty sloppy, so I was wondering if you wanted to take > the time to clean it up a little so there's a more consistent style. > >> >> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and > > Probably better to say PG_referenced instead of Referenced. Okay, though it would have to also state that the pte bits are cleared as well. > >> physical pages associated with a process. >> To clear all pages associated with the process >> > echo 1 > /proc/PID/clear_refs >> >> To clear all anonymous pages associated with the process >> > echo 2 > /proc/PID/clear_refs >> >> To clear all file mapped pages associated with the process >> > echo 3 > /proc/PID/clear_refs >> Any other value written to /proc/PID/clear_refs will have no effect. >> > > You should probably say these all clear the bit instead of saying they > "clear pages," which doesn't make a lot of sense. You are correct. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 23:38 ` Moussa Ba @ 2009-07-27 23:42 ` Moussa Ba 2009-07-27 23:58 ` David Rientjes 1 sibling, 0 replies; 21+ messages in thread From: Moussa Ba @ 2009-07-27 23:42 UTC (permalink / raw) To: David Rientjes Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, Jul 27, 2009 at 4:38 PM, Moussa Ba<moussa.a.ba@gmail.com> wrote: > On Mon, Jul 27, 2009 at 3:49 PM, David Rientjes<rientjes@google.com> wrote: >> On Mon, 27 Jul 2009, Moussa Ba wrote: >> >>> > clear_refs currently accepts any non-zero value, so it's possible that >>> > this will break user scripts that, for whatever reason, write '2' or '3'. >>> > I think that's acceptable, but it would be helpful to make all other >>> > values a no-op similar to drop_caches at this point to avoid the potential >>> > for breakage if this is ever extended any further. >>> > >> >> In your latest post, I see you implemented this in >> clear_refs_walk_vma_area() by checking for >> CLEAR_REFS_ALL > type > CLEAR_REFS_MAPPED. Thanks! Don't forget to >> update Documentation/filesystems/proc.txt to specify that. >> >>> >> Selective clearing the pages has a measurable impact on performance as it >>> >> limits the number of page walks. We have been using this interface and this >>> >> adds flexibility to the user user space application implementing the reference >>> >> clearing. >>> >> >>> >> Signed-off-by: Jared Hulbert (jaredeh@gmail.com) >>> >> Signed-off-by: Moussa A. Ba (moussa.a.ba@gmail.com) >>> > >>> > Email addresses in < > braces, please. >>> > >>> > The first sign-off line normally indicates who wrote the patch, but your >>> > submission lacks a From: line, so git would indicate you wrote it. If >>> > that's incorrect, please add a From: line as described in >>> > Documentation/SubmittingPatches. If it's correct, please reorder your >>> > sign-off lines. >>> > >>> I will reorder the sign-off lines >> >> Ok, that removes the ambiguity concerning authorship, thanks. >> >>> >> ------- >>> >> Documentation/filesystems/proc.txt | 7 +++++++ >>> >> fs/proc/task_mmu.c | 29 +++++++++++++++++++++++++---- >>> >> 2 files changed, 32 insertions(+), 4 deletions(-) >>> >> >>> >> --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 >>> >> +++ b/fs/proc/task_mmu.c 2009-07-27 11:46:05.000000000 -0700 >>> >> @@ -462,6 +462,27 @@ >>> >> return 0; >>> >> } >>> >> >>> >> +static void walk_vma_area(struct mm_walk *this_walk, >>> >> + struct vm_area_struct *vma, int type) >>> >> +{ >>> > >>> > This is a very generic name for something that is only applicable to >>> > clear_refs, so please name it accordingly. This will also avoid having to >>> > pass the struct mm_walk * in since its only user is clear_refs_walk. >>> > >>> Done. >>> >> + >>> >> + /* Writing 2 to /proc/pid/clear_refs will clear all Anonymous >>> >> + * pages. >>> >> + * >>> >> + * Writing 3 to /proc/pid/clear_refs will clear all file mapped >>> >> + * pages. >>> >> + * >>> >> + * Writing any other value including 1 will clear all pages >>> >> + */ >>> > >>> > Documentation/CodingStyle would suggest this format: >>> > >>> > /* >>> > * Multi-line kernel comments always start .. >>> > * with an empty first line. >>> > */ >>> > >>> Done. >>> >> + if (is_vm_hugetlb_page(vma)) >>> >> + return; >>> >> + if (type == 2 && vma->vm_file) >>> >> + return; >>> >> + if (type == 3 && !vma->vm_file) >>> >> + return; >>> >> + walk_page_range(vma->vm_start, vma->vm_end, this_walk); >>> >> +} >>> > >>> > K&R would suggest #define's (or enums) for those hard-coded values. I >>> > think that's already been suggested for this patch, actually. >>> > >>> >>> Would this be acceptable? >>> >>> enum clear_refs_walk_type { >>> CLEAR_REFS_ALL = 1, >>> CLEAR_REFS_ANON = 2, >>> CLEAR_REFS_MAPPED = 3 >>> }; >>> >> >> #define seems more appropriate for this particular use case. We simply >> try to avoid hard-coded integers in source code (rationale: K&R). >> >>> static void clear_refs_walk_vma_area(struct mm_walk *this_walk, >>> struct vm_area_struct *vma, enum clear_refs_walk_type type) >>> { >> >> Ddi you consider my suggestion of dropping the struct mm_walk * formal >> since this is now a clear_refs-specific function? walk_page_range() will >> always take clear_refs_walk, there's no need to pass it in. >> > > Well, I did but I would have to either pass clear_refs_walk or mm and > build the mm_walk entry inside clear_refs_walk_vma. Simply passing > the clear_refs_walk structure seemed simpler and more logical than > having to rebuild it inside the function every time it is called. > > The other alternative would be to just forgo the additional function > clear_refs_walk_vma and rewrite the for loop as: > > for (vma = mm->mmap; vma; vma = vma->vm_next) { > clear_refs_walk.private = vma; > if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) > continue; > if (is_vm_hugetlb_page(vma)) > continue; > if (type == CLEAR_REFS_ANON && vma->vm_file) > continue; > if (type == CLEAR_REFS_MAPPED && !vma->vm_file) > continue; > walk_page_range(vma->vm_start, vma->vm_end, this_walk); > } > > Apologies the checking of the range of values should be outside the for loop. if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAl; for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; if (is_vm_hugetlb_page(vma)) continue; if (type == CLEAR_REFS_ANON && vma->vm_file) continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; walk_page_range(vma->vm_start, vma->vm_end, this_walk); } > >>> >>> /* >>> * Writing 1 to /proc/pid/clear_refs clears all pages. >>> * Writing 2 to /proc/pid/clear_refs clears Anonymous pages. >>> * Writing 3 to /proc/pid/clear_refs clears file mapped pages. >>> */ >>> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) >>> return; >>> if (is_vm_hugetlb_page(vma)) >>> return; >>> if (type == CLEAR_REFS_ANON && vma->vm_file) >>> return; >>> if (type == CLEAR_REFS_MAPPED && !vma->vm_file) >>> return; >>> walk_page_range(vma->vm_start, vma->vm_end, this_walk); >>> } >>> >>> >>> >> + >>> >> static ssize_t clear_refs_write(struct file *file, const char __user * buf, >>> >> size_t count, loff_t * ppos) >>> >> { >>> >> @@ -469,13 +490,15 @@ >>> >> char buffer[PROC_NUMBUF], *end; >>> >> struct mm_struct *mm; >>> >> struct vm_area_struct *vma; >>> >> + int type; >>> >> >>> >> memset(buffer, 0, sizeof(buffer)); >>> >> if (count > sizeof(buffer) - 1) >>> >> count = sizeof(buffer) - 1; >>> >> if (copy_from_user(buffer, buf, count)) >>> >> return -EFAULT; >>> >> - if (!simple_strtol(buffer, &end, 0)) >>> >> + type = strict_strtol(buffer, &end, 0); >>> >> + if (!type) >>> >> return -EINVAL; >>> >> if (*end == '\n') >>> >> end++; >>> >> @@ -491,9 +514,7 @@ >>> >> down_read(&mm->mmap_sem); >>> >> for (vma = mm->mmap; vma; vma = vma->vm_next) { >>> >> clear_refs_walk.private = vma; >>> >> - if (!is_vm_hugetlb_page(vma)) >>> >> - walk_page_range(vma->vm_start, vma->vm_end, >>> >> - &clear_refs_walk); >>> >> + walk_vma_area(&clear_refs_walk, vma, type); >>> >> } >>> >> flush_tlb_mm(mm); >>> >> up_read(&mm->mmap_sem); >>> >> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 >>> >> -0700 >>> >> +++ b/Documentation/filesystems/proc.txt 2009-07-27 12:08:34.000000000 >>> >> -0700 >>> >> @@ -375,6 +375,13 @@ >>> >> This file is only present if the CONFIG_MMU kernel configuration option is >>> >> enabled. >>> >> >>> >> +The clear_refs entry is used to reset the Referenced bits on virtual and physical >>> >> +pages associated with a process. >>> >> +echo 1 > /proc/PID/clear_refs clears all pages associated with the process >>> >> +echo 2 > /proc/PID/clear_refs clears anonymous pages only >>> >> +echo 3 > /proc/PID/clear_refs clears file mapped pages only >>> >> +Any other value written to the proc entry will clear all pages. >>> >> + >>> > >>> > Please follow the format in this document for how other /proc/PID/* >>> > entries are described. >>> > >>> > That format could really be improved here, perhaps you could clean >>> > proc.txt up a little bit while you're here? >>> > >>> > >>> >>> I am not sure what you mean by "clean" proc.txt, I did not detect much >>> formatting in the PID proc enries description, beyond what I rewrote >>> below: >>> >> >> Right, the file is pretty sloppy, so I was wondering if you wanted to take >> the time to clean it up a little so there's a more consistent style. >> >>> >>> The /proc/PID/clear_refs is used to reset the Referenced bits on virtual and >> >> Probably better to say PG_referenced instead of Referenced. > > Okay, though it would have to also state that the pte bits are cleared as well. >> >>> physical pages associated with a process. >>> To clear all pages associated with the process >>> > echo 1 > /proc/PID/clear_refs >>> >>> To clear all anonymous pages associated with the process >>> > echo 2 > /proc/PID/clear_refs >>> >>> To clear all file mapped pages associated with the process >>> > echo 3 > /proc/PID/clear_refs >>> Any other value written to /proc/PID/clear_refs will have no effect. >>> >> >> You should probably say these all clear the bit instead of saying they >> "clear pages," which doesn't make a lot of sense. > > You are correct. > ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 23:38 ` Moussa Ba 2009-07-27 23:42 ` Moussa Ba @ 2009-07-27 23:58 ` David Rientjes 2009-07-28 3:24 ` Amerigo Wang 2009-07-28 16:44 ` Moussa A. Ba 1 sibling, 2 replies; 21+ messages in thread From: David Rientjes @ 2009-07-27 23:58 UTC (permalink / raw) To: Moussa Ba Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, 27 Jul 2009, Moussa Ba wrote: > The other alternative would be to just forgo the additional function > clear_refs_walk_vma and rewrite the for loop as: > > for (vma = mm->mmap; vma; vma = vma->vm_next) { > clear_refs_walk.private = vma; > if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) > continue; > if (is_vm_hugetlb_page(vma)) > continue; > if (type == CLEAR_REFS_ANON && vma->vm_file) > continue; > if (type == CLEAR_REFS_MAPPED && !vma->vm_file) > continue; > walk_page_range(vma->vm_start, vma->vm_end, this_walk); > } > That looks good, with the exception of the check for type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED being in the loop. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 23:58 ` David Rientjes @ 2009-07-28 3:24 ` Amerigo Wang 2009-07-28 16:44 ` Moussa A. Ba 1 sibling, 0 replies; 21+ messages in thread From: Amerigo Wang @ 2009-07-28 3:24 UTC (permalink / raw) To: David Rientjes Cc: Moussa Ba, linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Mon, Jul 27, 2009 at 04:58:20PM -0700, David Rientjes wrote: >On Mon, 27 Jul 2009, Moussa Ba wrote: > >> The other alternative would be to just forgo the additional function >> clear_refs_walk_vma and rewrite the for loop as: >> >> for (vma = mm->mmap; vma; vma = vma->vm_next) { >> clear_refs_walk.private = vma; >> if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) >> continue; >> if (is_vm_hugetlb_page(vma)) >> continue; >> if (type == CLEAR_REFS_ANON && vma->vm_file) >> continue; >> if (type == CLEAR_REFS_MAPPED && !vma->vm_file) >> continue; >> walk_page_range(vma->vm_start, vma->vm_end, this_walk); >> } >> > >That looks good, with the exception of the check for >type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED being in the loop. And also that it looks like is_vm_hugetlb_page() is already checked outside, no need to check it again. :-) Thanks. ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-27 23:58 ` David Rientjes 2009-07-28 3:24 ` Amerigo Wang @ 2009-07-28 16:44 ` Moussa A. Ba 2009-07-28 21:01 ` David Rientjes 1 sibling, 1 reply; 21+ messages in thread From: Moussa A. Ba @ 2009-07-28 16:44 UTC (permalink / raw) To: linux-kernel Cc: David Rientjes, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh This patch adds anonymous and file backed filters to the clear_refs interface. echo 1 > /proc/PID/clear_refs resets the bits on all pages echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only Any other value is ignored. --- Signed-off-by: Moussa A. Ba <moussa.a.ba@gmail.com> Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700 +++ b/Documentation/filesystems/proc.txt 2009-07-27 16:58:05.000000000 -0700 @@ -375,6 +375,18 @@ of memory currently marked as referenced This file is only present if the CONFIG_MMU kernel configuration option is enabled. +The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG +bits on both physical and virtual pages associated with a process. +To clear the bits for all the pages associated with the process + > echo 1 > /proc/PID/clear_refs + +To clear the bits for the anonymous pages associated with the process + > echo 2 > /proc/PID/clear_refs + +To clear the bits for the file backed pages associated with the process + > echo 3 > /proc/PID/clear_refs +Any other value written to /proc/PID/clear_refs will have no effect. + 1.2 Kernel data --------------- --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 +++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700 @@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t * return 0; } +#define CLEAR_REFS_ALL 1 +#define CLEAR_REFS_ANON 2 +#define CLEAR_REFS_MAPPED 3 + static ssize_t clear_refs_write(struct file *file, const char __user * buf, size_t count, loff_t * ppos) { @@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f char buffer[PROC_NUMBUF], *end; struct mm_struct *mm; struct vm_area_struct *vma; + int type; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) + type = simple_strtol(buffer, &end, 0); + if (!type || type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; if (*end == '\n') end++; @@ -491,9 +497,23 @@ static ssize_t clear_refs_write(struct f down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; - if (!is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); + if (is_vm_hugetlb_page(vma)) + continue; + /* + * Writing 1 to /proc/pid/clear_refs affects all pages. + * + * Writing 2 to /proc/pid/clear_refs only affects + * Anonymous pages. + * + * Writing 3 to /proc/pid/clear_refs only affects file + * backed pages. + */ + if (type == CLEAR_REFS_ANON && vma->vm_file) + continue; + if (type == CLEAR_REFS_MAPPED && !vma->vm_file) + continue; + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); } flush_tlb_mm(mm); up_read(&mm->mmap_sem); ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-28 16:44 ` Moussa A. Ba @ 2009-07-28 21:01 ` David Rientjes 2009-07-28 22:52 ` Moussa A. Ba 0 siblings, 1 reply; 21+ messages in thread From: David Rientjes @ 2009-07-28 21:01 UTC (permalink / raw) To: Moussa A. Ba Cc: linux-kernel, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh On Tue, 28 Jul 2009, Moussa A. Ba wrote: > --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 > +++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700 > @@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t * > return 0; > } > > +#define CLEAR_REFS_ALL 1 > +#define CLEAR_REFS_ANON 2 > +#define CLEAR_REFS_MAPPED 3 > + > static ssize_t clear_refs_write(struct file *file, const char __user * buf, > size_t count, loff_t * ppos) > { > @@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f > char buffer[PROC_NUMBUF], *end; > struct mm_struct *mm; > struct vm_area_struct *vma; > + int type; > > memset(buffer, 0, sizeof(buffer)); > if (count > sizeof(buffer) - 1) > count = sizeof(buffer) - 1; > if (copy_from_user(buffer, buf, count)) > return -EFAULT; > - if (!simple_strtol(buffer, &end, 0)) > + type = simple_strtol(buffer, &end, 0); > + if (!type || type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) > return -EINVAL; > if (*end == '\n') > end++; The test for type < CLEAR_REFS_ALL covers !type. ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-28 21:01 ` David Rientjes @ 2009-07-28 22:52 ` Moussa A. Ba 2009-07-28 23:52 ` Andrew Morton 0 siblings, 1 reply; 21+ messages in thread From: Moussa A. Ba @ 2009-07-28 22:52 UTC (permalink / raw) To: linux-kernel Cc: David Rientjes, xiyou.wangcong, Andrew Morton, Alexey Dobriyan, Matt Mackall, Mel Gorman, Ying Han, Nick Piggin, jaredeh This patch adds anonymous and file backed filters to the clear_refs interface. echo 1 > /proc/PID/clear_refs resets the bits on all pages echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only Any other value is ignored --- Signed-off-by: Moussa A. Ba <moussa.a.ba@gmail.com> Signed-off-by: Jared E. Hulbert <jaredeh@gmail.com> Acked-by: David Rientjes <rientjes@google.com> --- a/Documentation/filesystems/proc.txt 2009-07-20 17:29:11.000000000 -0700 +++ b/Documentation/filesystems/proc.txt 2009-07-27 16:58:05.000000000 -0700 @@ -375,6 +375,18 @@ of memory currently marked as referenced This file is only present if the CONFIG_MMU kernel configuration option is enabled. +The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG +bits on both physical and virtual pages associated with a process. +To clear the bits for all the pages associated with the process + > echo 1 > /proc/PID/clear_refs + +To clear the bits for the anonymous pages associated with the process + > echo 2 > /proc/PID/clear_refs + +To clear the bits for the file mapped pages associated with the process + > echo 3 > /proc/PID/clear_refs +Any other value written to /proc/PID/clear_refs will have no effect. + 1.2 Kernel data --------------- --- a/fs/proc/task_mmu.c 2009-07-21 14:30:01.000000000 -0700 +++ b/fs/proc/task_mmu.c 2009-07-27 17:05:25.000000000 -0700 @@ -462,6 +462,10 @@ static int clear_refs_pte_range(pmd_t * return 0; } +#define CLEAR_REFS_ALL 1 +#define CLEAR_REFS_ANON 2 +#define CLEAR_REFS_MAPPED 3 + static ssize_t clear_refs_write(struct file *file, const char __user * buf, size_t count, loff_t * ppos) { @@ -469,13 +473,15 @@ static ssize_t clear_refs_write(struct f char buffer[PROC_NUMBUF], *end; struct mm_struct *mm; struct vm_area_struct *vma; + int type; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) + type = simple_strtol(buffer, &end, 0); + if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) return -EINVAL; if (*end == '\n') end++; @@ -491,9 +497,23 @@ static ssize_t clear_refs_write(struct f down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; - if (!is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); + if (is_vm_hugetlb_page(vma)) + continue; + /* + * Writing 1 to /proc/pid/clear_refs affects all pages. + * + * Writing 2 to /proc/pid/clear_refs only affects + * Anonymous pages. + * + * Writing 3 to /proc/pid/clear_refs only affects file + * mapped pages. + */ + if (type == CLEAR_REFS_ANON && vma->vm_file) + continue; + if (type == CLEAR_REFS_MAPPED && !vma->vm_file) + continue; + walk_page_range(vma->vm_start, vma->vm_end, + &clear_refs_walk); } flush_tlb_mm(mm); up_read(&mm->mmap_sem); ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-28 22:52 ` Moussa A. Ba @ 2009-07-28 23:52 ` Andrew Morton 2009-07-29 0:00 ` Moussa Ba 0 siblings, 1 reply; 21+ messages in thread From: Andrew Morton @ 2009-07-28 23:52 UTC (permalink / raw) To: Moussa A. Ba Cc: linux-kernel, rientjes, xiyou.wangcong, adobriyan, mpm, mel, yinghan, npiggin, jaredeh On Tue, 28 Jul 2009 15:52:05 -0700 "Moussa A. Ba" <moussa.a.ba@gmail.com> wrote: > This patch adds anonymous and file backed filters to the clear_refs interface. > echo 1 > /proc/PID/clear_refs resets the bits on all pages > echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only > echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only > Any other value is ignored The changelog is missing any rationale for making this change. Originally we were told that it "makes the clear_refs proc interface a bit more versatile", but that's a bit thin. How do we justify making this change to Linux? What value does it have? Can you describe a usage scenario which would help people understand the usefulness of the change? Thanks. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-28 23:52 ` Andrew Morton @ 2009-07-29 0:00 ` Moussa Ba 2009-07-29 14:58 ` Mel Gorman 0 siblings, 1 reply; 21+ messages in thread From: Moussa Ba @ 2009-07-29 0:00 UTC (permalink / raw) To: Andrew Morton Cc: linux-kernel, rientjes, xiyou.wangcong, adobriyan, mpm, mel, yinghan, npiggin, jaredeh The patch makes the clear_refs more versatile in adding the option to select anonymous pages or file backed pages for clearing. This addition has a measurable impact on user space application performance as it decreases the number of pagewalks in scenarios where one is only interested in a specific type of page (anonymous or file mapped). On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<akpm@linux-foundation.org> wrote: > On Tue, 28 Jul 2009 15:52:05 -0700 > "Moussa A. Ba" <moussa.a.ba@gmail.com> wrote: > >> This patch adds anonymous and file backed filters to the clear_refs interface. >> echo 1 > /proc/PID/clear_refs resets the bits on all pages >> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only >> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only >> Any other value is ignored > > The changelog is missing any rationale for making this change. > Originally we were told that it "makes the clear_refs proc interface a > bit more versatile", but that's a bit thin. > > How do we justify making this change to Linux? What value does it > have? Can you describe a usage scenario which would help people > understand the usefulness of the change? > > Thanks. > ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-29 0:00 ` Moussa Ba @ 2009-07-29 14:58 ` Mel Gorman 2009-07-29 16:41 ` Matt Mackall 2009-07-30 19:21 ` Moussa Ba 0 siblings, 2 replies; 21+ messages in thread From: Mel Gorman @ 2009-07-29 14:58 UTC (permalink / raw) To: Moussa Ba Cc: Andrew Morton, linux-kernel, rientjes, xiyou.wangcong, adobriyan, mpm, yinghan, npiggin, jaredeh On Tue, Jul 28, 2009 at 05:00:54PM -0700, Moussa Ba wrote: > The patch makes the clear_refs more versatile in adding the option to > select anonymous pages or file backed pages for clearing. This > addition has a measurable impact on user space application performance > as it decreases the number of pagewalks in scenarios where one is only > interested in a specific type of page (anonymous or file mapped). > I think what Andrew might be looking for is a repeat of the use-case for using clear_refs at all and what the additional usecase is that makes this patch beneficial. To be honest, I had to go digging for a bit to find out why clear_refs is used at all and the potential benefits of this patch were initially unclear to me although they were obvious to others. I confess I haven't read the patch closely to determine if it's behaving as advertised or not. Bonus points for a wee illustration of a script that measures the apparent working set of a process using clear_refs, showing the working set for anon vs filebacked and the difference of measuring just anon versus the full process for example. Such a script could be added to Documentation/vm as I didn't spot any example in there already. Total aside, is there potential to really mess with LRU aging by abusing clear_refs a lot, partcularly as clear_refs is writable by the process owner? Does it matter? > > On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<akpm@linux-foundation.org> wrote: > > On Tue, 28 Jul 2009 15:52:05 -0700 > > "Moussa A. Ba" <moussa.a.ba@gmail.com> wrote: > > > >> This patch adds anonymous and file backed filters to the clear_refs interface. > >> echo 1 > /proc/PID/clear_refs resets the bits on all pages > >> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only > >> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only > >> Any other value is ignored > > > > The changelog is missing any rationale for making this change. > > Originally we were told that it "makes the clear_refs proc interface a > > bit more versatile", but that's a bit thin. > > > > How do we justify making this change to Linux? What value does it > > have? Can you describe a usage scenario which would help people > > understand the usefulness of the change? > > > > Thanks. > > > -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-29 14:58 ` Mel Gorman @ 2009-07-29 16:41 ` Matt Mackall 2009-07-30 19:21 ` Moussa Ba 1 sibling, 0 replies; 21+ messages in thread From: Matt Mackall @ 2009-07-29 16:41 UTC (permalink / raw) To: Mel Gorman Cc: Moussa Ba, Andrew Morton, linux-kernel, rientjes, xiyou.wangcong, adobriyan, yinghan, npiggin, jaredeh On Wed, 2009-07-29 at 15:58 +0100, Mel Gorman wrote: > Total aside, is there potential to really mess with LRU aging by abusing > clear_refs a lot, partcularly as clear_refs is writable by the process > owner? Does it matter? Absolutely. However, it's generally the process owner who will suffer. -- http://selenic.com : development and support for Mercurial and Linux ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing 2009-07-29 14:58 ` Mel Gorman 2009-07-29 16:41 ` Matt Mackall @ 2009-07-30 19:21 ` Moussa Ba 1 sibling, 0 replies; 21+ messages in thread From: Moussa Ba @ 2009-07-30 19:21 UTC (permalink / raw) To: Mel Gorman Cc: Andrew Morton, linux-kernel, rientjes, xiyou.wangcong, adobriyan, mpm, yinghan, npiggin, jaredeh Mel, here are two scripts that make use of the interface to "measure" Referenced anon vs. mapped pages. save the awk script to refs.awk and run the bash script as follows ./refscount PID secs PID is the process you want to monitor while secs is the number of seconds to sleep before clearing the refs. The output after x secs is a the total number of refs before and immediately after clearing. Here is an example run using firefox as I was writing this email. /usr/lib/firefox-3.0.3/firefox 12:10:58 Mapped = 12856 kB Anon = 27896 kB 12:10:58 Mapped = 760 kB Anon = 4 kB 12:11:08 Mapped = 5884 kB Anon = 20340 kB . . . . 12:16:19 Mapped = 476 kB Anon = 0 kB 12:16:29 Mapped = 7016 kB Anon = 13128 kB 12:16:29 Mapped = 476 kB Anon = 0 kB 12:16:39 Mapped = 9276 kB Anon = 21588 kB 12:16:39 Mapped = 476 kB Anon = 0 kB 12:16:49 Mapped = 10288 kB Anon = 16000 kB 12:16:49 Mapped = 716 kB Anon = 12 kB BEGIN BASH SCRIPT -------------------------------- #!/bin/bash echo `cat /proc/$1/cmdline` #Intial state of the process cat /proc/$1/smaps | awk -f refs.awk while [ true ] do echo 1 > /proc/$1/clear_refs # Immidiately after clearing cat /proc/$1/smaps | awk -f refs.awk sleep $2 # after x seconds cat /proc/$1/smaps | awk -f refs.awk done --------------------------- END BASH SCRIPT BEGIN AWK SCRIPT ----------------------- BEGIN {refsanon = 0;refsmapped=0;} { if (NF == 6) { if ($4 == "00:00") type = 1; else type = 2; } if ($1 == "Referenced:") { if (type == 1) refsanon += $2; if (type == 2) refsmapped += $2; type = 0; } } END {printf("%s ",strftime("%H:%M:%S")); print "Mapped = " refsmapped " kB Anon = " refsanon " kB";} --------------------------- END AWK SCRIPT On Wed, Jul 29, 2009 at 7:58 AM, Mel Gorman<mel@csn.ul.ie> wrote: > On Tue, Jul 28, 2009 at 05:00:54PM -0700, Moussa Ba wrote: >> The patch makes the clear_refs more versatile in adding the option to >> select anonymous pages or file backed pages for clearing. This >> addition has a measurable impact on user space application performance >> as it decreases the number of pagewalks in scenarios where one is only >> interested in a specific type of page (anonymous or file mapped). >> > > I think what Andrew might be looking for is a repeat of the use-case for > using clear_refs at all and what the additional usecase is that makes this > patch beneficial. To be honest, I had to go digging for a bit to find out > why clear_refs is used at all and the potential benefits of this patch were > initially unclear to me although they were obvious to others. I confess I > haven't read the patch closely to determine if it's behaving as advertised > or not. > > Bonus points for a wee illustration of a script that measures the apparent > working set of a process using clear_refs, showing the working set for anon > vs filebacked and the difference of measuring just anon versus the full > process for example. Such a script could be added to Documentation/vm as > I didn't spot any example in there already. > > Total aside, is there potential to really mess with LRU aging by abusing > clear_refs a lot, partcularly as clear_refs is writable by the process > owner? Does it matter? > >> >> On Tue, Jul 28, 2009 at 4:52 PM, Andrew Morton<akpm@linux-foundation.org> wrote: >> > On Tue, 28 Jul 2009 15:52:05 -0700 >> > "Moussa A. Ba" <moussa.a.ba@gmail.com> wrote: >> > >> >> This patch adds anonymous and file backed filters to the clear_refs interface. >> >> echo 1 > /proc/PID/clear_refs resets the bits on all pages >> >> echo 2 > /proc/PID/clear_refs resets the bits on anonymous pages only >> >> echo 3 > /proc/PID/clear_refs resets the bits on file backed pages only >> >> Any other value is ignored >> > >> > The changelog is missing any rationale for making this change. >> > Originally we were told that it "makes the clear_refs proc interface a >> > bit more versatile", but that's a bit thin. >> > >> > How do we justify making this change to Linux? What value does it >> > have? Can you describe a usage scenario which would help people >> > understand the usefulness of the change? >> > >> > Thanks. >> > >> > > -- > Mel Gorman > Part-time Phd Student Linux Technology Center > University of Limerick IBM Dublin Software Lab > ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2009-07-30 19:21 UTC | newest] Thread overview: 21+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-07-24 3:57 [PATCH 1/1] pagemap clear_refs: modify to specify anon or mapped vma clearing Moussa A. Ba 2009-07-24 8:39 ` Amerigo Wang 2009-07-24 18:00 ` Moussa Ba 2009-07-27 20:19 ` Moussa A. Ba 2009-07-27 20:30 ` Matt Mackall 2009-07-27 20:57 ` David Rientjes 2009-07-27 22:14 ` Moussa Ba 2009-07-27 22:20 ` Matt Mackall 2009-07-27 22:49 ` David Rientjes 2009-07-27 23:38 ` Moussa Ba 2009-07-27 23:42 ` Moussa Ba 2009-07-27 23:58 ` David Rientjes 2009-07-28 3:24 ` Amerigo Wang 2009-07-28 16:44 ` Moussa A. Ba 2009-07-28 21:01 ` David Rientjes 2009-07-28 22:52 ` Moussa A. Ba 2009-07-28 23:52 ` Andrew Morton 2009-07-29 0:00 ` Moussa Ba 2009-07-29 14:58 ` Mel Gorman 2009-07-29 16:41 ` Matt Mackall 2009-07-30 19:21 ` Moussa Ba
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox