* [RFC] procfs: Add VmFlags field in smaps output
@ 2012-10-18 9:55 Cyrill Gorcunov
2012-10-18 10:31 ` Cyrill Gorcunov
0 siblings, 1 reply; 5+ messages in thread
From: Cyrill Gorcunov @ 2012-10-18 9:55 UTC (permalink / raw)
To: LKML; +Cc: Pavel Emelyanov, Andrew Morton, Peter Zijlstra
Hi guys, in a sake of c/r we need to fetch additional
VMA characteristics, so I though would /proc/pid/smaps
be appropriate place for it? If yes, I would like to
know if the output format provided below looks reasonable.
Please review, thanks!
---
From: Cyrill Gorcunov <gorcunov@openvz.org>
Subject: [RFC] procfs: Add VmFlags field in smaps output
When we do restore VMA area after checkpoint
we would like to know if the area was locked
or say it has mergeable attribute, but at moment
the kernel does not provide such information, thus
we can't figure out if we should call mlock/madvise
on VMA restore.
This patch adds new VmFlags field to smaps output
with vma->vm_flags encoded.
This field is CONFIG_CHECKPOINT_RESTORE dependent
since at moment I don't know if someone else might
need it.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
fs/proc/task_mmu.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
Index: linux-2.6.git/fs/proc/task_mmu.c
===================================================================
--- linux-2.6.git.orig/fs/proc/task_mmu.c
+++ linux-2.6.git/fs/proc/task_mmu.c
@@ -480,6 +480,44 @@ static int smaps_pte_range(pmd_t *pmd, u
return 0;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
+{
+ seq_puts(m, "VmFlags: ");
+
+ if (vma->vm_flags & VM_LOCKED)
+ seq_puts(m, " locked");
+
+ if (vma->vm_flags & VM_GROWSDOWN)
+ seq_puts(m, " growsdown");
+
+ if (vma->vm_flags & VM_RAND_READ)
+ seq_puts(m, " rand_read");
+
+ if (vma->vm_flags & VM_SEQ_READ)
+ seq_puts(m, " seq_read");
+
+ if (vma->vm_flags & VM_DONTCOPY)
+ seq_puts(m, " dontcopy");
+
+ if (vma->vm_flags & VM_MERGEABLE)
+ seq_puts(m, " mergeable");
+
+ if (vma->vm_flags & VM_HUGEPAGE)
+ seq_puts(m, " hugepage");
+
+ if (vma->vm_flags & VM_NOHUGEPAGE)
+ seq_puts(m, " nohugepage");
+
+ if (vma->vm_flags & VM_DONTDUMP)
+ seq_puts(m, " dontdump");
+
+ seq_putc(m, '\n');
+}
+#else
+static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { }
+#endif
+
static int show_smap(struct seq_file *m, void *v, int is_pid)
{
struct proc_maps_private *priv = m->private;
@@ -535,6 +573,8 @@ static int show_smap(struct seq_file *m,
seq_printf(m, "Nonlinear: %8lu kB\n",
mss.nonlinear >> 10);
+ show_smap_vma_flags(m, vma);
+
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [RFC] procfs: Add VmFlags field in smaps output 2012-10-18 9:55 [RFC] procfs: Add VmFlags field in smaps output Cyrill Gorcunov @ 2012-10-18 10:31 ` Cyrill Gorcunov 2012-10-18 21:02 ` Andrew Morton 0 siblings, 1 reply; 5+ messages in thread From: Cyrill Gorcunov @ 2012-10-18 10:31 UTC (permalink / raw) To: LKML; +Cc: Pavel Emelyanov, Andrew Morton, Peter Zijlstra On Thu, Oct 18, 2012 at 01:55:03PM +0400, Cyrill Gorcunov wrote: > Hi guys, in a sake of c/r we need to fetch additional > VMA characteristics, so I though would /proc/pid/smaps > be appropriate place for it? If yes, I would like to > know if the output format provided below looks reasonable. > > Please review, thanks! > --- Also I've had a bit different output format, not sure which one is better. --- From: Cyrill Gorcunov <gorcunov@openvz.org> Subject: [RFC] procfs: Add VmFlags field in smaps output When we do restore VMA area after checkpoint we would like to know if the area was locked or say it has mergeable attribute, but at moment the kernel does not provide such information, thus we can't figure out if we should call mlock/madvise on VMA restore. This patch adds new VmFlags field to smaps output with vma->vm_flags encoded. This field is CONFIG_CHECKPOINT_RESTORE dependent since at moment I don't know if someone else might need it. Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> CC: Pavel Emelyanov <xemul@parallels.com> CC: Andrew Morton <akpm@linux-foundation.org> CC: Peter Zijlstra <a.p.zijlstra@chello.nl> --- fs/proc/task_mmu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) Index: linux-2.6.git/fs/proc/task_mmu.c =================================================================== --- linux-2.6.git.orig/fs/proc/task_mmu.c +++ linux-2.6.git/fs/proc/task_mmu.c @@ -480,6 +480,25 @@ static int smaps_pte_range(pmd_t *pmd, u return 0; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) +{ + seq_printf(m, + "VmFlags: %c%c%c%c%c%c%c%c%c\n", + vma->vm_flags & VM_LOCKED ? 'l' : '-', + vma->vm_flags & VM_GROWSDOWN ? 'g' : '-', + vma->vm_flags & VM_RAND_READ ? 'r' : '-', + vma->vm_flags & VM_SEQ_READ ? 'q' : '-', + vma->vm_flags & VM_DONTCOPY ? '-' : 'c', + vma->vm_flags & VM_MERGEABLE ? 'm' : '-', + vma->vm_flags & VM_HUGEPAGE ? 'h' : '-', + vma->vm_flags & VM_NOHUGEPAGE ? '-' : 'h', + vma->vm_flags & VM_DONTDUMP ? '-' : 'd'); +} +#else +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { } +#endif + static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; @@ -535,6 +554,8 @@ static int show_smap(struct seq_file *m, seq_printf(m, "Nonlinear: %8lu kB\n", mss.nonlinear >> 10); + show_smap_vma_flags(m, vma); + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC] procfs: Add VmFlags field in smaps output 2012-10-18 10:31 ` Cyrill Gorcunov @ 2012-10-18 21:02 ` Andrew Morton 2012-10-18 21:28 ` Cyrill Gorcunov 0 siblings, 1 reply; 5+ messages in thread From: Andrew Morton @ 2012-10-18 21:02 UTC (permalink / raw) To: Cyrill Gorcunov; +Cc: LKML, Pavel Emelyanov, Peter Zijlstra On Thu, 18 Oct 2012 14:31:18 +0400 Cyrill Gorcunov <gorcunov@openvz.org> wrote: > On Thu, Oct 18, 2012 at 01:55:03PM +0400, Cyrill Gorcunov wrote: > > Hi guys, in a sake of c/r we need to fetch additional > > VMA characteristics, so I though would /proc/pid/smaps > > be appropriate place for it? If yes, I would like to > > know if the output format provided below looks reasonable. > > > > Please review, thanks! > > --- > > Also I've had a bit different output format, not sure > which one is better. > --- > From: Cyrill Gorcunov <gorcunov@openvz.org> > Subject: [RFC] procfs: Add VmFlags field in smaps output > > When we do restore VMA area after checkpoint > we would like to know if the area was locked > or say it has mergeable attribute, but at moment > the kernel does not provide such information, thus > we can't figure out if we should call mlock/madvise > on VMA restore. > > This patch adds new VmFlags field to smaps output > with vma->vm_flags encoded. > > This field is CONFIG_CHECKPOINT_RESTORE dependent > since at moment I don't know if someone else might > need it. > > Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> > CC: Pavel Emelyanov <xemul@parallels.com> > CC: Andrew Morton <akpm@linux-foundation.org> > CC: Peter Zijlstra <a.p.zijlstra@chello.nl> > --- > fs/proc/task_mmu.c | 21 +++++++++++++++++++++ > 1 file changed, 21 insertions(+) > > Index: linux-2.6.git/fs/proc/task_mmu.c > =================================================================== > --- linux-2.6.git.orig/fs/proc/task_mmu.c > +++ linux-2.6.git/fs/proc/task_mmu.c > @@ -480,6 +480,25 @@ static int smaps_pte_range(pmd_t *pmd, u > return 0; > } > > +#ifdef CONFIG_CHECKPOINT_RESTORE > +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > +{ > + seq_printf(m, > + "VmFlags: %c%c%c%c%c%c%c%c%c\n", > + vma->vm_flags & VM_LOCKED ? 'l' : '-', > + vma->vm_flags & VM_GROWSDOWN ? 'g' : '-', > + vma->vm_flags & VM_RAND_READ ? 'r' : '-', > + vma->vm_flags & VM_SEQ_READ ? 'q' : '-', > + vma->vm_flags & VM_DONTCOPY ? '-' : 'c', > + vma->vm_flags & VM_MERGEABLE ? 'm' : '-', > + vma->vm_flags & VM_HUGEPAGE ? 'h' : '-', > + vma->vm_flags & VM_NOHUGEPAGE ? '-' : 'h', > + vma->vm_flags & VM_DONTDUMP ? '-' : 'd'); > +} > +#else > +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { } > +#endif I guess this more terse output is better. It should be documented (in Documentation/filesystems/proc.txt, I suppose) and we should add a code comment here reminding people to update that documentation when they change this function. There are about 28 VM_foo flags and here you've semi-randomly chosen 9 of them for display. This seems rather ugly and we should expect that people will change this code to display additional flags in the future. And we should also expect some of the flags which _are_ displayed to vanish in the future as the code evolves. This data is pretty dependent upon internal kernel implementation details, so it is something we normally try to avoid exporting to userspace. We should design the interface with these future changes in mind. That means careful documentation and perhaps a format which discourages userspace programmers from assuming that there will always be nine fields. A common way in which we do this future-proofing is to display the info in name:value tuples (eg, /proc/meminfo). So userspace parses for the "name" rather than looking into a fixed position in the /proc output. So.... with this thought in mind, perhaps a better output format would be something like: VmFlags: LO:1 GR:0 RA:0 SE:1 ... ie: a two-character "name" and a boolean "value". Something like that. Also, it worries me a bit that this interface vanishes if CONFIG_CHECKPOINT_RESTORE=n. One can easily anticipate that non-c/r userspace programs will start to use this interface, and they will want it to be present in CONFIG_CHECKPOINT_RESTORE=n kernels. ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC] procfs: Add VmFlags field in smaps output 2012-10-18 21:02 ` Andrew Morton @ 2012-10-18 21:28 ` Cyrill Gorcunov 2012-10-19 9:45 ` Cyrill Gorcunov 0 siblings, 1 reply; 5+ messages in thread From: Cyrill Gorcunov @ 2012-10-18 21:28 UTC (permalink / raw) To: Andrew Morton; +Cc: LKML, Pavel Emelyanov, Peter Zijlstra On Thu, Oct 18, 2012 at 02:02:59PM -0700, Andrew Morton wrote: > > +#ifdef CONFIG_CHECKPOINT_RESTORE > > +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > > +{ > > + seq_printf(m, > > + "VmFlags: %c%c%c%c%c%c%c%c%c\n", > > + vma->vm_flags & VM_LOCKED ? 'l' : '-', > > + vma->vm_flags & VM_GROWSDOWN ? 'g' : '-', > > + vma->vm_flags & VM_RAND_READ ? 'r' : '-', > > + vma->vm_flags & VM_SEQ_READ ? 'q' : '-', > > + vma->vm_flags & VM_DONTCOPY ? '-' : 'c', > > + vma->vm_flags & VM_MERGEABLE ? 'm' : '-', > > + vma->vm_flags & VM_HUGEPAGE ? 'h' : '-', > > + vma->vm_flags & VM_NOHUGEPAGE ? '-' : 'h', > > + vma->vm_flags & VM_DONTDUMP ? '-' : 'd'); > > +} > > +#else > > +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) { } > > +#endif > > I guess this more terse output is better. It should be documented (in Well, I started with terse output but then I thought that if one day some flag get vanished we will have to remember which character has been used for it to not reassign it to some new flag in future. That's from where this long words came. > Documentation/filesystems/proc.txt, I suppose) and we should add a code > comment here reminding people to update that documentation when they > change this function. Sure, thanks for reminder Andrew, I must admit I forgot about docs. > There are about 28 VM_foo flags and here you've semi-randomly chosen 9 > of them for display. This seems rather ugly and we should expect that Yes, but I choose only those flags which can be used/modified from user space and which can't be fetched by other way (for example we can figure out if vma is executable/shared/private from /proc/pid/map). Thus I think better try to not display flags which are pretty kernel internal, no? > people will change this code to display additional flags in the future. > > And we should also expect some of the flags which _are_ displayed to > vanish in the future as the code evolves. > > This data is pretty dependent upon internal kernel implementation > details, so it is something we normally try to avoid exporting to > userspace. Yes, but I fear here I have no choise. These flags modify the kernel behaviour and they are set from userspace (mlock/madvise) so we need some way to fetch them back and do yield appropriate syscalls on restore procedure. > We should design the interface with these future changes in mind. That > means careful documentation and perhaps a format which discourages > userspace programmers from assuming that there will always be nine > fields. > > A common way in which we do this future-proofing is to display the info > in name:value tuples (eg, /proc/meminfo). So userspace parses for the > "name" rather than looking into a fixed position in the /proc output. > > So.... with this thought in mind, perhaps a better output format would > be something like: > > VmFlags: LO:1 GR:0 RA:0 SE:1 ... > > ie: a two-character "name" and a boolean "value". Something like that. OK, Andrew, I'll try to come with something like that tomorrow, thanks! > Also, it worries me a bit that this interface vanishes if > CONFIG_CHECKPOINT_RESTORE=n. One can easily anticipate that non-c/r > userspace programs will start to use this interface, and they will want > it to be present in CONFIG_CHECKPOINT_RESTORE=n kernels. Well, hard to tell, I personally can't imagine who else and why might need this flags, but there is no problem to drop this CONFIG wrap if needed. ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [RFC] procfs: Add VmFlags field in smaps output 2012-10-18 21:28 ` Cyrill Gorcunov @ 2012-10-19 9:45 ` Cyrill Gorcunov 0 siblings, 0 replies; 5+ messages in thread From: Cyrill Gorcunov @ 2012-10-19 9:45 UTC (permalink / raw) To: Andrew Morton Cc: LKML, Pavel Emelyanov, Peter Zijlstra, Konstantin Khlebnikov On Fri, Oct 19, 2012 at 01:28:35AM +0400, Cyrill Gorcunov wrote: > > > > A common way in which we do this future-proofing is to display the info > > in name:value tuples (eg, /proc/meminfo). So userspace parses for the > > "name" rather than looking into a fixed position in the /proc output. > > > > So.... with this thought in mind, perhaps a better output format would > > be something like: > > > > VmFlags: LO:1 GR:0 RA:0 SE:1 ... > > > > ie: a two-character "name" and a boolean "value". Something like that. > > OK, Andrew, I'll try to come with something like that tomorrow, thanks! Does the format below looks well enough (i'll make docs update as well once things settle down)? Also I thought maybe it should be protected with CAP_SYS_ADMIN or something? --- Index: linux-2.6.git/fs/proc/task_mmu.c =================================================================== --- linux-2.6.git.orig/fs/proc/task_mmu.c +++ linux-2.6.git/fs/proc/task_mmu.c @@ -480,6 +480,33 @@ static int smaps_pte_range(pmd_t *pmd, u return 0; } +static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) +{ +#define __VM_FLAG(_f) (!!(vma->vm_flags & (_f))) + seq_printf(m, "VmFlags: " + "RD:%d WR:%d EX:%d SH:%d MR:%d " + "MW:%d ME:%d MS:%d GD:%d PF:%d " + "DW:%d LO:%d IO:%d SR:%d RR:%d " + "DC:%d DE:%d AC:%d NR:%d HT:%d " + "NL:%d AR:%d DD:%d MM:%d HG:%d " + "NH:%d MG:%d\n", + __VM_FLAG(VM_READ), __VM_FLAG(VM_WRITE), + __VM_FLAG(VM_EXEC), __VM_FLAG(VM_SHARED), + __VM_FLAG(VM_MAYREAD), __VM_FLAG(VM_MAYWRITE), + __VM_FLAG(VM_MAYEXEC), __VM_FLAG(VM_MAYSHARE), + __VM_FLAG(VM_GROWSDOWN), __VM_FLAG(VM_PFNMAP), + __VM_FLAG(VM_DENYWRITE), __VM_FLAG(VM_LOCKED), + __VM_FLAG(VM_IO), __VM_FLAG(VM_SEQ_READ), + __VM_FLAG(VM_RAND_READ), __VM_FLAG(VM_DONTCOPY), + __VM_FLAG(VM_DONTEXPAND), __VM_FLAG(VM_ACCOUNT), + __VM_FLAG(VM_NORESERVE), __VM_FLAG(VM_HUGETLB), + __VM_FLAG(VM_NONLINEAR), __VM_FLAG(VM_ARCH_1), + __VM_FLAG(VM_DONTDUMP), __VM_FLAG(VM_MIXEDMAP), + __VM_FLAG(VM_HUGEPAGE), __VM_FLAG(VM_NOHUGEPAGE), + __VM_FLAG(VM_MERGEABLE)); +#undef __VM_FLAG +} + static int show_smap(struct seq_file *m, void *v, int is_pid) { struct proc_maps_private *priv = m->private; @@ -535,6 +562,8 @@ static int show_smap(struct seq_file *m, seq_printf(m, "Nonlinear: %8lu kB\n", mss.nonlinear >> 10); + show_smap_vma_flags(m, vma); + if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task->mm)) ? vma->vm_start : 0; ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2012-10-19 9:45 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-10-18 9:55 [RFC] procfs: Add VmFlags field in smaps output Cyrill Gorcunov 2012-10-18 10:31 ` Cyrill Gorcunov 2012-10-18 21:02 ` Andrew Morton 2012-10-18 21:28 ` Cyrill Gorcunov 2012-10-19 9:45 ` Cyrill Gorcunov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox