From: Andrew Vagin <avagin@virtuozzo.com>
To: Maxim Patlasov <mpatlasov@virtuozzo.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>,
<linux-fsdevel@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<devel@openvz.org>
Subject: Re: [Devel] [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal
Date: Thu, 25 Feb 2016 16:26:13 -0800 [thread overview]
Message-ID: <20160226002613.GA31970@odin.com> (raw)
In-Reply-To: <56C41C34.900@virtuozzo.com>
On Tue, Feb 16, 2016 at 11:07:32PM -0800, Maxim Patlasov wrote:
> On 02/16/2016 11:54 AM, Al Viro wrote:
> >On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
> >>propagate_one(m) calculates "type" argument for copy_tree() like this:
> >>
> >>> if (m->mnt_group_id == last_dest->mnt_group_id) {
> >>> type = CL_MAKE_SHARED;
> >>> } else {
> >>> type = CL_SLAVE;
> >>> if (IS_MNT_SHARED(m))
> >>> type |= CL_MAKE_SHARED;
> >>> }
> >>The "type" argument then governs clone_mnt() behavior with respect to flags
> >>and mnt_master of new mount. When we iterate through a slave group, it is
> >>possible that both current "m" and "last_dest" are not shared (although,
> >>both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
> >>above erroneously makes new mount shared and sets its mnt_master to
> >>last_source->mnt_master. The patch fixes the problem by handling zero
> >>mnt_group_id-s as though they are unequal.
> >>
> >>A similar problem exists in the implementation of the "else" clause above,
> >>where we have to ascend the master/slave tree by calling:
> >>
> >>> last_source = last_source->mnt_master;
> >>> last_dest = last_source->mnt_parent;
> >>proper number of times. The last step is governed by
> >>"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
> >>both are zero. The patch fixes this case in the same way as the former one.
> >Mind putting together a reproducer?
>
> There are two files attached: reproducer1.c and reproducer2.c. The former
> demonstrates the problem before applying the patch. The latter demonstrates
> why the first hunk of the patch is not enough.
>
> [root@f22ml ~]# reproducer1
> main pid = 1496
> monitor pid = 1497
> child pid = 1498
> grand-child pid = 1499
>
> [root@f22ml ~]# grep "child" /proc/1496/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1498/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs
> rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1499/mountinfo
> 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1497/mountinfo
> 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs
> rw,seclabel
>
> while expected info for 1497 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>
Here is a simpler reproducer without additional namespaces and
processes.
[root@fc22-vm tmp]# cat test.sh
# Reproducer for wrong mount propagation into a non-shared slave mount.
#
# Layout built below (all bind mounts of the same tmpfs "root"):
#   root         shared
#   monitor      slave of root, not shared
#   child        slave of root, and shared itself
#   grand_child  slave of child, not shared
#
# A new mount created under root/ then propagates to the other three.
# The copy that lands in monitor/ must not be shared; the script prints
# FAIL if its mountinfo line contains "shared", PASS otherwise.
#
# Needs privileges to call mount(8). Run it from a scratch directory: a
# tmpfs is mounted over the current directory and nothing is unmounted.
set -e

d=$(pwd)
mount -t tmpfs test "$d"
# cd again: the shell's cwd still refers to the directory underneath the
# freshly mounted tmpfs, so relative paths would otherwise miss it.
cd "$d"

mkdir root
mount -t tmpfs root root
mount --make-shared root

mkdir monitor
mount --bind root monitor/
mount --make-slave monitor/

mkdir child
mount --bind root child/
mount --make-slave child
mount --make-shared child/

mkdir grand_child
mount --bind child grand_child
mount --make-slave grand_child

# Trigger propagation: a new mount inside the shared "root" mount.
mkdir root/test
mount --bind root/test root/test

# Show every mount below the playground; $d is a literal path, hence -F.
# Under "set -e" the script stops here if nothing matches.
grep -F -- "$d" /proc/self/mountinfo
echo ---

# Explicit if/else instead of "a && b || c", which would also run the
# "||" branch if the "&&" branch itself failed.
if grep monitor/test /proc/self/mountinfo | grep shared; then
  echo FAIL
else
  echo PASS
fi
exit
[root@fc22-vm tmp]# bash test.sh
80 61 0:41 / /root/tmp rw,relatime shared:32 - tmpfs test rw
82 80 0:42 / /root/tmp/root rw,relatime shared:33 - tmpfs root rw
84 80 0:42 / /root/tmp/monitor rw,relatime master:33 - tmpfs root rw
86 80 0:42 / /root/tmp/child rw,relatime shared:34 master:33 - tmpfs root rw
88 80 0:42 / /root/tmp/grand_child rw,relatime master:34 - tmpfs root rw
90 82 0:42 /test /root/tmp/root/test rw,relatime shared:33 - tmpfs root rw
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
92 88 0:42 /test /root/tmp/grand_child/test rw,relatime master:35 - tmpfs root rw
91 86 0:42 /test /root/tmp/child/test rw,relatime shared:35 master:33 - tmpfs root rw
---
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
FAIL
> Now, assuming that only the first hunk of the patch is applied:
>
> > - if (m->mnt_group_id == last_dest->mnt_group_id) {
> > + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
>
> [root@f22ml ~]# reproducer2
> main pid = 1506
> monitor pid = 1507
> child pid = 1508
> grand-child pid = 1509
>
> [root@f22ml ~]# grep "child" /proc/1506/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1508/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1509/mountinfo
> 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1507/mountinfo
> 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
>
> while expected info for 1507 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>
> Thanks,
> Maxim
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
>
> int main()
> {
> const char *child = "/tmp/child";
> int ret;
>
> printf("main pid = %d\n", getpid());
>
> /* make our own private playground ... */
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> if (ret) {
> perror("mount");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2");
> exit(1);
> }
>
> /* fork monitor ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("monitor pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in monitor");
> exit(1);
> }
>
> sleep(-1);
> }
>
> /* wait monitor to setup */
> sleep(1);
>
>
> /* fork child ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("child pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount in child");
> exit(1);
> }
> ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2 in child");
> exit(1);
> }
>
> if (!fork()) { /* grand-child */
> printf("grand-child pid = %d\n", getpid());
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in grand-child");
> exit(1);
> }
>
> sleep(-1);
> }
>
> sleep(-1);
> }
>
> /* wait child and grand-child to setup */
> sleep(1);
>
> ret = mkdir(child, 0755);
> if (ret && errno != EEXIST) {
> perror("mkdir");
> exit(1);
> }
>
> /* let "child" mount slip to everyone' namespaces ... */
> ret = mount(child, child, NULL, MS_BIND, NULL);
> if (ret) {
> perror("bind mount");
> exit(1);
> }
>
> sleep(-1);
> }
>
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
>
> int main()
> {
> const char *child = "/tmp/child";
> int ret;
>
> printf("main pid = %d\n", getpid());
>
> /* make our own private playground ... */
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> if (ret) {
> perror("mount");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount2");
> exit(1);
> }
>
> /* fork monitor ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("monitor pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in monitor");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in monitor");
> exit(1);
> }
>
> sleep(-1);
> }
>
> /* wait monitor to setup */
> sleep(1);
>
>
> /* fork child ... */
> ret = fork();
> if (ret < 0) {
> perror("fork");
> exit(1);
> } else if (!ret) {
> printf("child pid = %d\n", getpid());
>
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in child");
> exit(1);
> }
>
> if (!fork()) { /* grand-child */
> printf("grand-child pid = %d\n", getpid());
> ret = unshare(CLONE_NEWNS);
> if (ret) {
> perror("unshare in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> if (ret) {
> perror("mount in grand-child");
> exit(1);
> }
>
> ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> if (ret) {
> perror("mount2 in grand-child");
> exit(1);
> }
>
> sleep(-1);
> }
>
> sleep(-1);
> }
>
> /* wait child and grand-child to setup */
> sleep(1);
>
> ret = mkdir(child, 0755);
> if (ret && errno != EEXIST) {
> perror("mkdir");
> exit(1);
> }
>
> /* let "child" mount slip to everyone' namespaces ... */
> ret = mount(child, child, NULL, MS_BIND, NULL);
> if (ret) {
> perror("bind mount");
> exit(1);
> }
>
> sleep(-1);
> }
>
> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
next prev parent reply other threads:[~2016-02-26 0:26 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-16 19:45 [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal Maxim Patlasov
2016-02-16 19:54 ` Al Viro
2016-02-17 7:07 ` Maxim Patlasov
2016-02-26 0:26 ` Andrew Vagin [this message]
2016-02-16 22:58 ` [Devel] " Andrew Vagin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160226002613.GA31970@odin.com \
--to=avagin@virtuozzo.com \
--cc=devel@openvz.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mpatlasov@virtuozzo.com \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.