linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Vagin <avagin@virtuozzo.com>
To: Maxim Patlasov <mpatlasov@virtuozzo.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>,
	<linux-fsdevel@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<devel@openvz.org>
Subject: Re: [Devel] [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal
Date: Thu, 25 Feb 2016 16:26:13 -0800	[thread overview]
Message-ID: <20160226002613.GA31970@odin.com> (raw)
In-Reply-To: <56C41C34.900@virtuozzo.com>

On Tue, Feb 16, 2016 at 11:07:32PM -0800, Maxim Patlasov wrote:
> On 02/16/2016 11:54 AM, Al Viro wrote:
> >On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
> >>propagate_one(m) calculates "type" argument for copy_tree() like this:
> >>
> >>>    if (m->mnt_group_id == last_dest->mnt_group_id) {
> >>>        type = CL_MAKE_SHARED;
> >>>    } else {
> >>>        type = CL_SLAVE;
> >>>        if (IS_MNT_SHARED(m))
> >>>           type |= CL_MAKE_SHARED;
> >>>   }
> >>The "type" argument then governs clone_mnt() behavior with respect to flags
> >>and mnt_master of new mount. When we iterate through a slave group, it is
> >>possible that both current "m" and "last_dest" are not shared (although,
> >>both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
> >>above erroneously makes new mount shared and sets its mnt_master to
> >>last_source->mnt_master. The patch fixes the problem by handling zero
> >>mnt_group_id-s as though they are unequal.
> >>
> >>A similar problem exists in the implementation of the "else" clause above
> >>when we have to ascend upward in the master/slave tree by calling:
> >>
> >>>    last_source = last_source->mnt_master;
> >>>    last_dest = last_source->mnt_parent;
> >>proper number of times. The last step is governed by
> >>"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
> >>both are zero. The patch fixes this case in the same way as the former one.
> >Mind putting together a reproducer?
> 
> There are two files attached: reproducer1.c and reproducer2.c. The former
> demonstrates the problem before applying the patch. The latter demonstrates
> why the first hunk of the patch is not enough.
> 
> [root@f22ml ~]# reproducer1
> main pid = 1496
> monitor pid = 1497
> child pid = 1498
> grand-child pid = 1499
> 
> [root@f22ml ~]# grep "child" /proc/1496/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1498/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs
> rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1499/mountinfo
> 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1497/mountinfo
> 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs
> rw,seclabel
> 
> while expected info for 1497 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>

Here is a simpler reproducer without additional namespaces and
processes.

[root@fc22-vm tmp]# cat test.sh 
# Reproducer: a mount propagated to an unshared slave ("monitor") must not
# come out shared. Needs root; everything is mounted beneath the current
# working directory.
set -e

# Stack a fresh tmpfs on the current directory and step into it, so every
# mount point created below lives on that scratch filesystem.
d=$(pwd)
mount -t tmpfs test "$d"
cd "$d"

# root: the shared mount that propagation starts from.
mkdir root
mount -t tmpfs root root
mount --make-shared root

# monitor: a slave of root that is not shared itself.
mkdir monitor
mount --bind root monitor/
mount --make-slave monitor/

# child: a slave of root that is made shared again.
mkdir child
mount --bind root child/
mount --make-slave child
mount --make-shared child/

# grand_child: an unshared slave of child.
mkdir grand_child
mount --bind child grand_child
mount --make-slave grand_child

# Trigger propagation with a bind mount inside root.
mkdir root/test
mount --bind root/test root/test

# -F: the path is a literal string, not a regular expression.
grep -F -- "$d" /proc/self/mountinfo
echo ---
# A "shared" tag on monitor/test is the bug being demonstrated.
if grep monitor/test /proc/self/mountinfo | grep shared; then
  echo FAIL
else
  echo PASS
fi
exit

[root@fc22-vm tmp]# bash test.sh 
80 61 0:41 / /root/tmp rw,relatime shared:32 - tmpfs test rw
82 80 0:42 / /root/tmp/root rw,relatime shared:33 - tmpfs root rw
84 80 0:42 / /root/tmp/monitor rw,relatime master:33 - tmpfs root rw
86 80 0:42 / /root/tmp/child rw,relatime shared:34 master:33 - tmpfs root rw
88 80 0:42 / /root/tmp/grand_child rw,relatime master:34 - tmpfs root rw
90 82 0:42 /test /root/tmp/root/test rw,relatime shared:33 - tmpfs root rw
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
92 88 0:42 /test /root/tmp/grand_child/test rw,relatime master:35 - tmpfs root rw
91 86 0:42 /test /root/tmp/child/test rw,relatime shared:35 master:33 - tmpfs root rw
---
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
FAIL

> Now, assuming that only the first hunk of the patch is applied:
> 
> > -    if (m->mnt_group_id == last_dest->mnt_group_id) {
> > +    if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
> 
> [root@f22ml ~]# reproducer2
> main pid = 1506
> monitor pid = 1507
> child pid = 1508
> grand-child pid = 1509
> 
> [root@f22ml ~]# grep "child" /proc/1506/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1508/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1509/mountinfo
> 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> [root@f22ml ~]# grep "child" /proc/1507/mountinfo
> 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
> 
> while expected info for 1507 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> 
> Thanks,
> Maxim

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
> 
> int main()
> {
> 	const char *child  = "/tmp/child";
> 	int ret;
> 
> 	printf("main pid = %d\n", getpid());	
> 
> 	/* make our own private playground ... */
> 	ret = unshare(CLONE_NEWNS);
> 	if (ret) {
> 		perror("unshare");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> 	if (ret) {
> 		perror("mount");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 	if (ret) {
> 		perror("mount2");
> 		exit(1);
> 	}
> 
> 	/* fork monitor ... */
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("monitor pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount2 in monitor");
> 			exit(1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait for the monitor to set up */
> 	sleep(1);
> 
> 
> 	/* fork child ... */	
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("child pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in child");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount in child");
> 			exit(1);
> 		}
> 		ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount2 in child");
> 			exit(1);
> 		}
> 
> 		if (!fork()) { /* grand-child */
> 			printf("grand-child pid = %d\n", getpid());
> 			ret = unshare(CLONE_NEWNS);
> 			if (ret) {
> 				perror("unshare in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 			if (ret) {
> 				perror("mount in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 			if (ret) {
> 				perror("mount2 in grand-child");
> 				exit(1);
> 			}
> 
> 			sleep(-1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait for child and grand-child to set up */
> 	sleep(1);
> 	
> 	ret = mkdir(child, 0755);
> 	if (ret &&  errno != EEXIST) {
> 		perror("mkdir");
> 		exit(1);
> 	}
> 
> 	/* let "child" mount slip to everyone's namespaces ... */
> 	ret = mount(child, child, NULL, MS_BIND, NULL);
> 	if (ret) {
> 		perror("bind mount");
> 		exit(1);
> 	}
> 
> 	sleep(-1);
> }
> 

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
> 
> int main()
> {
> 	const char *child  = "/tmp/child";
> 	int ret;
> 
> 	printf("main pid = %d\n", getpid());	
> 
> 	/* make our own private playground ... */
> 	ret = unshare(CLONE_NEWNS);
> 	if (ret) {
> 		perror("unshare");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> 	if (ret) {
> 		perror("mount");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 	if (ret) {
> 		perror("mount2");
> 		exit(1);
> 	}
> 
> 	/* fork monitor ... */
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("monitor pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount2 in monitor");
> 			exit(1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait for the monitor to set up */
> 	sleep(1);
> 
> 
> 	/* fork child ... */	
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("child pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in child");
> 			exit(1);
> 		}
> 
> 		if (!fork()) { /* grand-child */
> 			printf("grand-child pid = %d\n", getpid());
> 			ret = unshare(CLONE_NEWNS);
> 			if (ret) {
> 				perror("unshare in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 			if (ret) {
> 				perror("mount in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 			if (ret) {
> 				perror("mount2 in grand-child");
> 				exit(1);
> 			}
> 
> 			sleep(-1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait for child and grand-child to set up */
> 	sleep(1);
> 	
> 	ret = mkdir(child, 0755);
> 	if (ret &&  errno != EEXIST) {
> 		perror("mkdir");
> 		exit(1);
> 	}
> 
> 	/* let "child" mount slip to everyone's namespaces ... */
> 	ret = mount(child, child, NULL, MS_BIND, NULL);
> 	if (ret) {
> 		perror("bind mount");
> 		exit(1);
> 	}
> 
> 	sleep(-1);
> }
> 

> _______________________________________________
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel

  reply	other threads:[~2016-02-26  0:26 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-16 19:45 [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal Maxim Patlasov
2016-02-16 19:54 ` Al Viro
2016-02-17  7:07   ` Maxim Patlasov
2016-02-26  0:26     ` Andrew Vagin [this message]
2016-02-16 22:58 ` [Devel] " Andrew Vagin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160226002613.GA31970@odin.com \
    --to=avagin@virtuozzo.com \
    --cc=devel@openvz.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mpatlasov@virtuozzo.com \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).