From: Maxim Patlasov <mpatlasov@virtuozzo.com>
To: Al Viro <viro@ZenIV.linux.org.uk>
Cc: <linux-fsdevel@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<devel@openvz.org>
Subject: Re: [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal
Date: Tue, 16 Feb 2016 23:07:32 -0800 [thread overview]
Message-ID: <56C41C34.900@virtuozzo.com> (raw)
In-Reply-To: <20160216195423.GC17997@ZenIV.linux.org.uk>
[-- Attachment #1: Type: text/plain, Size: 3220 bytes --]
On 02/16/2016 11:54 AM, Al Viro wrote:
> On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
>> propagate_one(m) calculates "type" argument for copy_tree() like this:
>>
>>> if (m->mnt_group_id == last_dest->mnt_group_id) {
>>> type = CL_MAKE_SHARED;
>>> } else {
>>> type = CL_SLAVE;
>>> if (IS_MNT_SHARED(m))
>>> type |= CL_MAKE_SHARED;
>>> }
>> The "type" argument then governs clone_mnt() behavior with respect to flags
>> and mnt_master of new mount. When we iterate through a slave group, it is
>> possible that both current "m" and "last_dest" are not shared (although,
>> both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
>> above erroneously makes new mount shared and sets its mnt_master to
>> last_source->mnt_master. The patch fixes the problem by handling zero
>> mnt_group_id-s as though they are unequal.
>>
>> The similar problem exists in the implementation of "else" clause above
>> when we have to ascend upward in the master/slave tree by calling:
>>
>>> last_source = last_source->mnt_master;
>>> last_dest = last_source->mnt_parent;
>> proper number of times. The last step is governed by
>> "n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
>> both are zero. The patch fixes this case in the same way as the former one.
> Mind putting together a reproducer?
There are two files attached: reproducer1.c and reproducer2.c. The
former demonstrates the problem before applying the patch. The latter
demonstrates why the first hunk of the patch is not enough.
[root@f22ml ~]# reproducer1
main pid = 1496
monitor pid = 1497
child pid = 1498
grand-child pid = 1499
[root@f22ml ~]# grep "child" /proc/1496/mountinfo
243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1498/mountinfo
244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1499/mountinfo
245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1497/mountinfo
246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs rw,seclabel
while expected info for 1497 would be:
246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
Now, assuming that only the first hunk of the patch is applied:
> - if (m->mnt_group_id == last_dest->mnt_group_id) {
> + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
[root@f22ml ~]# reproducer2
main pid = 1506
monitor pid = 1507
child pid = 1508
grand-child pid = 1509
[root@f22ml ~]# grep "child" /proc/1506/mountinfo
243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1508/mountinfo
244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1509/mountinfo
245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
[root@f22ml ~]# grep "child" /proc/1507/mountinfo
246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
while expected info for 1507 would be:
246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
Thanks,
Maxim
[-- Attachment #2: reproducer1.c --]
[-- Type: text/x-csrc, Size: 2440 bytes --]
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sched.h>
/*
 * reproducer1: set up several mount namespaces with different propagation
 * types, then bind-mount /tmp/child onto itself in the top-level namespace
 * so that the new mount is propagated to all the others.
 *
 * Four processes are left sleeping so that /proc/<pid>/mountinfo of each
 * can be inspected:
 *   main        - "/" made recursively private, then shared
 *   monitor     - new namespace forked from main; "/" made shared, then slave
 *   child       - new namespace forked from main; "/" made slave, then shared
 *   grand-child - new namespace forked from child; "/" made shared, then slave
 *
 * Any failing setup step prints a diagnostic and exits with status 1.
 */
int main(void)
{
	const char *child = "/tmp/child";
	int ret;

	printf("main pid = %d\n", getpid());

	/* make our own private playground ... */
	ret = unshare(CLONE_NEWNS);
	if (ret) {
		perror("unshare");
		exit(1);
	}

	/* ... first detach everything from whatever propagation existed ... */
	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
	if (ret) {
		perror("mount");
		exit(1);
	}

	/* ... then make every mount shared again */
	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
	if (ret) {
		perror("mount2");
		exit(1);
	}

	/* fork monitor ... */
	ret = fork();
	if (ret < 0) {
		perror("fork");
		exit(1);
	} else if (!ret) {
		printf("monitor pid = %d\n", getpid());

		ret = unshare(CLONE_NEWNS);
		if (ret) {
			perror("unshare in monitor");
			exit(1);
		}

		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
		if (ret) {
			perror("mount in monitor");
			exit(1);
		}

		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
		if (ret) {
			perror("mount2 in monitor");
			exit(1);
		}

		/* sleep() takes an unsigned int: -1 asks for the longest sleep */
		sleep(-1);
	}

	/* wait for the monitor to finish its setup */
	sleep(1);

	/* fork child ... */
	ret = fork();
	if (ret < 0) {
		perror("fork");
		exit(1);
	} else if (!ret) {
		printf("child pid = %d\n", getpid());

		ret = unshare(CLONE_NEWNS);
		if (ret) {
			perror("unshare in child");
			exit(1);
		}

		/* slave first, then shared on top of it */
		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
		if (ret) {
			perror("mount in child");
			exit(1);
		}

		ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
		if (ret) {
			perror("mount2 in child");
			exit(1);
		}

		/*
		 * Check fork() here as well: without the grand-child the
		 * reproducer would silently run with one namespace missing.
		 */
		ret = fork();
		if (ret < 0) {
			perror("fork in child");
			exit(1);
		} else if (!ret) { /* grand-child */
			printf("grand-child pid = %d\n", getpid());

			ret = unshare(CLONE_NEWNS);
			if (ret) {
				perror("unshare in grand-child");
				exit(1);
			}

			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
			if (ret) {
				perror("mount in grand-child");
				exit(1);
			}

			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
			if (ret) {
				perror("mount2 in grand-child");
				exit(1);
			}

			sleep(-1);
		}

		sleep(-1);
	}

	/* wait for the child and grand-child to finish their setup */
	sleep(1);

	/* an already existing directory is fine */
	ret = mkdir(child, 0755);
	if (ret && errno != EEXIST) {
		perror("mkdir");
		exit(1);
	}

	/* let the "child" mount slip into everyone's namespaces ... */
	ret = mount(child, child, NULL, MS_BIND, NULL);
	if (ret) {
		perror("bind mount");
		exit(1);
	}

	/* stay alive so that mountinfo of all processes can be examined */
	sleep(-1);

	return 0;
}
[-- Attachment #3: reproducer2.c --]
[-- Type: text/x-csrc, Size: 2209 bytes --]
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sched.h>
/*
 * reproducer2: same scenario as reproducer1, except that the child does not
 * change the propagation type of its mounts after unsharing; it only creates
 * the new namespace and forks the grand-child.
 *
 * Four processes are left sleeping so that /proc/<pid>/mountinfo of each
 * can be inspected:
 *   main        - "/" made recursively private, then shared
 *   monitor     - new namespace forked from main; "/" made shared, then slave
 *   child       - new namespace forked from main; propagation left untouched
 *   grand-child - new namespace forked from child; "/" made shared, then slave
 *
 * Any failing setup step prints a diagnostic and exits with status 1.
 */
int main(void)
{
	const char *child = "/tmp/child";
	int ret;

	printf("main pid = %d\n", getpid());

	/* make our own private playground ... */
	ret = unshare(CLONE_NEWNS);
	if (ret) {
		perror("unshare");
		exit(1);
	}

	/* ... first detach everything from whatever propagation existed ... */
	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
	if (ret) {
		perror("mount");
		exit(1);
	}

	/* ... then make every mount shared again */
	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
	if (ret) {
		perror("mount2");
		exit(1);
	}

	/* fork monitor ... */
	ret = fork();
	if (ret < 0) {
		perror("fork");
		exit(1);
	} else if (!ret) {
		printf("monitor pid = %d\n", getpid());

		ret = unshare(CLONE_NEWNS);
		if (ret) {
			perror("unshare in monitor");
			exit(1);
		}

		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
		if (ret) {
			perror("mount in monitor");
			exit(1);
		}

		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
		if (ret) {
			perror("mount2 in monitor");
			exit(1);
		}

		/* sleep() takes an unsigned int: -1 asks for the longest sleep */
		sleep(-1);
	}

	/* wait for the monitor to finish its setup */
	sleep(1);

	/* fork child ... */
	ret = fork();
	if (ret < 0) {
		perror("fork");
		exit(1);
	} else if (!ret) {
		printf("child pid = %d\n", getpid());

		ret = unshare(CLONE_NEWNS);
		if (ret) {
			perror("unshare in child");
			exit(1);
		}

		/*
		 * Check fork() here as well: without the grand-child the
		 * reproducer would silently run with one namespace missing.
		 */
		ret = fork();
		if (ret < 0) {
			perror("fork in child");
			exit(1);
		} else if (!ret) { /* grand-child */
			printf("grand-child pid = %d\n", getpid());

			ret = unshare(CLONE_NEWNS);
			if (ret) {
				perror("unshare in grand-child");
				exit(1);
			}

			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
			if (ret) {
				perror("mount in grand-child");
				exit(1);
			}

			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
			if (ret) {
				perror("mount2 in grand-child");
				exit(1);
			}

			sleep(-1);
		}

		sleep(-1);
	}

	/* wait for the child and grand-child to finish their setup */
	sleep(1);

	/* an already existing directory is fine */
	ret = mkdir(child, 0755);
	if (ret && errno != EEXIST) {
		perror("mkdir");
		exit(1);
	}

	/* let the "child" mount slip into everyone's namespaces ... */
	ret = mount(child, child, NULL, MS_BIND, NULL);
	if (ret) {
		perror("bind mount");
		exit(1);
	}

	/* stay alive so that mountinfo of all processes can be examined */
	sleep(-1);

	return 0;
}
next prev parent reply other threads:[~2016-02-17 7:07 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-16 19:45 [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal Maxim Patlasov
2016-02-16 19:54 ` Al Viro
2016-02-17 7:07 ` Maxim Patlasov [this message]
2016-02-26 0:26 ` [Devel] " Andrew Vagin
2016-02-16 22:58 ` Andrew Vagin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56C41C34.900@virtuozzo.com \
--to=mpatlasov@virtuozzo.com \
--cc=devel@openvz.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.