From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Subject: Re: [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal To: Al Viro References: <20160216194527.9291.54622.stgit@maxim-thinkpad> <20160216195423.GC17997@ZenIV.linux.org.uk> CC: , , From: Maxim Patlasov Message-ID: <56C41C34.900@virtuozzo.com> Date: Tue, 16 Feb 2016 23:07:32 -0800 MIME-Version: 1.0 In-Reply-To: <20160216195423.GC17997@ZenIV.linux.org.uk> Content-Type: multipart/mixed; boundary="------------040505080001020009000103" Sender: linux-kernel-owner@vger.kernel.org List-ID: --------------040505080001020009000103 Content-Type: text/plain; charset="windows-1252"; format=flowed Content-Transfer-Encoding: 7bit On 02/16/2016 11:54 AM, Al Viro wrote: > On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote: >> propagate_one(m) calculates "type" argument for copy_tree() like this: >> >>> if (m->mnt_group_id == last_dest->mnt_group_id) { >>> type = CL_MAKE_SHARED; >>> } else { >>> type = CL_SLAVE; >>> if (IS_MNT_SHARED(m)) >>> type |= CL_MAKE_SHARED; >>> } >> The "type" argument then governs clone_mnt() behavior with respect to flags >> and mnt_master of new mount. When we iterate through a slave group, it is >> possible that both current "m" and "last_dest" are not shared (although, >> both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison >> above erroneously makes new mount shared and sets its mnt_master to >> last_source->mnt_master. The patch fixes the problem by handling zero >> mnt_group_id-s as though they are unequal. >> >> The similar problem exists in the implementation of "else" clause above >> when we have to ascend upward in the master/slave tree by calling: >> >>> last_source = last_source->mnt_master; >>> last_dest = last_source->mnt_parent; >> proper number of times. The last step is governed by >> "n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if >> both are zero. The patch fixes this case in the same way as the former one. > Mind putting together a reproducer? There are two files attached: reproducer1.c and reproducer2.c. The former demonstrates the problem before applying the patch. The latter demonstrates why the first hunk of the patch is not enough. [root@f22ml ~]# reproducer1 main pid = 1496 monitor pid = 1497 child pid = 1498 grand-child pid = 1499 [root@f22ml ~]# grep "child" /proc/1496/mountinfo 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1498/mountinfo 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1499/mountinfo 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1497/mountinfo 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs rw,seclabel while expected info for 1497 would be: 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel Now, assuming that only the first hunk of the patch is applied: > - if (m->mnt_group_id == last_dest->mnt_group_id) { > + if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) { [root@f22ml ~]# reproducer2 main pid = 1506 monitor pid = 1507 child pid = 1508 grand-child pid = 1509 [root@f22ml ~]# grep "child" /proc/1506/mountinfo 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1508/mountinfo 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1509/mountinfo 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel [root@f22ml ~]# grep "child" /proc/1507/mountinfo 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel while expected info for 1507 would be: 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel Thanks, Maxim --------------040505080001020009000103 Content-Type: text/x-csrc; name="reproducer1.c" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="reproducer1.c" #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include int main() { const char *child = "/tmp/child"; int ret; printf("main pid = %d\n", getpid()); /* make our own private playground ... */ ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL); if (ret) { perror("mount"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount2"); exit(1); } /* fork monitor ... */ ret = fork(); if (ret < 0) { perror("fork"); exit(1); } else if (!ret) { printf("monitor pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in monitor"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount in monitor"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); if (ret) { perror("mount2 in monitor"); exit(1); } sleep(-1); } /* wait monitor to setup */ sleep(1); /* fork child ... */ ret = fork(); if (ret < 0) { perror("fork"); exit(1); } else if (!ret) { printf("child pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in child"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); if (ret) { perror("mount in child"); exit(1); } ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount2 in child"); exit(1); } if (!fork()) { /* grand-child */ printf("grand-child pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in grand-child"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount in grand-child"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); if (ret) { perror("mount2 in grand-child"); exit(1); } sleep(-1); } sleep(-1); } /* wait child and grand-child to setup */ sleep(1); ret = mkdir(child, 0755); if (ret && errno != EEXIST) { perror("mkdir"); exit(1); } /* let "child" mount slip to everyone' namespaces ... */ ret = mount(child, child, NULL, MS_BIND, NULL); if (ret) { perror("bind mount"); exit(1); } sleep(-1); } --------------040505080001020009000103 Content-Type: text/x-csrc; name="reproducer2.c" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="reproducer2.c" #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include int main() { const char *child = "/tmp/child"; int ret; printf("main pid = %d\n", getpid()); /* make our own private playground ... */ ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL); if (ret) { perror("mount"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount2"); exit(1); } /* fork monitor ... */ ret = fork(); if (ret < 0) { perror("fork"); exit(1); } else if (!ret) { printf("monitor pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in monitor"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount in monitor"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); if (ret) { perror("mount2 in monitor"); exit(1); } sleep(-1); } /* wait monitor to setup */ sleep(1); /* fork child ... */ ret = fork(); if (ret < 0) { perror("fork"); exit(1); } else if (!ret) { printf("child pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in child"); exit(1); } if (!fork()) { /* grand-child */ printf("grand-child pid = %d\n", getpid()); ret = unshare(CLONE_NEWNS); if (ret) { perror("unshare in grand-child"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL); if (ret) { perror("mount in grand-child"); exit(1); } ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL); if (ret) { perror("mount2 in grand-child"); exit(1); } sleep(-1); } sleep(-1); } /* wait child and grand-child to setup */ sleep(1); ret = mkdir(child, 0755); if (ret && errno != EEXIST) { perror("mkdir"); exit(1); } /* let "child" mount slip to everyone' namespaces ... */ ret = mount(child, child, NULL, MS_BIND, NULL); if (ret) { perror("bind mount"); exit(1); } sleep(-1); } --------------040505080001020009000103--