[Devel] [PATCH] fs/pnode.c: treat zero mnt_group_id-s as unequal

Andrew Vagin avagin at virtuozzo.com
Thu Feb 25 16:26:13 PST 2016


On Tue, Feb 16, 2016 at 11:07:32PM -0800, Maxim Patlasov wrote:
> On 02/16/2016 11:54 AM, Al Viro wrote:
> >On Tue, Feb 16, 2016 at 11:45:33AM -0800, Maxim Patlasov wrote:
> >>propagate_one(m) calculates "type" argument for copy_tree() like this:
> >>
> >>>    if (m->mnt_group_id == last_dest->mnt_group_id) {
> >>>        type = CL_MAKE_SHARED;
> >>>    } else {
> >>>        type = CL_SLAVE;
> >>>        if (IS_MNT_SHARED(m))
> >>>           type |= CL_MAKE_SHARED;
> >>>   }
> >>The "type" argument then governs clone_mnt() behavior with respect to flags
> >>and mnt_master of new mount. When we iterate through a slave group, it is
> >>possible that both current "m" and "last_dest" are not shared (although,
> >>both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
> >>above erroneously makes new mount shared and sets its mnt_master to
> >>last_source->mnt_master. The patch fixes the problem by handling zero
> >>mnt_group_id-s as though they are unequal.
> >>
> >>The similar problem exists in the implementation of "else" clause above
> >>when we have to ascend upward in the master/slave tree by calling:
> >>
> >>>    last_source = last_source->mnt_master;
> >>>    last_dest = last_source->mnt_parent;
> >>proper number of times. The last step is governed by
> >>"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
> >>both are zero. The patch fixes this case in the same way as the former one.
> >Mind putting together a reproducer?
> 
> There are two files attached: reproducer1.c and reproducer2.c. The former
> demonstrates the problem before applying the patch. The latter demonstrates
> why the first hunk of the patch is not enough.
> 
> [root at f22ml ~]# reproducer1
> main pid = 1496
> monitor pid = 1497
> child pid = 1498
> grand-child pid = 1499
> 
> [root at f22ml ~]# grep "child" /proc/1496/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1498/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:127 master:93 - tmpfs tmpfs
> rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1499/mountinfo
> 245 240 0:37 /child /tmp/child rw master:127 - tmpfs tmpfs rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1497/mountinfo
> 246 176 0:37 /child /tmp/child rw shared:128 master:127 - tmpfs tmpfs
> rw,seclabel
> 
> while expected info for 1497 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
>

Here is a simpler reproducer without additional namespaces and
processes.

[root at fc22-vm tmp]# cat test.sh 
set -e
d=`pwd`
mount -t tmpfs test $d
cd $d
mkdir root
mount -t tmpfs root root
mount --make-shared root
mkdir monitor
mount --bind root monitor/
mount --make-slave monitor/
mkdir child
mount --bind root child/
mount --make-slave child
mount --make-shared child/
mkdir grand_child
mount --bind child grand_child
mount --make-slave grand_child
mkdir root/test
mount --bind root/test root/test

cat /proc/self/mountinfo | grep $d
echo ---
cat /proc/self/mountinfo | grep monitor/test  | grep shared && echo FAIL || echo PASS
exit

[root at fc22-vm tmp]# bash test.sh 
80 61 0:41 / /root/tmp rw,relatime shared:32 - tmpfs test rw
82 80 0:42 / /root/tmp/root rw,relatime shared:33 - tmpfs root rw
84 80 0:42 / /root/tmp/monitor rw,relatime master:33 - tmpfs root rw
86 80 0:42 / /root/tmp/child rw,relatime shared:34 master:33 - tmpfs root rw
88 80 0:42 / /root/tmp/grand_child rw,relatime master:34 - tmpfs root rw
90 82 0:42 /test /root/tmp/root/test rw,relatime shared:33 - tmpfs root rw
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
92 88 0:42 /test /root/tmp/grand_child/test rw,relatime master:35 - tmpfs root rw
91 86 0:42 /test /root/tmp/child/test rw,relatime shared:35 master:33 - tmpfs root rw
---
94 84 0:42 /test /root/tmp/monitor/test rw,relatime shared:36 master:35 - tmpfs root rw
FAIL

> Now, assuming that only the first hunk of the patch is applied:
> 
> > -    if (m->mnt_group_id == last_dest->mnt_group_id) {
> > +    if (m->mnt_group_id && m->mnt_group_id == last_dest->mnt_group_id) {
> 
> [root at f22ml ~]# reproducer2
> main pid = 1506
> monitor pid = 1507
> child pid = 1508
> grand-child pid = 1509
> 
> [root at f22ml ~]# grep "child" /proc/1506/mountinfo
> 243 144 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1508/mountinfo
> 244 208 0:37 /child /tmp/child rw shared:93 - tmpfs tmpfs rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1509/mountinfo
> 245 240 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> [root at f22ml ~]# grep "child" /proc/1507/mountinfo
> 246 176 0:37 /child /tmp/child rw master:0 - tmpfs tmpfs rw,seclabel
> 
> while expected info for 1507 would be:
> 246 176 0:37 /child /tmp/child rw master:93 - tmpfs tmpfs rw,seclabel
> 
> Thanks,
> Maxim

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
> 
> int main()
> {
> 	const char *child  = "/tmp/child";
> 	int ret;
> 
> 	printf("main pid = %d\n", getpid());	
> 
> 	/* make our own private playground ... */
> 	ret = unshare(CLONE_NEWNS);
> 	if (ret) {
> 		perror("unshare");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> 	if (ret) {
> 		perror("mount");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 	if (ret) {
> 		perror("mount2");
> 		exit(1);
> 	}
> 
> 	/* fork monitor ... */
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("monitor pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount2 in monitor");
> 			exit(1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait monitor to setup */
> 	sleep(1);
> 
> 
> 	/* fork child ... */	
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("child pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in child");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount in child");
> 			exit(1);
> 		}
> 		ret = mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount2 in child");
> 			exit(1);
> 		}
> 
> 		if (!fork()) { /* grand-child */
> 			printf("grand-child pid = %d\n", getpid());
> 			ret = unshare(CLONE_NEWNS);
> 			if (ret) {
> 				perror("unshare in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 			if (ret) {
> 				perror("mount in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 			if (ret) {
> 				perror("mount2 in grand-child");
> 				exit(1);
> 			}
> 
> 			sleep(-1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait child and grand-child to setup */
> 	sleep(1);
> 	
> 	ret = mkdir(child, 0755);
> 	if (ret &&  errno != EEXIST) {
> 		perror("mkdir");
> 		exit(1);
> 	}
> 
> 	/* let "child" mount slip to everyone' namespaces ... */
> 	ret = mount(child, child, NULL, MS_BIND, NULL);
> 	if (ret) {
> 		perror("bind mount");
> 		exit(1);
> 	}
> 
> 	sleep(-1);
> }
> 

> #define _GNU_SOURCE
> #include <stdio.h>
> #include <unistd.h>
> #include <stdlib.h>
> #include <sys/stat.h>
> #include <sys/types.h>
> #include <errno.h>
> #include <sys/mount.h>
> #include <sys/syscall.h>
> #include <sched.h>
> 
> int main()
> {
> 	const char *child  = "/tmp/child";
> 	int ret;
> 
> 	printf("main pid = %d\n", getpid());	
> 
> 	/* make our own private playground ... */
> 	ret = unshare(CLONE_NEWNS);
> 	if (ret) {
> 		perror("unshare");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
> 	if (ret) {
> 		perror("mount");
> 		exit(1);
> 	}
> 
> 	ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 	if (ret) {
> 		perror("mount2");
> 		exit(1);
> 	}
> 
> 	/* fork monitor ... */
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("monitor pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 		if (ret) {
> 			perror("mount in monitor");
> 			exit(1);
> 		}
> 
> 		ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 		if (ret) {
> 			perror("mount2 in monitor");
> 			exit(1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait monitor to setup */
> 	sleep(1);
> 
> 
> 	/* fork child ... */	
> 	ret = fork();
> 	if (ret < 0) {
> 		perror("fork");
> 		exit(1);
> 	} else if (!ret) {
> 		printf("child pid = %d\n", getpid());
> 
> 		ret = unshare(CLONE_NEWNS);
> 		if (ret) {
> 			perror("unshare in child");
> 			exit(1);
> 		}
> 
> 		if (!fork()) { /* grand-child */
> 			printf("grand-child pid = %d\n", getpid());
> 			ret = unshare(CLONE_NEWNS);
> 			if (ret) {
> 				perror("unshare in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SHARED, NULL);
> 			if (ret) {
> 				perror("mount in grand-child");
> 				exit(1);
> 			}
> 
> 			ret = mount("none", "/", NULL, MS_REC|MS_SLAVE, NULL);
> 			if (ret) {
> 				perror("mount2 in grand-child");
> 				exit(1);
> 			}
> 
> 			sleep(-1);
> 		}
> 
> 		sleep(-1);
> 	}
> 
> 	/* wait child and grand-child to setup */
> 	sleep(1);
> 	
> 	ret = mkdir(child, 0755);
> 	if (ret &&  errno != EEXIST) {
> 		perror("mkdir");
> 		exit(1);
> 	}
> 
> 	/* let "child" mount slip to everyone' namespaces ... */
> 	ret = mount(child, child, NULL, MS_BIND, NULL);
> 	if (ret) {
> 		perror("bind mount");
> 		exit(1);
> 	}
> 
> 	sleep(-1);
> }
> 

> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel



More information about the Devel mailing list