[Devel] [PATCH vz8] fs/kernfs: remove bogus kernfs_test_ve() check

Andrey Ryabinin aryabinin at virtuozzo.com
Mon Jul 13 15:58:03 MSK 2020


cgroups now also use kernfs subsystem. kernfs_test_ve() check breaks
it when we try to mount cgroup from ve.

cgroup1_mount() tries to reuse the existing superblock by calling
kernfs_pin_sb to pin it during mount operation

cgroup1_mount():
		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
		...

Then it calls cgroup_do_mount() which fails to reuse the existing
superblock because of failed kernfs_test_ve() check, so instead
it creates a new one:

	dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
				 CGROUP_SUPER_MAGIC, ns);

So now we have two superblocks instead of one which may lead to
the double percpu_ref_kill_and_confirm() call. E.g. :

umount /sys/fs/cgroup/rdma
vzctl exec CTID umount /sys/fs/cgroup/rdma

	------------[ cut here ]------------
	percpu_ref_kill_and_confirm called more than once on css_release!
	WARNING: CPU: 2 PID: 2664 at lib/percpu-refcount.c:336 percpu_ref_kill_and_confirm+0x7c/0xa0
	CPU: 2 PID: 2664 Comm: kworker/2:5 ve: / Tainted: G                 --------- -t - 4.18.0-80.1.2.vz8.3.12 #1 3.12
	RIP: 0010:percpu_ref_kill_and_confirm+0x7c/0xa0
	Call Trace:
	 cgroup_kill_sb+0x64/0x90
	 deactivate_locked_super+0x34/0x70
	 cleanup_mnt+0x3b/0x70
	 delayed_mntput+0x26/0x40
	 process_one_work+0x1a7/0x360
	 worker_thread+0x30/0x390
	 kthread+0x112/0x130
	 ret_from_fork+0x35/0x40
	---[ end trace 0cb674141e1a86e3 ]---
	CT: d3cd4dca-ce04-475d-b5d5-c9ef1f24642a: stopped

Remove the kernfs_test_ve() check to fix this.

As for sysfs which is also uses kernfs, I don't think it gonna cause
something bad. Sysfs uses net namespaces (info->ns && sb_info->ns) which
should be enough for us. We enter CT's net ns before mounting sysfs in CT.

Also note that commands like "vzctl set 101 --netif_add eth0 --save"
calls ip util (which mount sysfs) from ve0 and CT's net namespace.
So with the removal of kernfs_test_ve() check kernfs_test_super()
now return success instead of fail as before. Which is totally
fine IMO, as we reuse the existing CT's sysfs superblock
instead of creating a new one.

https://jira.sw.ru/browse/PSBM-104902
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 fs/kernfs/kernfs-ve.h | 9 ---------
 fs/kernfs/mount.c     | 3 ---
 fs/kernfs/ve.c        | 6 ------
 3 files changed, 18 deletions(-)

diff --git a/fs/kernfs/kernfs-ve.h b/fs/kernfs/kernfs-ve.h
index f1d497b1b1a5..87c480b21d0d 100644
--- a/fs/kernfs/kernfs-ve.h
+++ b/fs/kernfs/kernfs-ve.h
@@ -16,9 +16,6 @@ struct kmapset;
 
 #ifdef CONFIG_VE
 
-int kernfs_test_ve(struct kernfs_super_info *sb_info,
-		   struct kernfs_super_info *info);
-
 void kernfs_get_ve_perms(struct kernfs_node *kn);
 void kernfs_put_ve_perms(struct kernfs_node *kn);
 
@@ -31,12 +28,6 @@ bool kernfs_d_visible(struct kernfs_node *kn, struct kernfs_super_info *info);
 
 #else // CONFIG_VE
 
-static inline int kernfs_test_ve(struct kernfs_super_info *sb_info,
-				 struct kernfs_super_info *info)
-{
-	return 0;
-}
-
 void kernfs_get_ve_perms(struct kernfs_node *kn) { }
 void kernfs_put_ve_perms(struct kernfs_node *kn) { }
 
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f339a037f90e..0cbb55dd6dca 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -262,9 +262,6 @@ static int kernfs_test_super(struct super_block *sb, void *data)
 	struct kernfs_super_info *sb_info = kernfs_info(sb);
 	struct kernfs_super_info *info = data;
 
-	if (!kernfs_test_ve(sb_info, info))
-		return 0;
-
 	return sb_info->root == info->root && sb_info->ns == info->ns;
 }
 
diff --git a/fs/kernfs/ve.c b/fs/kernfs/ve.c
index 72e45daa23ae..0c4c28cc745d 100644
--- a/fs/kernfs/ve.c
+++ b/fs/kernfs/ve.c
@@ -49,12 +49,6 @@ int kernfs_ve_allowed(struct kernfs_node *kn)
 	return !kn->ve_perms_map || ve_is_super(get_exec_env());
 }
 
-int kernfs_test_ve(struct kernfs_super_info *sb_info,
-		   struct kernfs_super_info *info)
-{
-	return sb_info->ve == info->ve;
-}
-
 static struct kmapset_key *kernfs_info_perms_key(struct kernfs_super_info *info)
 {
 	return (void *)get_exec_env() + info->ve_perms_off;
-- 
2.26.2



More information about the Devel mailing list