[Devel] [PATCH RHEL9 COMMIT] binfmt_misc: fix mount after umount in CT
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Oct 25 16:57:10 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.17
------>
commit b5caa8e3e42efeba5d4e57ce79dcb1562047396b
Author: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
Date: Mon Oct 25 16:57:10 2021 +0300
binfmt_misc: fix mount after umount in CT
The assumption that bm_fill_super() is not called for the second time
for CT is wrong: umount operation clears sb->s_root, which causes
vfs_get_super() to call fill_super again on the next mount.
Make bm_fill_super() handle multiple calls correctly:
- initialize bm_data and set ve->binfmt_misc only if it is not done
before,
- delay its destruction until CT destruction.
https://jira.sw.ru/browse/PSBM-133968
Fixes: edb6893b99b2 ("ve/fs/binfmt: virtualization")
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
fs/binfmt_misc.c | 58 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 32 insertions(+), 26 deletions(-)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 628d4fc2db94..a7ec4daff163 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -805,39 +805,32 @@ static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
/* last one */ {""}
};
+#ifdef CONFIG_VE
struct ve_struct *ve = get_exec_env();
- struct binfmt_misc *bm_data;
+ struct binfmt_misc *bm_data = ve->binfmt_misc;
+#else
+ static struct binfmt_misc *bm_data = NULL;
+#endif
- /*
- * bm_get_tree()
- * get_tree_keyed(fc, bm_fill_super, get_ve(ve))
- * fc->s_fs_info = current VE
- * vfs_get_super(fc, vfs_get_keyed_super, bm_fill_super)
- * sb = sget_fc(fc, test, set_anon_super_fc)
- * if (!sb->s_root) {
- * err = bm_fill_super(sb, fc);
- *
- * => we should never get here with initialized ve->binfmt_misc.
- */
- if (WARN_ON_ONCE(ve->binfmt_misc))
- return -EEXIST;
+ if (!bm_data) {
+ bm_data = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL);
+ if (!bm_data)
+ return -ENOMEM;
- bm_data = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL);
- if (!bm_data)
- return -ENOMEM;
+ INIT_LIST_HEAD(&bm_data->entries);
+ rwlock_init(&bm_data->entries_lock);
- INIT_LIST_HEAD(&bm_data->entries);
- rwlock_init(&bm_data->entries_lock);
+#ifdef CONFIG_VE
+ ve->binfmt_misc = bm_data;
+ /* this will be cleared by ve_destroy() */
+#endif
+ }
err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
- if (err) {
- kfree(bm_data);
+ if (err)
return err;
- }
sb->s_op = &s_ops;
-
- ve->binfmt_misc = bm_data;
bm_data->enabled = 1;
return 0;
@@ -909,6 +902,7 @@ static struct file_system_type bm_fs_type = {
};
MODULE_ALIAS_FS("binfmt_misc");
+#ifdef CONFIG_VE
static void ve_binfmt_fini(void *data)
{
struct ve_struct *ve = data;
@@ -918,8 +912,17 @@ static void ve_binfmt_fini(void *data)
return;
/*
- * XXX: Note we don't take any locks here. This is safe as long as
- * nobody uses binfmt_misc outside the owner ve.
+ * This is called when VE is being destructed, no more processes are
+ * in VE and thus use of bm_data is unexpected.
+ *
+ * Still, there is a possibility for a race, if a host process
+ * explicitly enters VE's mount namespace and accesses files on
+ * binfmt_misc mount, while VE is being destructed.
+ *
+ * This is extremely unlikely so ignore it for now.
+ *
+ * To fix, need to move this to ve_destroy() path that is executed when
+ * no more references to VE are left.
*/
while (!list_empty(&bm_data->entries))
kill_node(bm_data, list_first_entry(
@@ -931,6 +934,9 @@ static struct ve_hook ve_binfmt_hook = {
.priority = HOOK_PRIO_DEFAULT,
.owner = THIS_MODULE,
};
+#else
+#define ve_binfmt_hook 0
+#endif
static int __init init_misc_binfmt(void)
{
More information about the Devel
mailing list