[Devel] [PATCH RH8] ve/fs/binfmt: fix EBUSY on mounting second binfmt_misc in CT
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Wed Aug 11 15:41:39 MSK 2021
On 06.08.2021 17:29, Alexander Mikhalitsyn wrote:
> After rebase to RHEL 8.4 binfmt_misc fs uses new fscontext API,
> our implementation is incorrect. We have to use get_tree_keyed()
> helper instead of get_tree_single() which allows only one
> superblock per HN.
>
> Fixes: 90fb0e274 ("ve/fs/binfmt: virtualization")
>
> https://jira.sw.ru/browse/PSBM-132709
>
> Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
> ---
> fs/binfmt_misc.c | 42 ++++++++++++++++++++++++++++++++++++------
> 1 file changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
> index c21d03fd3f15..ad1d9d603034 100644
> --- a/fs/binfmt_misc.c
> +++ b/fs/binfmt_misc.c
> @@ -833,10 +833,8 @@ static const struct file_operations bm_status_operations = {
> static void bm_put_super(struct super_block *sb)
> {
> struct binfmt_misc *bm_data = BINFMT_MISC(sb);
> - struct ve_struct *ve = sb->s_fs_info;
>
> bm_data->enabled = 0;
> - put_ve(ve);
> }
>
> static const struct super_operations s_ops = {
> @@ -875,8 +873,6 @@ static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
> }
>
> sb->s_op = &s_ops;
> - sb->s_fs_info = ve;
> - get_ve(ve);
>
> bm_data->enabled = 1;
>
> @@ -885,10 +881,36 @@ static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
>
> static int bm_get_tree(struct fs_context *fc)
> {
> - return get_tree_single(fc, bm_fill_super);
> + struct ve_struct *ve = get_exec_env();
> +
> + /*
> + * We need one binfmt_misc superblock per VE,
> + * use get_tree_keyed() helper to get vfs_tree.
> + *
> + * It allows us to find sb by key (in our case ve is the key),
> + * and if it doesn't exists creates new.
> + *
> + * Important: we take ve refcnt here. It will be put
> + * in one of two places:
> + * 1. bm_free_fc() on error path (wrong mnt opt provided for instance)
> + * 2. bm_umount() when sb refcnt becomes zero (last mount umounted)
> + */
> + return get_tree_keyed(fc, bm_fill_super, get_ve(ve));
I can imagine that we can get here as much times as we want (N), each
mount syscall would get us here. So in case of no error paths we take N
references on same VE but ->kill_sb( would be called only once and we
release only one reference. This way VE would likely become stuck forever...
> +}
> +
> +static void bm_free_fc(struct fs_context *fc)
> +{
> + /*
> + * fc->s_fs_info will be NULL if *no* error was occured
> + * see fs/super.c sget_fc() helper
> + */
> + if (fc->s_fs_info)
> + put_ve(fc->s_fs_info);
> }
>
> +
> static const struct fs_context_operations bm_context_ops = {
> + .free = bm_free_fc,
> .get_tree = bm_get_tree,
> };
>
> @@ -898,6 +920,14 @@ static int bm_init_fs_context(struct fs_context *fc)
> return 0;
> }
>
> +static void bm_umount(struct super_block *sb)
> +{
> + struct ve_struct *ve = sb->s_fs_info;
> +
> + kill_litter_super(sb);
> + put_ve(ve);
> +}
> +
> static struct linux_binfmt misc_format = {
> .module = THIS_MODULE,
> .load_binary = load_misc_binary,
> @@ -907,7 +937,7 @@ static struct file_system_type bm_fs_type = {
> .owner = THIS_MODULE,
> .name = "binfmt_misc",
> .init_fs_context = bm_init_fs_context,
> - .kill_sb = kill_litter_super,
> + .kill_sb = bm_umount,
> .fs_flags = FS_VIRTUALIZED | FS_VE_MOUNT,
> };
> MODULE_ALIAS_FS("binfmt_misc");
>
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list