[Devel] [PATCH v4 VZ10 1/1] fs: namespace: transform mount flags to comma separated values

Vasileios Almpanis vasileios.almpanis at virtuozzo.com
Fri Jun 26 11:53:02 MSK 2026


On 6/19/26 9:19 PM, Konstantin Khorenko wrote:
>    Regression found (1, High)
>
>    legacy_merge_mount_data() allocates with __get_free_page() (not zeroed) and only writes via strscpy(). When data is
>    empty/NULL and no sb flags are emitted, off stays 0, page[0] is never written, and the uninitialized, non-NUL-terminated
>    page is returned and parsed as the option string. This is the common case - an option-less container device mount
>    mount(dev, dir, "ext4", flags, NULL) (NULL data, plain rw mount → none of the common_set bits, sb_flags_mask == 0).
>    Result: security_sb_eat_lsm_opts() / strsep() walk garbage → spurious mount failure or unintended options, and a possible
>    OOB read past the page if it holds no NUL within PAGE_SIZE. Fix: terminate the buffer (page[off] = '\0' before return
Valid issue. I will send v5 with the fix
>    page;, consistent with the non-empty cases) or use get_zeroed_page(). Details in review-inline.txt.
>
>    FINAL REGRESSIONS FOUND: 1   (uninitialized mount-options page; High)
>
> --
> Best regards,
>
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
>
> On 6/3/26 17:03, Vasileios Almpanis wrote:
>> In legacy mount callpaths, userspace might pass mount options as
>> flags. These flags escape our checks in ve_devmnt_process allowing
>> devices to be mounted inside containers with options not specified in
>> the allowed field. Introduce helpers that take these flags and
>> already existing tables of flag -> string representation to construct
>> a comma separated value string from them, and append them to userspace
>> provided data. Then pass this string to parse_monolithic_mount_data
>> enforcing the same checks symmetrically in both mount and fsconfig
>> syscalls.
>>
>> In the remount path, run legacy_merge_mount_data() before
>> ve_devmnt_process() so container device mount policy sees MS_* flags
>> from the legacy mount(2) API, not only the user-supplied option string.
>> Keep ve_prepare_mount_options() for legacy parsers that do not use
>> generic_parse_monolithic().
>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-132330
>> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
>>
>> Feature: ve: ve generic structures
>> ---
>> Changes since v3:
>>    - Drop excess length check in legacy_merge_mount_data
>>
>> Changes since v2:
>>    - Remove legacy_merge_mount_data guard in fs/internal.h. All helpers
>>      don't use anything that would break build and just unchanged pointer
>>      will be returned.
>>    - Add __vfs_format_flags helper and use it in vfs_format_sb_flags
>>    - Fix inconsistent error code: replace -ENOSPC with -E2BIG for the
>>      buffer-too-small case
>>
>> Changes since v1:
>>    - Replace open-coded flag loops with append_entry() helper (pointer-
>>      advancing style) that unifies the comma-insert-copy pattern across
>>      all three call sites
>>    - Rework legacy_merge_mount_data() to allocate the page upfront, write
>>      user data first then append sb flags via vfs_format_sb_flags(); this
>>      eliminates the intermediate flags_buf[128], the total size calculation,
>>      and the +1/+2 arithmetic
>>    - Fix inconsistent error code: replace -EINVAL with -ENOSPC for the
>>      buffer-too-small case
>>    - Add comment on FS_BINARY_MOUNTDATA explaining why those filesystems
>>      are skipped
>>    - Add blank line before return in legacy_merge_mount_data()
>>    - Remove excess blank line after parse_monolithic_mount_data() in
>>      do_remount()
>>
>>   fs/fs_context.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   fs/internal.h   |   1 +
>>   fs/namespace.c  |  32 +++++++++++----
>>   3 files changed, 132 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/fs_context.c b/fs/fs_context.c
>> index 76f34f3d468e..d5129e4a919a 100644
>> --- a/fs/fs_context.c
>> +++ b/fs/fs_context.c
>> @@ -81,6 +81,112 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
>>   	return -ENOPARAM;
>>   }
>>   
>> +/*
>> + * Emit, into @buff at *@off, the comma-separated names of every entry in @p
>> + * whose bit is set in @flags.  Advances *@off past the written text.
>> + * Returns 0 on success or -ENOSPC if the buffer is too small.
>> + */
>> +static int __vfs_format_flags(const struct constant_table *p, unsigned int flags,
>> +			      char *buff, size_t size, size_t *off)
>> +{
>> +	for (; p->name; p++) {
>> +		ssize_t ret;
>> +
>> +		if (!(flags & p->value))
>> +			continue;
>> +
>> +		if (*off) {
>> +			if (*off + 1 >= size)
>> +				return -E2BIG;
>> +			buff[(*off)++] = ',';
>> +		}
>> +
>> +		ret = strscpy(buff + *off, p->name, size - *off);
>> +		if (ret < 0)
>> +			return -E2BIG;
>> +		*off += ret;
>> +	}
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Append fc's sb flags as comma-separated option names into @buff, advancing
>> + * *@off.  Matches vfs_parse_sb_flag() / common_{set,clear}_sb_flag tables.
>> + */
>> +static int vfs_format_sb_flags(struct fs_context *fc, char *buff, size_t size,
>> +			       size_t *off)
>> +{
>> +	int ret;
>> +
>> +	/* Positive names (ro, sync, ...) for bits that are on. */
>> +	ret = __vfs_format_flags(common_set_sb_flag, fc->sb_flags,
>> +				 buff, size, off);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/*
>> +	 * Negative names (rw, async, ...) only for bits being turned off: in
>> +	 * the mask but off in the value.  __vfs_format_flags() emits when
>> +	 * (arg & bit), so the emit set is "mask & ~flags".
>> +	 */
>> +	return __vfs_format_flags(common_clear_sb_flag,
>> +				  fc->sb_flags_mask & ~fc->sb_flags,
>> +				  buff, size, off);
>> +}
>> +
>> +/*
>> + * For legacy mount(2), MS_* mount flags are folded into fc->sb_flags and are
>> + * not present in the monolithic data string.  Build a page with user data
>> + * followed by those flags for ve_devmnt checks in vfs_parse_monolithic_sep.
>> + *
>> + * Returns @data when nothing needs to be added, a new page otherwise, or
>> + * ERR_PTR() on failure.  The caller must free_page() when the result != @data.
>> + */
>> +void *legacy_merge_mount_data(struct fs_context *fc, void *data)
>> +{
>> +	struct ve_struct *ve = get_exec_env();
>> +	size_t off = 0;
>> +	char *page;
>> +	int err;
>> +
>> +	if (ve_is_super(ve))
>> +		return data;
>> +
>> +	if (!fc->fs_type || !(fc->fs_type->fs_flags & FS_REQUIRES_DEV))
>> +		return data;
>> +
>> +	/*
>> +	 * Filesystems with binary mount data (e.g. btrfs) bypass option
>> +	 * string parsing entirely, so our checks cannot apply here.
>> +	 */
>> +	if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
>> +		return data;
>> +
>> +	page = (char *)__get_free_page(GFP_KERNEL);
>> +	if (!page)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	if (data && *(char *)data) {
>> +		ssize_t ret = strscpy(page, data, PAGE_SIZE);
>> +
>> +		if (ret < 0) {
>> +			err = -E2BIG;
>> +			goto err_free;
>> +		}
>> +		off = ret;
>> +	}
>> +
>> +	err = vfs_format_sb_flags(fc, page, PAGE_SIZE, &off);
>> +	if (err)
>> +		goto err_free;
>> +
>> +	return page;
>> +
>> +err_free:
>> +	free_page((unsigned long)page);
>> +	return ERR_PTR(err);
>> +}
>> +
>>   /**
>>    * vfs_parse_fs_param_source - Handle setting "source" via parameter
>>    * @fc: The filesystem context to modify
>> diff --git a/fs/internal.h b/fs/internal.h
>> index 0064345cb9d0..147130cab046 100644
>> --- a/fs/internal.h
>> +++ b/fs/internal.h
>> @@ -46,6 +46,7 @@ extern void __init chrdev_init(void);
>>    */
>>   extern const struct fs_context_operations legacy_fs_context_ops;
>>   extern int parse_monolithic_mount_data(struct fs_context *, void *);
>> +extern void *legacy_merge_mount_data(struct fs_context *fc, void *data);
>>   extern void vfs_clean_context(struct fs_context *fc);
>>   extern int finish_clean_context(struct fs_context *fc);
>>   
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index acd4507e1247..d4a419a0bca9 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -3295,6 +3295,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
>>   	struct super_block *sb = path->mnt->mnt_sb;
>>   	struct mount *mnt = real_mount(path->mnt);
>>   	struct fs_context *fc;
>> +	void *mnt_data = NULL;
>>   
>>   	if (!check_mnt(mnt))
>>   		return -EINVAL;
>> @@ -3315,13 +3316,17 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
>>   	 */
>>   	fc->oldapi = true;
>>   
>> -	err = ve_prepare_mount_options(fc, data);
>> -	if (err) {
>> -		put_fs_context(fc);
>> -		return err;
>> +	mnt_data = legacy_merge_mount_data(fc, data);
>> +	if (IS_ERR(mnt_data)) {
>> +		err = PTR_ERR(mnt_data);
>> +		goto err;
>>   	}
>>   
>> -	err = parse_monolithic_mount_data(fc, data);
>> +	err = ve_prepare_mount_options(fc, mnt_data);
>> +	if (err)
>> +		goto free_mnt_data;
>> +
>> +	err = parse_monolithic_mount_data(fc, mnt_data);
>>   	if (!err) {
>>   		down_write(&sb->s_umount);
>>   		err = -EPERM;
>> @@ -3338,6 +3343,10 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
>>   
>>   	mnt_warn_timestamp_expiry(path, &mnt->mnt);
>>   
>> +free_mnt_data:
>> +	if (mnt_data != data)
>> +		free_page((unsigned long)mnt_data);
>> +err:
>>   	put_fs_context(fc);
>>   	return err;
>>   }
>> @@ -3774,8 +3783,17 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
>>   					  subtype, strlen(subtype));
>>   	if (!err && name)
>>   		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
>> -	if (!err)
>> -		err = parse_monolithic_mount_data(fc, data);
>> +	if (!err) {
>> +		void *mnt_data = legacy_merge_mount_data(fc, data);
>> +
>> +		if (IS_ERR(mnt_data)) {
>> +			err = PTR_ERR(mnt_data);
>> +		} else {
>> +			err = parse_monolithic_mount_data(fc, mnt_data);
>> +			if (mnt_data != data)
>> +				free_page((unsigned long)mnt_data);
>> +		}
>> +	}
>>   	if (!err && !mount_capable(fc))
>>   		err = -EPERM;
>>   	if (!err)

-- 
Best regards, Vasileios Almpanis
Software Developer, Virtuozzo.



More information about the Devel mailing list