[Devel] [PATCH v3 VZ10 1/1] fs: namespace: transform mount flags to comma separated values

Vasileios Almpanis vasileios.almpanis at virtuozzo.com
Wed Jun 3 15:32:33 MSK 2026


In legacy mount callpaths, userspace might pass mount options as
flags. These flags escape our checks in ve_devmnt_process allowing
devices to be mounted inside containers with options not specified in
the allowed field. Introduce helpers that take these flags and
already existing tables of flag -> string representation to construct
a comma separated value string from them, and append them to userspace
provided data. Then pass this string to parse_monolithic_mount_data
enforcing the same checks symmetrically in both mount and fsconfig
syscalls.

In the remount path, run legacy_merge_mount_data() before
ve_devmnt_process() so container device mount policy sees MS_* flags
from the legacy mount(2) API, not only the user-supplied option string.
Keep ve_prepare_mount_options() for legacy parsers that do not use
generic_parse_monolithic().

https://virtuozzo.atlassian.net/browse/VSTOR-132330
Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>

Feature: ve: ve generic structures
---
Changes since v2:
  - Remove legacy_merge_mount_data guard in fs/internal.h. All helpers
    don't use anything that would break build and just unchanged pointer
    will be returned.
  - Add __vfs_format_flags helper and use it in vfs_format_sb_flags
  - Fix inconsistent error code: replace -ENOSPC with -E2BIG for the
    buffer-too-small case

Changes since v1:
  - Replace open-coded flag loops with append_entry() helper (pointer-
    advancing style) that unifies the comma-insert-copy pattern across
    all three call sites
  - Rework legacy_merge_mount_data() to allocate the page upfront, write
    user data first then append sb flags via vfs_format_sb_flags(); this
    eliminates the intermediate flags_buf[128], the total size calculation,
    and the +1/+2 arithmetic
  - Fix inconsistent error code: replace -EINVAL with -ENOSPC for the
    buffer-too-small case
  - Add comment on FS_BINARY_MOUNTDATA explaining why those filesystems
    are skipped
  - Add blank line before return in legacy_merge_mount_data()
  - Remove excess blank line after parse_monolithic_mount_data() in
    do_remount()

 fs/fs_context.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/internal.h   |   1 +
 fs/namespace.c  |  32 +++++++++++---
 3 files changed, 137 insertions(+), 7 deletions(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 76f34f3d468e..353d1eea77d0 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -81,6 +81,117 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
 	return -ENOPARAM;
 }
 
+/*
+ * Emit, into @buff at *@off, the comma-separated names of every entry in @p
+ * whose bit is set in @flags.  Advances *@off past the written text.
+ * Returns 0 on success or -E2BIG if the buffer is too small.
+ */
+static int __vfs_format_flags(const struct constant_table *p, unsigned int flags,
+			      char *buff, size_t size, size_t *off)
+{
+	for (; p->name; p++) {
+		ssize_t ret;
+
+		if (!(flags & p->value))
+			continue;
+
+		if (*off) {
+			if (*off + 1 >= size)
+				return -E2BIG;
+			buff[(*off)++] = ',';
+		}
+
+		ret = strscpy(buff + *off, p->name, size - *off);
+		if (ret < 0)
+			return -E2BIG;
+		*off += ret;
+	}
+	return 0;
+}
+
+/*
+ * Append fc's sb flags as comma-separated option names into @buff, advancing
+ * *@off.  Matches vfs_parse_sb_flag() / common_{set,clear}_sb_flag tables.
+ */
+static int vfs_format_sb_flags(struct fs_context *fc, char *buff, size_t size,
+			       size_t *off)
+{
+	int ret;
+
+	/* Positive names (ro, sync, ...) for bits that are on. */
+	ret = __vfs_format_flags(common_set_sb_flag, fc->sb_flags,
+				 buff, size, off);
+	if (ret)
+		return ret;
+
+	/*
+	 * Negative names (rw, async, ...) only for bits being turned off: in
+	 * the mask but off in the value.  __vfs_format_flags() emits when
+	 * (arg & bit), so the emit set is "mask & ~flags".
+	 */
+	return __vfs_format_flags(common_clear_sb_flag,
+				  fc->sb_flags_mask & ~fc->sb_flags,
+				  buff, size, off);
+}
+
+/*
+ * For legacy mount(2), MS_* mount flags are folded into fc->sb_flags and are
+ * not present in the monolithic data string.  Build a page with user data
+ * followed by those flags for ve_devmnt checks in vfs_parse_monolithic_sep.
+ *
+ * Returns @data when nothing needs to be added, a new page otherwise, or
+ * ERR_PTR() on failure.  The caller must free_page() when the result != @data.
+ */
+void *legacy_merge_mount_data(struct fs_context *fc, void *data)
+{
+	struct ve_struct *ve = get_exec_env();
+	size_t off = 0;
+	char *page;
+	int err;
+
+	if (ve_is_super(ve))
+		return data;
+
+	if (!fc->fs_type || !(fc->fs_type->fs_flags & FS_REQUIRES_DEV))
+		return data;
+
+	/*
+	 * Filesystems with binary mount data (e.g. btrfs) bypass option
+	 * string parsing entirely, so our checks cannot apply here.
+	 */
+	if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
+		return data;
+
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	if (data && *(char *)data) {
+		ssize_t ret = strscpy(page, data, PAGE_SIZE);
+
+		if (ret < 0) {
+			err = -E2BIG;
+			goto err_free;
+		}
+		off = ret;
+	}
+
+	err = vfs_format_sb_flags(fc, page, PAGE_SIZE, &off);
+	if (err)
+		goto err_free;
+
+	if (!off) {
+		free_page((unsigned long)page);
+		return data;
+	}
+
+	return page;
+
+err_free:
+	free_page((unsigned long)page);
+	return ERR_PTR(err);
+}
+
 /**
  * vfs_parse_fs_param_source - Handle setting "source" via parameter
  * @fc: The filesystem context to modify
diff --git a/fs/internal.h b/fs/internal.h
index 0064345cb9d0..147130cab046 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -46,6 +46,7 @@ extern void __init chrdev_init(void);
  */
 extern const struct fs_context_operations legacy_fs_context_ops;
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
+extern void *legacy_merge_mount_data(struct fs_context *fc, void *data);
 extern void vfs_clean_context(struct fs_context *fc);
 extern int finish_clean_context(struct fs_context *fc);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index acd4507e1247..d4a419a0bca9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3295,6 +3295,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	struct super_block *sb = path->mnt->mnt_sb;
 	struct mount *mnt = real_mount(path->mnt);
 	struct fs_context *fc;
+	void *mnt_data = NULL;
 
 	if (!check_mnt(mnt))
 		return -EINVAL;
@@ -3315,13 +3316,17 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	 */
 	fc->oldapi = true;
 
-	err = ve_prepare_mount_options(fc, data);
-	if (err) {
-		put_fs_context(fc);
-		return err;
+	mnt_data = legacy_merge_mount_data(fc, data);
+	if (IS_ERR(mnt_data)) {
+		err = PTR_ERR(mnt_data);
+		goto err;
 	}
 
-	err = parse_monolithic_mount_data(fc, data);
+	err = ve_prepare_mount_options(fc, mnt_data);
+	if (err)
+		goto free_mnt_data;
+
+	err = parse_monolithic_mount_data(fc, mnt_data);
 	if (!err) {
 		down_write(&sb->s_umount);
 		err = -EPERM;
@@ -3338,6 +3343,10 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 
 	mnt_warn_timestamp_expiry(path, &mnt->mnt);
 
+free_mnt_data:
+	if (mnt_data != data)
+		free_page((unsigned long)mnt_data);
+err:
 	put_fs_context(fc);
 	return err;
 }
@@ -3774,8 +3783,17 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
 					  subtype, strlen(subtype));
 	if (!err && name)
 		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
-	if (!err)
-		err = parse_monolithic_mount_data(fc, data);
+	if (!err) {
+		void *mnt_data = legacy_merge_mount_data(fc, data);
+
+		if (IS_ERR(mnt_data)) {
+			err = PTR_ERR(mnt_data);
+		} else {
+			err = parse_monolithic_mount_data(fc, mnt_data);
+			if (mnt_data != data)
+				free_page((unsigned long)mnt_data);
+		}
+	}
 	if (!err && !mount_capable(fc))
 		err = -EPERM;
 	if (!err)
-- 
2.43.0



More information about the Devel mailing list