[Devel] [PATCH VZ10 v5 1/1] fs: namespace: transform mount flags to comma separated values
Vasileios Almpanis
vasileios.almpanis at virtuozzo.com
Fri Jun 26 12:13:26 MSK 2026
In legacy mount call paths, userspace may pass mount options as MS_*
flags instead of in the option string. These flags escape our checks in
ve_devmnt_process(), allowing devices to be mounted inside containers
with options not listed in the allowed field. Introduce helpers that use
the already existing flag -> string tables to build a comma-separated
string from these flags and append it to the userspace-provided data.
Then pass this string to parse_monolithic_mount_data(), enforcing the
same checks symmetrically in both the mount and fsconfig
syscalls.
In the remount path, run legacy_merge_mount_data() before
ve_devmnt_process() so container device mount policy sees MS_* flags
from the legacy mount(2) API, not only the user-supplied option string.
Keep ve_prepare_mount_options() for legacy parsers that do not use
generic_parse_monolithic().
https://virtuozzo.atlassian.net/browse/VSTOR-132330
Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
Feature: ve: ve generic structures
---
Changes since v4:
- Don't emit the negative/clear sb flag names (rw, async, nomand,
nolazytime) in vfs_format_sb_flags(); emit only the positive names
(common_set_sb_flag). On the legacy remount path sb_flags_mask is the
fixed MS_RMT_MASK, so the previous code appended the clear-names for
every unset remountable bit, and ve_devmnt_check() then rejected
ordinary remounts inside containers (but not on the host, ve0).
- Fix uninitialized mount-options page: NUL-terminate the buffer before
returning it from legacy_merge_mount_data(). When data is empty/NULL
and no flags are emitted, off stayed 0 and the page from
__get_free_page() was returned non-terminated.
- Fix __vfs_format_flags() comment: it returns -E2BIG, not -ENOSPC.
Changes since v3:
- Drop excess length check in legacy_merge_mount_data
Changes since v2:
- Remove the legacy_merge_mount_data guard in fs/internal.h. None of the
helpers use anything that would break the build, and the unchanged
pointer will simply be returned.
- Add __vfs_format_flags helper and use it in vfs_format_sb_flags
- Fix inconsistent error code: replace -ENOSPC with -E2BIG for the
buffer-too-small case
Changes since v1:
- Replace open-coded flag loops with append_entry() helper (pointer-
advancing style) that unifies the comma-insert-copy pattern across
all three call sites
- Rework legacy_merge_mount_data() to allocate the page upfront, write
user data first then append sb flags via vfs_format_sb_flags(); this
eliminates the intermediate flags_buf[128], the total size calculation,
and the +1/+2 arithmetic
- Fix inconsistent error code: replace -EINVAL with -ENOSPC for the
buffer-too-small case
- Add comment on FS_BINARY_MOUNTDATA explaining why those filesystems
are skipped
- Add blank line before return in legacy_merge_mount_data()
- Remove excess blank line after parse_monolithic_mount_data() in
do_remount()
fs/fs_context.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/internal.h | 1 +
fs/namespace.c | 32 ++++++++++++++----
3 files changed, 116 insertions(+), 7 deletions(-)
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 76f34f3d468e..fdaefc227691 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -81,6 +81,96 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
return -ENOPARAM;
}
+/*
+ * Walk @p (ended by a NULL name) and append to @buff, at *@off, the name of
+ * every entry whose value has a bit set in @flags, comma-separated from any
+ * text already there. Advances *@off. Returns 0, or -E2BIG if it does not fit.
+ */
+static int __vfs_format_flags(const struct constant_table *p, unsigned int flags,
+ char *buff, size_t size, size_t *off)
+{
+ for (; p->name; p++) {
+ ssize_t ret;
+
+ if (!(flags & p->value))
+ continue;
+
+ if (*off) { /* text already present: separate with a comma */
+ if (*off + 1 >= size)
+ return -E2BIG;
+ buff[(*off)++] = ',';
+ }
+
+ ret = strscpy(buff + *off, p->name, size - *off);
+ if (ret < 0) /* name did not fit in the remaining space */
+ return -E2BIG;
+ *off += ret;
+ }
+ return 0;
+}
+/* Append the common_set_sb_flag names of the bits set in fc->sb_flags. */
+static int vfs_format_sb_flags(struct fs_context *fc, char *buff, size_t size,
+ size_t *off)
+{
+ return __vfs_format_flags(common_set_sb_flag, fc->sb_flags,
+ buff, size, off);
+}
+
+/*
+ * For legacy mount(2), MS_* mount flags are folded into fc->sb_flags and are
+ * not present in the monolithic data string. Build a page with user data
+ * followed by those flags for ve_devmnt checks in vfs_parse_monolithic_sep.
+ *
+ * Returns @data itself when no merge is done (the early returns below), else a
+ * new page the caller must free_page(), or ERR_PTR(-ENOMEM or -E2BIG).
+ */
+void *legacy_merge_mount_data(struct fs_context *fc, void *data)
+{
+ struct ve_struct *ve = get_exec_env();
+ size_t off = 0;
+ char *page;
+ int err;
+
+ if (ve_is_super(ve)) /* presumably the host VE (ve0): no merge needed */
+ return data;
+
+ if (!fc->fs_type || !(fc->fs_type->fs_flags & FS_REQUIRES_DEV))
+ return data;
+
+ /*
+ * Filesystems with binary mount data (e.g. btrfs) bypass option
+ * string parsing entirely, so our checks cannot apply here.
+ */
+ if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
+ return data;
+
+ page = (char *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ if (data && *(char *)data) { /* copy non-empty user options first */
+ ssize_t ret = strscpy(page, data, PAGE_SIZE);
+
+ if (ret < 0) {
+ err = -E2BIG;
+ goto err_free;
+ }
+ off = ret;
+ }
+
+ err = vfs_format_sb_flags(fc, page, PAGE_SIZE, &off);
+ if (err)
+ goto err_free;
+
+ page[off] = '\0'; /* terminates the page even when off is still 0 */
+
+ return page;
+
+err_free:
+ free_page((unsigned long)page);
+ return ERR_PTR(err);
+}
+
/**
* vfs_parse_fs_param_source - Handle setting "source" via parameter
* @fc: The filesystem context to modify
diff --git a/fs/internal.h b/fs/internal.h
index 1b5cb1cda2e4..e823b69dd077 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -46,6 +46,7 @@ extern void __init chrdev_init(void);
*/
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
+extern void *legacy_merge_mount_data(struct fs_context *fc, void *data);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);
diff --git a/fs/namespace.c b/fs/namespace.c
index 566f11a222fc..0864fbd98131 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3337,6 +3337,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
struct super_block *sb = path->mnt->mnt_sb;
struct mount *mnt = real_mount(path->mnt);
struct fs_context *fc;
+ void *mnt_data = NULL;
if (!check_mnt(mnt))
return -EINVAL;
@@ -3357,13 +3358,17 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
*/
fc->oldapi = true;
- err = ve_prepare_mount_options(fc, data);
- if (err) {
- put_fs_context(fc);
- return err;
+ mnt_data = legacy_merge_mount_data(fc, data);
+ if (IS_ERR(mnt_data)) {
+ err = PTR_ERR(mnt_data);
+ goto err;
}
- err = parse_monolithic_mount_data(fc, data);
+ err = ve_prepare_mount_options(fc, mnt_data);
+ if (err)
+ goto free_mnt_data;
+
+ err = parse_monolithic_mount_data(fc, mnt_data);
if (!err) {
down_write(&sb->s_umount);
err = -EPERM;
@@ -3380,6 +3385,10 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
mnt_warn_timestamp_expiry(path, &mnt->mnt);
+free_mnt_data:
+ if (mnt_data != data)
+ free_page((unsigned long)mnt_data);
+err:
put_fs_context(fc);
return err;
}
@@ -3816,8 +3825,17 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
subtype, strlen(subtype));
if (!err && name)
err = vfs_parse_fs_string(fc, "source", name, strlen(name));
- if (!err)
- err = parse_monolithic_mount_data(fc, data);
+ if (!err) {
+ void *mnt_data = legacy_merge_mount_data(fc, data);
+
+ if (IS_ERR(mnt_data)) {
+ err = PTR_ERR(mnt_data);
+ } else {
+ err = parse_monolithic_mount_data(fc, mnt_data);
+ if (mnt_data != data)
+ free_page((unsigned long)mnt_data);
+ }
+ }
if (!err && !mount_capable(fc))
err = -EPERM;
if (!err)
--
2.47.3
More information about the Devel
mailing list