[Devel] [PATCH RHEL10 COMMIT] ve/mount: thread owning ve through alloc_vfsmnt/clone_mnt/copy_tree
Konstantin Khorenko
khorenko at virtuozzo.com
Thu May 14 18:50:48 MSK 2026
The commit is pushed to "branch-rh10-6.12.0-55.52.1.5.x.vz10-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh10-6.12.0-55.52.1.5.24.vz10
------>
commit 74283849022503f0553f4330d5edb0005e889e6d
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Wed Apr 29 15:41:36 2026 +0200
ve/mount: thread owning ve through alloc_vfsmnt/clone_mnt/copy_tree
Add an owner_ve parameter to alloc_vfsmnt(), clone_mnt() and copy_tree()
to identify the correct owner of a mount for accounting. Passing NULL
preserves the existing behaviour of taking the current ve via
get_exec_env().
This will be used to derive the correct owner of newly created mounts
when a new ve namespace and a new mount namespace are created
simultaneously, in which case we would like the new mounts to have the
new ve as their owner.
There is no change in behaviour yet: copy_mnt_ns() still resolves the
owner from current ve via get_exec_env().
https://virtuozzo.atlassian.net/browse/VSTOR-129744
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Reviewed-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
Feature: ve: ve generic structures
======
Patchset description:
ve: fix owner_ve of net/mnt namespaces created together with CLONE_NEWVE
When CLONE_NEWVE is combined with CLONE_NEWNET and/or CLONE_NEWNS in a
single clone3() or unshare(), copy_net_ns() and copy_mnt_ns() resolve
the owning ve via get_exec_env(), which still points at the parent ve
at that point. The freshly created net/mnt namespaces end up wired to
the wrong ve, and unshare(CLONE_NEWVE | CLONE_NEW{NS,NET}) is rejected
outright by check_unshare_flags().
Fix it by threading the new ve from copy_namespaces() and
unshare_nsproxy_namespaces() down into copy_net_ns() and copy_mnt_ns(),
so the correct ve is charged for the new netns and for every mount in
the new mntns.
Patches 1-4 are pure plumbing (signature changes, no behaviour change).
Patch 5 is the actual fix that forwards the new ve. Patch 6 drops the
now-redundant CLONE_NEWVE-alone restriction in check_unshare_flags().
Patch 7 exposes ve.mnt_nr via cgroupfs to make per-ve mount accounting
observable from userspace. Patch 8 adds a selftest covering both the
clone3() and unshare() paths.
Verified with crash on a vzctl-started container: task_ve,
nsproxy->net_ns->owner_ve, nsproxy->mnt_ns->ve_owner and
nsproxy->mnt_ns->root.ve_owner all resolve to the new ve.
The new selftest passes both cases.
---
fs/namespace.c | 43 +++++++++++++++++++++++++------------------
fs/pnode.c | 2 +-
fs/pnode.h | 5 ++++-
3 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index dd10ed5007ea2..ba2cee9a6db1c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -308,10 +308,10 @@ int mnt_get_count(struct mount *mnt)
}
static inline int ve_mount_allowed(void);
-static inline void ve_mount_nr_inc(struct mount *mnt);
+static inline void ve_mount_nr_inc(struct mount *mnt, struct ve_struct *ve);
static inline void ve_mount_nr_dec(struct mount *mnt);
-static struct mount *alloc_vfsmnt(const char *name)
+static struct mount *alloc_vfsmnt(const char *name, struct ve_struct *owner_ve)
{
struct mount *mnt;
@@ -360,7 +360,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_umounting);
INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
mnt->mnt.mnt_idmap = &nop_mnt_idmap;
- ve_mount_nr_inc(mnt);
+ ve_mount_nr_inc(mnt, owner_ve);
}
return mnt;
@@ -1223,7 +1223,7 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
if (!fc->root)
return ERR_PTR(-EINVAL);
- mnt = alloc_vfsmnt(fc->source ?: "none");
+ mnt = alloc_vfsmnt(fc->source ?: "none", NULL);
if (!mnt)
return ERR_PTR(-ENOMEM);
@@ -1325,13 +1325,13 @@ vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
EXPORT_SYMBOL_GPL(vfs_submount);
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
- int flag)
+ int flag, struct ve_struct *owner_ve)
{
struct super_block *sb = old->mnt.mnt_sb;
struct mount *mnt;
int err;
- mnt = alloc_vfsmnt(old->mnt_devname);
+ mnt = alloc_vfsmnt(old->mnt_devname, owner_ve);
if (!mnt)
return ERR_PTR(-ENOMEM);
@@ -1565,7 +1565,7 @@ EXPORT_SYMBOL(path_is_mountpoint);
struct vfsmount *mnt_clone_internal(const struct path *path)
{
struct mount *p;
- p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
+ p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE, NULL);
if (IS_ERR(p))
return ERR_CAST(p);
p->mnt.mnt_flags |= MNT_INTERNAL;
@@ -2153,7 +2153,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
}
struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
- int flag)
+ int flag, struct ve_struct *owner_ve)
{
struct mount *res, *src_parent, *src_root_child, *src_mnt,
*dst_parent, *dst_mnt;
@@ -2164,7 +2164,7 @@ struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);
- res = dst_mnt = clone_mnt(src_root, dentry, flag);
+ res = dst_mnt = clone_mnt(src_root, dentry, flag, owner_ve);
if (IS_ERR(dst_mnt))
return dst_mnt;
@@ -2200,7 +2200,8 @@ struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
src_parent = src_mnt;
dst_parent = dst_mnt;
- dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag);
+ dst_mnt = clone_mnt(src_mnt, src_mnt->mnt.mnt_root, flag,
+ owner_ve);
if (IS_ERR(dst_mnt))
goto out;
lock_mount_hash();
@@ -2230,7 +2231,7 @@ struct vfsmount *collect_mounts(const struct path *path)
tree = ERR_PTR(-EINVAL);
else
tree = copy_tree(real_mount(path->mnt), path->dentry,
- CL_COPY_ALL | CL_PRIVATE);
+ CL_COPY_ALL | CL_PRIVATE, NULL);
namespace_unlock();
if (IS_ERR(tree))
return ERR_CAST(tree);
@@ -2311,7 +2312,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (has_locked_children(old_mnt, path->dentry))
goto invalid;
- new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+ new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE, NULL);
up_read(&namespace_sem);
if (IS_ERR(new_mnt))
@@ -2803,9 +2804,10 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
return mnt;
if (recurse)
- mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
+ mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE,
+ NULL);
else
- mnt = clone_mnt(old, old_path->dentry, 0);
+ mnt = clone_mnt(old, old_path->dentry, 0, NULL);
if (!IS_ERR(mnt))
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
@@ -3221,9 +3223,10 @@ static inline int ve_mount_allowed(void)
atomic_read(&ve->mnt_nr) < (int)sysctl_ve_mount_nr;
}
-static inline void ve_mount_nr_inc(struct mount *mnt)
+static inline void ve_mount_nr_inc(struct mount *mnt, struct ve_struct *ve)
{
- struct ve_struct *ve = get_exec_env();
+ if (!ve)
+ ve = get_exec_env();
mnt->ve_owner = get_ve(ve);
atomic_inc(&ve->mnt_nr);
@@ -3258,7 +3261,7 @@ bool is_sb_ve_accessible(struct ve_struct *ve, struct super_block *sb)
#else /* CONFIG_VE */
static inline int ve_mount_allowed(void) { return 1; }
-static inline void ve_mount_nr_inc(struct mount *mnt) { }
+static inline void ve_mount_nr_inc(struct mount *mnt, struct ve_struct *ve) { }
static inline void ve_mount_nr_dec(struct mount *mnt) { }
#endif /* CONFIG_VE */
@@ -4235,7 +4238,11 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE;
- new = copy_tree(old, old->mnt.mnt_root, copy_flags);
+#ifdef CONFIG_VE
+ new = copy_tree(old, old->mnt.mnt_root, copy_flags, new_ns->ve_owner);
+#else
+ new = copy_tree(old, old->mnt.mnt_root, copy_flags, NULL);
+#endif
if (IS_ERR(new)) {
namespace_unlock();
ns_free_inum(&new_ns->ns);
diff --git a/fs/pnode.c b/fs/pnode.c
index a799e0315cc9a..a91e7aac1601d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -257,7 +257,7 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
type |= CL_MAKE_SHARED;
}
- child = copy_tree(last_source, last_source->mnt.mnt_root, type);
+ child = copy_tree(last_source, last_source->mnt.mnt_root, type, NULL);
if (IS_ERR(child))
return PTR_ERR(child);
read_seqlock_excl(&mount_lock);
diff --git a/fs/pnode.h b/fs/pnode.h
index 0b02a63938911..c865dd91f0a37 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -10,6 +10,8 @@
#include <linux/list.h>
#include "mount.h"
+struct ve_struct;
+
#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(m) ((m)->mnt_master)
#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
@@ -49,7 +51,8 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
struct mount *mnt);
-struct mount *copy_tree(struct mount *, struct dentry *, int);
+struct mount *copy_tree(struct mount *, struct dentry *, int,
+ struct ve_struct *);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
More information about the Devel
mailing list