[Devel] [PATCH RH8 4/4] ms/teach move_mount(2) to work with OPEN_TREE_CLONE
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Wed Sep 2 15:51:21 MSK 2020
Patchset description:
These syscalls were added as a preparation step for the new mount API (fsopen,
fsconfig, fsmount and fspick will be ported separately).
We can use them to implement "cross-namespace bind-mounting" like this:
fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
setns(nsfd, CLONE_NEWNS);
move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);
This will allow us to implement the feature of adding bind mounts to a running
container instead of relying on unreliable external propagation.
The version for VZ8 is slightly different from the VZ7 version.
https://jira.sw.ru/browse/PSBM-107263
Current patch description:
From: David Howells <dhowells at redhat.com>
Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
attached by move_mount(2).
If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
not detached anymore, it won't be dissolved. move_mount(2) is adjusted
to handle detached source.
That gives us equivalents of mount --bind and mount --rbind.
Thanks also to Alan Jenkins <alan.christopher.jenkins at gmail.com> for
providing a whole bunch of ways to break things using this interface.
Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells at redhat.com>
Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
teach move_mount(2) to work with OPEN_TREE_CLONE
(cherry-picked from commit 44dfd84a6d54a675e35ab618d9fab47b36cb78cd)
do_move_mount(): fix an unsafe use of is_anon_ns()
(cherry-picked from commit 05883eee857eab4693e7d13ebab06716475c5754)
vfs: move_mount: reject moving kernel internal mounts
(cherry-picked from commit 570d7a98e7d6d5d8706d94ffd2d40adeaa318332)
https://jira.sw.ru/browse/PSBM-107263
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
fs/namespace.c | 63 ++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 56 insertions(+), 7 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 700c73f0bb94..5942ffe3802c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1815,10 +1815,16 @@ void dissolve_on_fput(struct vfsmount *mnt)
namespace_lock();
lock_mount_hash();
ns = real_mount(mnt)->mnt_ns;
- umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+ if (ns) {
+ if (is_anon_ns(ns))
+ umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+ else
+ ns = NULL;
+ }
unlock_mount_hash();
namespace_unlock();
- free_mnt_ns(ns);
+ if (ns)
+ free_mnt_ns(ns);
}
void drop_collected_mounts(struct vfsmount *mnt)
@@ -2026,6 +2032,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
+ if (source_mnt->mnt_ns) {
+ /* move from anon - the caller will destroy */
+ list_del_init(&source_mnt->mnt_ns->list);
+ }
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
}
@@ -2493,13 +2503,37 @@ static int do_set_group(struct path *path, const char *sibling_name)
return err;
}
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree. Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+ struct mount *p;
+ bool ret = false;
+
+ lock_mount_hash();
+ for (p = subtree; p; p = next_mnt(p, subtree))
+ if (mnt_ns_loop(p->mnt.mnt_root))
+ goto out;
+
+ ret = true;
+out:
+ unlock_mount_hash();
+ return ret;
+}
+
static int do_move_mount(struct path *old_path, struct path *new_path)
{
struct path parent_path = {.mnt = NULL, .dentry = NULL};
+ struct mnt_namespace *ns;
struct mount *p;
struct mount *old;
struct mountpoint *mp;
int err;
+ bool attached;
mp = lock_mount(new_path);
if (IS_ERR(mp))
@@ -2507,12 +2541,20 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
old = real_mount(old_path->mnt);
p = real_mount(new_path->mnt);
+ attached = mnt_has_parent(old);
+ ns = old->mnt_ns;
err = -EINVAL;
- if (!check_mnt(p) || !check_mnt(old))
+ /* The mountpoint must be in our namespace. */
+ if (!check_mnt(p))
goto out;
- if (!mnt_has_parent(old))
+ /* The thing moved must be mounted... */
+ if (!is_mounted(&old->mnt))
+ goto out;
+
+ /* ... and either ours or the root of anon namespace */
+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
if (old->mnt.mnt_flags & MNT_LOCKED)
@@ -2527,7 +2569,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
/*
* Don't move a mount residing in a shared parent.
*/
- if (IS_MNT_SHARED(old->mnt_parent))
+ if (attached && IS_MNT_SHARED(old->mnt_parent))
goto out;
/*
* Don't move a mount tree containing unbindable mounts to a destination
@@ -2536,12 +2578,14 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
goto out;
err = -ELOOP;
+ if (!check_for_nsfs_mounts(old))
+ goto out;
for (; mnt_has_parent(p); p = p->mnt_parent)
if (p == old)
goto out;
err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
- &parent_path);
+ attached ? &parent_path : NULL);
if (err)
goto out;
@@ -2550,8 +2594,11 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
list_del_init(&old->mnt_expire);
out:
unlock_mount(mp);
- if (!err)
+ if (!err) {
path_put(&parent_path);
+ if (!attached)
+ free_mnt_ns(ns);
+ }
return err;
}
@@ -3214,6 +3261,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
/*
* Move a mount from one place to another.
+ * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
+ * used to copy a mount subtree.
*
* Note the flags value is a combination of MOVE_MOUNT_* flags.
*/
--
2.24.1
More information about the Devel
mailing list