[Devel] [PATCH RHEL7 COMMIT] ms/vfs: syscall: Add move_mount(2) to move mounts around
Vasily Averin
vvs at virtuozzo.com
Wed Aug 26 09:47:45 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.11
------>
commit 2bb98f0659738df2f07dd7dcb7f136d8a7164927
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Wed Aug 26 09:47:45 2020 +0300
ms/vfs: syscall: Add move_mount(2) to move mounts around
Patchset description:
These syscalls were added as a preparation step for the new mount API (fsopen,
fsconfig, fsmount and fspick will be ported separately).
We can use them to implement "cross-namespace bind-mounting" like this:
fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
setns(nsfd, CLONE_NEWNS);
move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);
This will allow us to implement the feature of adding bind mounts to a running
container instead of relying on unreliable external propagation.
It is needed for VZ8, but does not apply cleanly, so I will send it
separately.
https://jira.sw.ru/browse/PSBM-107263
Current patch description:
From: David Howells <dhowells at redhat.com>
Add a move_mount() system call that will move a mount from one place to
another and, in the next commit, allow to attach an unattached mount tree.
The new system call looks like the following:
int move_mount(int from_dfd, const char *from_path,
int to_dfd, const char *to_path,
unsigned int flags);
Signed-off-by: David Howells <dhowells at redhat.com>
cc: linux-api at vger.kernel.org
Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
vfs: syscall: Add move_mount(2) to move mounts around
(cherry-picked from commit 2db154b3ea8e14b04fee23e3fdfd5e9d17fbc6ae)
uapi, x86: Fix the syscall numbering of the mount API syscalls [ver #2]
(cherry-picked from commit 9c8ad7a2ff0bfe58f019ec0abc1fb965114dde7d)
selinux: fix regression introduced by move_mount(2) syscall
(cherry-picked from commit 98aa00345de54b8340dc2ddcd87f446d33387b5e)
https://jira.sw.ru/browse/PSBM-107263
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
arch/x86/syscalls/syscall_32.tbl | 1 +
arch/x86/syscalls/syscall_64.tbl | 1 +
fs/namespace.c | 126 +++++++++++++++++++++++++++++----------
include/linux/security.h | 12 ++++
include/linux/syscalls.h | 3 +
include/uapi/linux/fs.h | 11 ++++
security/capability.c | 6 ++
security/security.c | 5 ++
security/selinux/hooks.c | 9 +++
9 files changed, 143 insertions(+), 31 deletions(-)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 7b32f7c..978f07c 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -372,6 +372,7 @@
382 i386 pkey_free sys_pkey_free
428 i386 open_tree sys_open_tree
+429 i386 move_mount sys_move_mount
510 i386 getluid sys_getluid
511 i386 setluid sys_setluid
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 6b3a1d1..3c86aba 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -337,6 +337,7 @@
331 common pkey_free sys_pkey_free
428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
500 64 getluid sys_getluid
501 64 setluid sys_setluid
diff --git a/fs/namespace.c b/fs/namespace.c
index 694e3d6..0820db1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2852,72 +2852,81 @@ out_unlock:
return err;
}
-static int do_move_mount(struct path *path, const char *old_name)
+static int do_move_mount(struct path *old_path, struct path *new_path)
{
- struct path old_path, parent_path;
+ struct path parent_path = {.mnt = NULL, .dentry = NULL};
struct mount *p;
struct mount *old;
struct mountpoint *mp;
int err;
- if (!old_name || !*old_name)
- return -EINVAL;
- err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
- if (err)
- return err;
- mp = lock_mount(path);
- err = PTR_ERR(mp);
+ mp = lock_mount(new_path);
if (IS_ERR(mp))
- goto out;
+ return PTR_ERR(mp);
- old = real_mount(old_path.mnt);
- p = real_mount(path->mnt);
+ old = real_mount(old_path->mnt);
+ p = real_mount(new_path->mnt);
err = -EINVAL;
if (!check_mnt(p) || !check_mnt(old))
- goto out1;
+ goto out;
- if (old->mnt.mnt_flags & MNT_LOCKED)
- goto out1;
+ if (!mnt_has_parent(old))
+ goto out;
- err = -EINVAL;
- if (old_path.dentry != old_path.mnt->mnt_root)
- goto out1;
+ if (old->mnt.mnt_flags & MNT_LOCKED)
+ goto out;
- if (!mnt_has_parent(old))
- goto out1;
+ if (old_path->dentry != old_path->mnt->mnt_root)
+ goto out;
- if (S_ISDIR(path->dentry->d_inode->i_mode) !=
- S_ISDIR(old_path.dentry->d_inode->i_mode))
- goto out1;
+ if (S_ISDIR(new_path->dentry->d_inode->i_mode) !=
+ S_ISDIR(old_path->dentry->d_inode->i_mode))
+ goto out;
/*
* Don't move a mount residing in a shared parent.
*/
if (IS_MNT_SHARED(old->mnt_parent))
- goto out1;
+ goto out;
/*
* Don't move a mount tree containing unbindable mounts to a destination
* mount which is shared.
*/
if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
- goto out1;
+ goto out;
err = -ELOOP;
for (; mnt_has_parent(p); p = p->mnt_parent)
if (p == old)
- goto out1;
+ goto out;
- err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
+ err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
+ &parent_path);
if (err)
- goto out1;
+ goto out;
/* if the mount is moved, it should no longer be expire
* automatically */
list_del_init(&old->mnt_expire);
-out1:
- unlock_mount(mp);
out:
+ unlock_mount(mp);
if (!err)
path_put(&parent_path);
+ return err;
+}
+
+static int do_move_mount_old(struct path *path, const char *old_name)
+{
+ struct path old_path;
+ int err;
+
+ if (!old_name || !*old_name)
+ return -EINVAL;
+
+ err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
+ if (err)
+ return err;
+
+ err = do_move_mount(&old_path, path);
path_put(&old_path);
return err;
}
@@ -3334,7 +3343,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
else if (cmd & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (cmd & MS_MOVE)
- retval = do_move_mount(&path, dev_name);
+ retval = do_move_mount_old(&path, dev_name);
else if (cmd & MS_SET_GROUP)
retval = do_set_group(&path, dev_name);
else
@@ -3568,6 +3577,61 @@ out_type:
}
/*
+ * Move a mount from one place to another.
+ *
+ * Note the flags value is a combination of MOVE_MOUNT_* flags.
+ */
+SYSCALL_DEFINE5(move_mount,
+ int, from_dfd, const char *, from_pathname,
+ int, to_dfd, const char *, to_pathname,
+ unsigned int, flags)
+{
+ struct path from_path, to_path;
+ unsigned int lflags;
+ int ret = 0;
+
+ if (!may_mount())
+ return -EPERM;
+
+ if (flags & ~MOVE_MOUNT__MASK)
+ return -EINVAL;
+
+ /* If someone gives a pathname, they aren't permitted to move
+ * from an fd that requires unmount as we can't get at the flag
+ * to clear it afterwards.
+ */
+ lflags = 0;
+ if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
+ if (ret < 0)
+ return ret;
+
+ lflags = 0;
+ if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
+ if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
+ if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
+
+ ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
+ if (ret < 0)
+ goto out_from;
+
+ ret = security_move_mount(&from_path, &to_path);
+ if (ret < 0)
+ goto out_to;
+
+ ret = do_move_mount(&from_path, &to_path);
+
+out_to:
+ path_put(&to_path);
+out_from:
+ path_put(&from_path);
+ return ret;
+}
+
+/*
* Return true if path is reachable from root
*
* namespace_sem or mount_lock is held
diff --git a/include/linux/security.h b/include/linux/security.h
index 47aed52..2fe05c2 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -323,6 +323,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* Parse a string of security data filling in the opts structure
* @options string containing all mount options known by the LSM
* @opts binary data structure usable by the LSM
+ * @move_mount:
+ * Check permission before a mount is moved.
+ * @from_path indicates the mount that is going to be moved.
+ * @to_path indicates the mountpoint that will be mounted upon.
* @dentry_init_security:
* Compute a context for a dentry as the inode is not yet available
* since NFSv4 has no label backed by an EA anyway.
@@ -1559,6 +1563,7 @@ struct security_operations {
unsigned long kern_flags,
unsigned long *set_kern_flags);
int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
+ int (*move_mount)(const struct path *from_path, const struct path *to_path);
int (*dentry_init_security) (struct dentry *dentry, int mode,
struct qstr *name, void **ctx,
u32 *ctxlen);
@@ -1880,6 +1885,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb,
unsigned long kern_flags,
unsigned long *set_kern_flags);
int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts);
+int security_move_mount(const struct path *from_path, const struct path *to_path);
int security_dentry_init_security(struct dentry *dentry, int mode,
struct qstr *name, void **ctx,
u32 *ctxlen);
@@ -2209,6 +2215,12 @@ static inline int security_sb_parse_opts_str(char *options, struct security_mnt_
return 0;
}
+static inline int security_move_mount(const struct path *from_path,
+ const struct path *to_path)
+{
+ return 0;
+}
+
static inline int security_inode_alloc(struct inode *inode)
{
return 0;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0e30297..dce905f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -892,5 +892,8 @@ asmlinkage long sys_membarrier(int cmd, int flags);
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
+asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
+ int to_dfd, const char __user *to_path,
+ unsigned int ms_flags);
#endif
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 0fc42f1..8c9e6a2 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -131,6 +131,17 @@ struct inodes_stat_t {
#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
+/*
+ * move_mount() flags.
+ */
+#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
+#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
+#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#define MOVE_MOUNT__MASK 0x00000077
+
/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
diff --git a/security/capability.c b/security/capability.c
index aff9bb8..47cd798 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -114,6 +114,11 @@ static int cap_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
return 0;
}
+static int cap_move_mount(const struct path *from_path, const struct path *to_path)
+{
+ return 0;
+}
+
static int cap_dentry_init_security(struct dentry *dentry, int mode,
struct qstr *name, void **ctx,
u32 *ctxlen)
@@ -1019,6 +1024,7 @@ void __init security_fixup_ops(struct security_operations *ops)
set_to_cap_if_null(ops, sb_set_mnt_opts);
set_to_cap_if_null(ops, sb_clone_mnt_opts);
set_to_cap_if_null(ops, sb_parse_opts_str);
+ set_to_cap_if_null(ops, move_mount);
set_to_cap_if_null(ops, dentry_init_security);
set_to_cap_if_null(ops, dentry_create_files_as);
set_to_cap_if_null(ops, inode_alloc_security);
diff --git a/security/security.c b/security/security.c
index 44aaadf..b6ba158e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -345,6 +345,11 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
}
EXPORT_SYMBOL(security_sb_parse_opts_str);
+int security_move_mount(const struct path *from_path, const struct path *to_path)
+{
+ return security_ops->move_mount(from_path, to_path);
+}
+
int security_inode_alloc(struct inode *inode)
{
inode->i_security = NULL;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 84a6f1a..aa7bb4b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2815,6 +2815,14 @@ static int selinux_mount(const char *dev_name,
return path_has_perm(cred, path, FILE__MOUNTON);
}
+static int selinux_move_mount(const struct path *from_path,
+ const struct path *to_path)
+{
+ const struct cred *cred = current_cred();
+
+ return path_has_perm(cred, to_path, FILE__MOUNTON);
+}
+
static int selinux_umount(struct vfsmount *mnt, int flags)
{
const struct cred *cred = current_cred();
@@ -6316,6 +6324,7 @@ static struct security_operations selinux_ops = {
.sb_set_mnt_opts = selinux_set_mnt_opts,
.sb_clone_mnt_opts = selinux_sb_clone_mnt_opts,
.sb_parse_opts_str = selinux_parse_opts_str,
+ .move_mount = selinux_move_mount,
.dentry_init_security = selinux_dentry_init_security,
.dentry_create_files_as = selinux_dentry_create_files_as,
More information about the Devel
mailing list