[Devel] [PATCH RH7] overlayfs: avoid permission check for priveleged processes

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Oct 14 12:33:53 MSK 2020


On 10/14/20 2:05 AM, Andrey Zhadchenko wrote:
> Overlayfs temporary override credentials in copy_up function to ones which was
> used to create mount.

> Unfortunately vfs_setxattr requires CAP_SYS_ADMIN
> capability in current user namespace.

No, if it was so, it would be no error =) To be correct we should say:

Function vfs_setxattr for "trusted." attrs requires CAP_SYS_ADMIN in 
current ve's userns.

It is done so to mimic mainstream behaviour for containers(ves), so that 
container user can't set "trusted." xattrs if it is in non-root 
container userns.

> This leads to strange situations.
> For example, if overlayfs mount was made inside ve it is impossible to use
> copy_up from init_user_ns even with CAP_SYS_ADMIN. This is because overriden
> credentials are not sufficient in init_user_ns to set xattr to file.
> This is also required for criu since copy_up can be triggered on dump stage:
> reading inotify fhandle from /proc may start copy_up.

I hope that overlayfs overrides credentials exactly to be able to pass 
those checks. In mainstream kernel overlayfs can used from any userns, 
but can be only mounted from init_user_ns, so credentials always change 
to "more permissive". So it should be safe to skip override in case we 
are already "more permissive" than superblocks credentials.

> 
> Add an option to avoid vfs_setxattr CAP_SYS_ADMIN check if current credentials
> have CAP_SYS_ADMIN in namespace that is recorded in overlayfs mount superblock.

Sorry but looking on the code I don't see how it works... There are only 
three codepaths here:

   +-< ovl_do_setxattr_ext
     +-< ovl_do_setxattr #1 sets propagate_cap to false
     +-< ovl_check_setxattr_ext
     | +-< ovl_set_origin_ext
     | | +-< ovl_set_origin #2 sets propagate_cap to false
     | +-< ovl_check_setxattr #3 sets propagate_cap to false

And on all of them we don't "propagate_cap". Probably I'm missing 
something though.

> 
> https://jira.sw.ru/browse/PSBM-108122
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
>   fs/overlayfs/copy_up.c   | 25 +++++++++++++++++++------
>   fs/overlayfs/overlayfs.h | 39 ++++++++++++++++++++++++++-------------
>   fs/overlayfs/util.c      | 32 ++++++++++++++++++++++++++++----
>   fs/xattr.c               |  2 +-
>   4 files changed, 74 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> index 1564a35..d6b285f 100644
> --- a/fs/overlayfs/copy_up.c
> +++ b/fs/overlayfs/copy_up.c
> @@ -20,6 +20,7 @@
>   #include <linux/fdtable.h>
>   #include <linux/ratelimit.h>
>   #include <linux/exportfs.h>
> +#include <linux/capability.h>
>   #include "overlayfs.h"
>   
>   #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
> @@ -321,8 +322,8 @@ out:
>   	return fh;
>   }
>   
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper)
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap)
>   {
>   	const struct ovl_fh *fh = NULL;
>   	int err;
> @@ -341,8 +342,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
>   	/*
>   	 * Do not fail when upper doesn't support xattrs.
>   	 */
> -	err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
> -				 fh ? fh->len : 0, 0);
> +	err = ovl_check_setxattr_ext(dentry, upper, OVL_XATTR_ORIGIN, fh,
> +				 fh ? fh->len : 0, 0, propagate_cap);
>   	kfree(fh);
>   
>   	return err;
> @@ -433,6 +434,7 @@ struct ovl_copy_up_ctx {
>   	struct dentry *destdir;
>   	struct qstr destname;
>   	struct dentry *workdir;
> +	int propagate_cap;
>   	bool tmpfile;
>   	bool origin;
>   	bool indexed;
> @@ -711,7 +713,7 @@ out:
>   }
>   
>   static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
> -			   int flags)
> +			   int flags, int propagate_cap)
>   {
>   	int err;
>   	struct path parentpath;
> @@ -719,6 +721,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>   		.parent = parent,
>   		.dentry = dentry,
>   		.workdir = ovl_workdir(dentry),
> +		.propagate_cap = propagate_cap,
>   	};
>   
>   	if (WARN_ON(!ctx.workdir))
> @@ -768,9 +771,19 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
>   	return err;
>   }
>   
> +static int ovl_can_propagate_cap(struct dentry *dentry)
> +{
> +	struct super_block *sb = dentry->d_sb;
> +	struct ovl_fs *ofs = sb->s_fs_info;
> +	struct user_namespace *ovl_ns = ofs->creator_cred->user_ns;
> +
> +	return ns_capable(ovl_ns, CAP_SYS_ADMIN);
> +}
> +
>   int ovl_copy_up_flags(struct dentry *dentry, int flags)
>   {
>   	int err = 0;
> +	int propagate_cap = ovl_can_propagate_cap(dentry);
>   	const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
>   	bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
>   
> @@ -815,7 +828,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
>   			next = parent;
>   		}
>   
> -		err = ovl_copy_up_one(parent, next, flags);
> +		err = ovl_copy_up_one(parent, next, flags, propagate_cap);
>   
>   		dput(parent);
>   		dput(next);
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 7052938..6917acd 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -149,15 +149,6 @@ static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
>   	return err;
>   }
>   
> -static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> -				  const void *value, size_t size, int flags)
> -{
> -	int err = vfs_setxattr(dentry, name, value, size, flags);
> -	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n",
> -		 dentry, name, min((int)size, 48), value, size, flags, err);
> -	return err;
> -}
> -
>   static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
>   {
>   	int err = vfs_removexattr(dentry, name);
> @@ -245,9 +236,12 @@ int ovl_copy_up_start(struct dentry *dentry);
>   void ovl_copy_up_end(struct dentry *dentry);
>   bool ovl_check_origin_xattr(struct dentry *dentry);
>   bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
>   		       const char *name, const void *value, size_t size,
> -		       int xerr);
> +		       int xerr, int propagate_cap);
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +			const void *value, size_t size, int flags,
> +			int propagate_cap);
>   int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
>   void ovl_set_flag(unsigned long flag, struct inode *inode);
>   void ovl_clear_flag(unsigned long flag, struct inode *inode);
> @@ -279,6 +273,19 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
>   	return ofs->xino_bits;
>   }
>   
> +static inline int ovl_check_setxattr(struct dentry *dentry,
> +		struct dentry *upperdentry, const char *name,
> +		const void *value, size_t size, int xerr)
> +{
> +	return ovl_check_setxattr_ext(dentry, upperdentry, name, value, size,
> +				      xerr, 0);
> +}
> +
> +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags)
> +{
> +	return ovl_do_setxattr_ext(dentry, name, value, size, flags, 0);
> +}
>   
>   /* namei.c */
>   int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
> @@ -400,8 +407,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags);
>   int ovl_copy_xattr(struct dentry *old, struct dentry *new);
>   int ovl_set_attr(struct dentry *upper, struct kstat *stat);
>   struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
> -int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> -		   struct dentry *upper);
> +int ovl_set_origin_ext(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper, int propagate_cap);
> +
> +static inline int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
> +		   struct dentry *upper)
> +{
> +	return ovl_set_origin_ext(dentry, lower, upper, 0);
> +}
>   
>   /* export.c */
>   extern const struct export_operations ovl_export_operations;
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index 52a5116..30c10f1 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -419,9 +419,32 @@ bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
>   	return false;
>   }
>   
> -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
> -		       const char *name, const void *value, size_t size,
> -		       int xerr)
> +int ovl_do_setxattr_ext(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int propagate_cap)
> +{
> +	int err = vfs_setxattr(dentry, name, value, size, flags);
> +
> +	if (propagate_cap && err == -EPERM) {
> +		struct inode *ino = dentry->d_inode;
> +
> +		if (IS_IMMUTABLE(ino) || IS_APPEND(ino))
> +			goto out;
> +
> +		inode_lock(ino);
> +		err = __vfs_setxattr_noperm(dentry, name, value, size, flags);
> +		inode_unlock(ino);

I don't like this method. We have different permission checks in 
vfs_setxattr (e.g.: xattr_permission and security_inode_setxattr), by 
this we just ignore all of them, but actually need only 
xattr_permission's trusted part.

I would feel much better if we just don't override creds in overlay if 
our current cred is already more permissive then the overriding one. 
Because we can face ve_capable checks somewhere else later unexpectedly 
and will need to debug it all again. Probably it's better to allow 
everything for a host user?

Maybe we need to discuss this with company of @den @khorenko @vvs.

> +	}
> +
> +out:
> +	pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i, propagate_cap = %d\n",
> +		 dentry, name, min((int)size, 48), value, size, flags, err,
> +		 propagate_cap);
> +	return err;
> +}
> +
> +int ovl_check_setxattr_ext(struct dentry *dentry, struct dentry *upperdentry,
> +			   const char *name, const void *value, size_t size,
> +			   int xerr, int propagate_cap)
>   {
>   	int err;
>   	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> @@ -429,7 +452,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
>   	if (ofs->noxattr)
>   		return xerr;
>   
> -	err = ovl_do_setxattr(upperdentry, name, value, size, 0);
> +	err = ovl_do_setxattr_ext(upperdentry, name, value, size, 0,
> +			      propagate_cap);
>   
>   	if (err == -EOPNOTSUPP) {
>   		pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 9c24acc..c164e83 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -124,7 +124,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
>   
>   	return error;
>   }
> -
> +EXPORT_SYMBOL_GPL(__vfs_setxattr_noperm);
>   
>   int
>   vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list