[Devel] [vz7 PATCH 2/2] devcg: Allow wildcard exceptions in DENY child cgroups PSBM-144033

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Jan 19 14:22:14 MSK 2023



On 16.12.2022 17:38, Nikolay Borisov wrote:
> In containerized environments there arise cases where we might want to
> allow wildcard exceptions when the parent cg doesn't have such. This for
> example arises when systemd services are being setup in containers. In
> order to allow systemd to function we must allow it to write wildcard
> (i.e b *:* rwm) rules in the child group. At the same time in order not
> to break the fundamental invariant of the device cgroup hierarchy that
> children cannot be more permissive than their parents instead of blindly
> trusting those rules, simply skip them in the child cgroup and defer to
> the parent's exceptions.
> 
> For example assume we have A/B, where A has default behavior 'deny' and
> B was created afterwards and subsequently also has 'deny' default
> behavior. With this patch it's possible to write "b *:* rwm" in B which
> would also result in EPERM when trying to access any device that doesn't
> contain an exception in A:
> 
>      mkdir A
>      echo "a" > A/devices.deny
>      mkdir A/B
>      echo "c *:*" > A/B/devices.allow <-- allows to create the exception
> 					 but it's essentially a noop
> 
>      echo "c 1:3 rw" > A < -- now trying to open /dev/nul (matching 1:3)
> 			     by a process in B would succeed.
> 
> Implementing this doesn't really break the main invariant that children
> shouldn't have more access than their ancestors.
> 
> Signed-off-by: Nikolay Borisov <nikolay.borisov at virtuozzo.com>
> ---
>   security/device_cgroup.c | 54 +++++++++++++++++++++++++++++++---------
>   1 file changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/security/device_cgroup.c b/security/device_cgroup.c
> index f7948334e318..302159d21d15 100644
> --- a/security/device_cgroup.c
> +++ b/security/device_cgroup.c
> @@ -61,6 +61,11 @@ struct dev_cgroup {
>   	struct list_head propagate_pending;
>   };
> 
> +static bool is_wildcard_exception(struct dev_exception_item *ex)
> +{
> +	return ex->minor == ~0 || ex->major == ~0;
> +}
> +
>   static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
>   {
>   	return container_of(s, struct dev_cgroup, css);
> @@ -224,6 +229,7 @@ static int devcgroup_online(struct cgroup *cgroup)
>   		if (!ret)
>   			dev_cgroup->behavior = parent_dev_cgroup->behavior;
>   	}
> +
>   	mutex_unlock(&devcgroup_mutex);
> 
>   	return ret;
> @@ -434,9 +440,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
> 
>   /**
>    * match_exception	- iterates the exception list trying to match a rule
> - *			  based on type, major, minor and access type. It is
> - *			  considered a match if an exception is found that
> - *			  will contain the entire range of provided parameters.
> + * 			  based on type, major, minor and access type. It is
> + * 			  considered a match if an exception is found that
> + * 			  will contain the entire range of provided parameters.
>    * @exceptions: list of exceptions
>    * @type: device type (DEV_BLOCK or DEV_CHAR)
>    * @major: device file major number, ~0 to match all
> @@ -446,10 +452,11 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
>    * returns: true in case it matches an exception completely
>    */
>   static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
> -			    u32 major, u32 minor, short access)
> +			    u32 major, u32 minor, short access, bool check_parent)
>   {
>   	struct dev_exception_item *ex;
>   	struct cgroup *cgrp = dev_cgroup->css.cgroup;
> +	bool wildcard_skipped = false;
>   	struct list_head *exceptions = &dev_cgroup->exceptions;
> 
>   	list_for_each_entry_rcu(ex, exceptions, list) {
> @@ -464,6 +471,11 @@ static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
>   			continue;
>   		if (ex->minor != ~0 && ex->minor != minor)
>   			continue;
> +		/* skip wildcard rule if we are child cg */
> +		if (is_wildcard_exception(ex) && !test_bit(CGRP_VE_ROOT, &cgrp->flags)) {
> +			wildcard_skipped = true;
> +			continue;
> +		}
> 
>   		/* provided access cannot have more than the exception rule */
>   		mismatched_bits = access & (~ex->access) & ~ACC_MOUNT;
> @@ -477,9 +489,26 @@ static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
>   		return true;
>   	}
> 
> +	/* we matched a wildcard rule, so let's check for a
> +	 * more specific one in the parent
> +	 */
> +	if (wildcard_skipped && check_parent) {
> +		struct cgroup *parent = cgrp->parent;
> +		struct dev_cgroup *devcg_parent = cgroup_to_devcgroup(parent);
> +		if (devcg_parent->behavior == DEVCG_DEFAULT_ALLOW)
> +			/* Can't match any of the exceptions, even partially */
> +			return  !match_exception_partial(&devcg_parent->exceptions,
> +						      type, major, minor, access);
> +		else
> +			/* Need to match completely one exception to be allowed */
> +			return match_exception(devcg_parent, type, major,
> +					       minor, access, false);
> +	}
> +
>   	return false;
>   }
> 
> +
>   /**
>    * verify_new_ex - verifies if a new exception is part of what is allowed
>    *		   by a dev cgroup based on the default policy +
> @@ -504,14 +533,14 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
>   			/*
>   			 * new exception in the child doesn't matter, only
>   			 * adding extra restrictions
> -			 */
> +			 */

nit

>   			return true;
>   		} else {
>   			/*
>   			 * new exception in the child will add more devices
>   			 * that can be acessed, so it can't match any of
>   			 * parent's exceptions, even slightly
> -			 */
> +			 */

nit

>   			match = match_exception_partial(&dev_cgroup->exceptions,
>   							refex->type,
>   							refex->major,
> @@ -529,9 +558,8 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
>   		 * be contained completely in an parent's exception to be
>   		 * allowed
>   		 */
> -		match = match_exception(&dev_cgroup->exceptions, refex->type,
> -					refex->major, refex->minor,
> -					refex->access);
> +		match = match_exception(dev_cgroup, refex->type, refex->major,
> +					refex->minor, refex->access, false);
> 
>   		if (match)
>   			/* parent has an exception that matches the proposed */
> @@ -705,6 +733,7 @@ static inline bool has_children(struct dev_cgroup *devcgroup)
>   	return !list_empty(&cgrp->children);
>   }
> 
> +
>   /*
>    * Modify the exception list using allow/deny rules.
>    * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
> @@ -864,8 +893,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
>   			break;
>   		}
> 
> -		if (!parent_has_perm(devcgroup, &ex))
> +		if (!is_wildcard_exception(&ex) &&
> +		    !parent_has_perm(devcgroup, &ex))
>   			return -EPERM;
> +
>   		rc = dev_exception_add(devcgroup, &ex);
>   		break;
>   	case DEVCG_DENY:
> @@ -956,8 +987,7 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
>   					      type, major, minor, access);
>   	else
>   		/* Need to match completely one exception to be allowed */
> -		rc = match_exception(&dev_cgroup->exceptions, type, major,
> -				     minor, access);
> +		rc = match_exception(dev_cgroup, type, major, minor, access, true);
>   	rcu_read_unlock();
> 
>   #ifdef CONFIG_VE
> --
> 2.34.1
> 
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list