[Devel] [vz7 PATCH 2/2] devcg: Allow wildcard exceptions in DENY child cgroups PSBM-144033
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Thu Jan 19 14:22:14 MSK 2023
On 16.12.2022 17:38, Nikolay Borisov wrote:
> In containerized environments there arise cases where we might want to
> allow wildcard exceptions when the parent cg doesn't have such. This for
> example arises when systemd services are being setup in containers. In
> order to allow systemd to function we must allow it to write wildcard
> (i.e b *:* rwm) rules in the child group. At the same time in order not
> to break the fundamental invariant of the device cgroup hierarchy that
> children cannot be more permissive than their parents instead of blindly
> trusting those rules, simply skip them in the child cgroup and defer to
> the parent's exceptions.
>
> For example assume we have A/B, where A has default behavior 'deny' and
> B was created afterwards and subsequently also has 'deny' default
> behavior. With this patch it's possible to write "b *:* rwm" in B which
> would also result in EPERM when trying to access any device that doesn't
> contain an exception in A:
>
> mkdir A
> echo "a" > A/devices.deny
> mkdir A/B
> echo "c *:*" > A/B/devices.allow <-- allows to create the exception
> but it's essentially a noop
>
> echo "c 1:3 rw" > A < -- now trying to open /dev/nul (matching 1:3)
> by a process in B would succeed.
>
> Implementing this doesn't really break the main invariant that children
> shouldn't have more access than their ancestors.
>
> Signed-off-by: Nikolay Borisov <nikolay.borisov at virtuozzo.com>
> ---
> security/device_cgroup.c | 54 +++++++++++++++++++++++++++++++---------
> 1 file changed, 42 insertions(+), 12 deletions(-)
>
> diff --git a/security/device_cgroup.c b/security/device_cgroup.c
> index f7948334e318..302159d21d15 100644
> --- a/security/device_cgroup.c
> +++ b/security/device_cgroup.c
> @@ -61,6 +61,11 @@ struct dev_cgroup {
> struct list_head propagate_pending;
> };
>
> +static bool is_wildcard_exception(struct dev_exception_item *ex)
> +{
> + return ex->minor == ~0 || ex->major == ~0;
> +}
> +
> static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
> {
> return container_of(s, struct dev_cgroup, css);
> @@ -224,6 +229,7 @@ static int devcgroup_online(struct cgroup *cgroup)
> if (!ret)
> dev_cgroup->behavior = parent_dev_cgroup->behavior;
> }
> +
> mutex_unlock(&devcgroup_mutex);
>
> return ret;
> @@ -434,9 +440,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
>
> /**
> * match_exception - iterates the exception list trying to match a rule
> - * based on type, major, minor and access type. It is
> - * considered a match if an exception is found that
> - * will contain the entire range of provided parameters.
> + * based on type, major, minor and access type. It is
> + * considered a match if an exception is found that
> + * will contain the entire range of provided parameters.
> * @exceptions: list of exceptions
> * @type: device type (DEV_BLOCK or DEV_CHAR)
> * @major: device file major number, ~0 to match all
> @@ -446,10 +452,11 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
> * returns: true in case it matches an exception completely
> */
> static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
> - u32 major, u32 minor, short access)
> + u32 major, u32 minor, short access, bool check_parent)
> {
> struct dev_exception_item *ex;
> struct cgroup *cgrp = dev_cgroup->css.cgroup;
> + bool wildcard_skipped = false;
> struct list_head *exceptions = &dev_cgroup->exceptions;
>
> list_for_each_entry_rcu(ex, exceptions, list) {
> @@ -464,6 +471,11 @@ static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
> continue;
> if (ex->minor != ~0 && ex->minor != minor)
> continue;
> + /* skip wildcard rule if we are child cg */
> + if (is_wildcard_exception(ex) && !test_bit(CGRP_VE_ROOT, &cgrp->flags)) {
> + wildcard_skipped = true;
> + continue;
> + }
>
> /* provided access cannot have more than the exception rule */
> mismatched_bits = access & (~ex->access) & ~ACC_MOUNT;
> @@ -477,9 +489,26 @@ static bool match_exception(struct dev_cgroup *dev_cgroup, short type,
> return true;
> }
>
> + /* we matched a wildcard rule, so let's check for a
> + * more specific one in the parent
> + */
> + if (wildcard_skipped && check_parent) {
> + struct cgroup *parent = cgrp->parent;
> + struct dev_cgroup *devcg_parent = cgroup_to_devcgroup(parent);
> + if (devcg_parent->behavior == DEVCG_DEFAULT_ALLOW)
> + /* Can't match any of the exceptions, even partially */
> + return !match_exception_partial(&devcg_parent->exceptions,
> + type, major, minor, access);
> + else
> + /* Need to match completely one exception to be allowed */
> + return match_exception(devcg_parent, type, major,
> + minor, access, false);
> + }
> +
> return false;
> }
>
> +
> /**
> * verify_new_ex - verifies if a new exception is part of what is allowed
> * by a dev cgroup based on the default policy +
> @@ -504,14 +533,14 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
> /*
> * new exception in the child doesn't matter, only
> * adding extra restrictions
> - */
> + */
nit
> return true;
> } else {
> /*
> * new exception in the child will add more devices
> * that can be acessed, so it can't match any of
> * parent's exceptions, even slightly
> - */
> + */
nit
> match = match_exception_partial(&dev_cgroup->exceptions,
> refex->type,
> refex->major,
> @@ -529,9 +558,8 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
> * be contained completely in an parent's exception to be
> * allowed
> */
> - match = match_exception(&dev_cgroup->exceptions, refex->type,
> - refex->major, refex->minor,
> - refex->access);
> + match = match_exception(dev_cgroup, refex->type, refex->major,
> + refex->minor, refex->access, false);
>
> if (match)
> /* parent has an exception that matches the proposed */
> @@ -705,6 +733,7 @@ static inline bool has_children(struct dev_cgroup *devcgroup)
> return !list_empty(&cgrp->children);
> }
>
> +
> /*
> * Modify the exception list using allow/deny rules.
> * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD
> @@ -864,8 +893,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
> break;
> }
>
> - if (!parent_has_perm(devcgroup, &ex))
> + if (!is_wildcard_exception(&ex) &&
> + !parent_has_perm(devcgroup, &ex))
> return -EPERM;
> +
> rc = dev_exception_add(devcgroup, &ex);
> break;
> case DEVCG_DENY:
> @@ -956,8 +987,7 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
> type, major, minor, access);
> else
> /* Need to match completely one exception to be allowed */
> - rc = match_exception(&dev_cgroup->exceptions, type, major,
> - minor, access);
> + rc = match_exception(dev_cgroup, type, major, minor, access, true);
> rcu_read_unlock();
>
> #ifdef CONFIG_VE
> --
> 2.34.1
>
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list