[Devel] [PATCH RHEL7 COMMIT] ms/netfilter: Allow xt_owner in any user namespace

Konstantin Khorenko khorenko at virtuozzo.com
Tue Oct 17 19:19:32 MSK 2017


The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.1.1.vz7.37.15
------>
commit 4bf5a791dc638cd5466a14e46df2278b05fc3a32
Author: Eric W. Biederman <ebiederm at xmission.com>
Date:   Tue Oct 17 19:19:32 2017 +0300

    ms/netfilter: Allow xt_owner in any user namespace
    
    ML: 9847371a84b0be330f4bc4aaa98904101ee8573d
    
    Making this work is a little tricky as it really isn't kosher to
    change the xt_owner_match_info in a check function.
    
    Without changing xt_owner_match_info we need to know the user
    namespace the uids and gids are specified in.  In the common case
    net->user_ns == current_user_ns().  Verify net->user_ns ==
    current_user_ns() in owner_check so we can later assume it in
    owner_mt.
    
    In owner_check also verify that all of the uids and gids specified are
    in net->user_ns and that the expected min/max relationship exists
    between the uids and gids in xt_owner_match_info.
    
    In owner_mt get the network namespace from the outgoing socket, as this
    must be the same network namespace as the netfilter rules, and use that
    network namespace to find the user namespace the uids and gids in
    xt_owner_match_info are encoded in.  Then convert from their encoded
    form into the kernel internal format for uids and gids and perform the
    owner match.
    
    Similar to ping_group_range, this code does not try to detect
    noncontiguous UID/GID ranges.
    
    Signed-off-by: "Eric W. Biederman" <ebiederm at xmission.com>
    Signed-off-by: Kevin Cernekee <cernekee at chromium.org>
    Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
    
    https://jira.sw.ru/browse/PSBM-69409
    
    Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
    
    Q: Why do we need to support ipt_owner in nested user ns?
    
    avagin@:
    The kernel grabs userns when we send START to the "state" file of a
    container ve cgroup. But vzctl does this after restoring a container,
    so we don't know the ve userns when we are restoring iptables rules.
    
    CRIU can't dump nested userns, so if any app creates a new userns,
    criu dump will return an error.
    
    Note: this patch does not cover the older version of the iptables protocol
    (iptables-1.3.5, CentOS 5); that will be covered by a separate patch.
---
 net/netfilter/xt_owner.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 31dec4a..1744f78 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -80,11 +80,39 @@ owner_mt6_v0(const struct sk_buff *skb, struct xt_action_param *par)
 static int owner_check(const struct xt_mtchk_param *par)
 {
 	struct xt_owner_match_info *info = par->matchinfo;
+	struct net *net = par->net;
 
-	/* For now only allow adding matches from the initial user namespace */
+	/* Only allow the common case where the userns of the writer
+	 * matches the userns of the network namespace.
+	 */
 	if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
-	    !current_user_ns_initial())
+	    (current_user_ns() != net->user_ns))
 		return -EINVAL;
+
+	/* Ensure the uids are valid */
+	if (info->match & XT_OWNER_UID) {
+		kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+		kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+		if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+		    (info->uid_max < info->uid_min) ||
+		    uid_lt(uid_max, uid_min)) {
+			return -EINVAL;
+		}
+	}
+
+	/* Ensure the gids are valid */
+	if (info->match & XT_OWNER_GID) {
+		kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+		kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+		if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+		    (info->gid_max < info->gid_min) ||
+		    gid_lt(gid_max, gid_min)) {
+			return -EINVAL;
+		}
+	}
+
 	return 0;
 }
 
@@ -93,6 +121,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
+	struct net *net = dev_net(par->in ? par->in : par->out);
 
 	if (skb->sk == NULL || skb->sk->sk_socket == NULL)
 		return (info->match ^ info->invert) == 0;
@@ -109,8 +138,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		       (XT_OWNER_UID | XT_OWNER_GID)) == 0;
 
 	if (info->match & XT_OWNER_UID) {
-		kuid_t uid_min = make_kuid(ve_init_user_ns(), info->uid_min);
-		kuid_t uid_max = make_kuid(ve_init_user_ns(), info->uid_max);
+		kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+		kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
 		if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
 		     uid_lte(filp->f_cred->fsuid, uid_max)) ^
 		    !(info->invert & XT_OWNER_UID))
@@ -118,8 +147,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	if (info->match & XT_OWNER_GID) {
-		kgid_t gid_min = make_kgid(ve_init_user_ns(), info->gid_min);
-		kgid_t gid_max = make_kgid(ve_init_user_ns(), info->gid_max);
+		kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+		kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
 		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
 		     gid_lte(filp->f_cred->fsgid, gid_max)) ^
 		    !(info->invert & XT_OWNER_GID))


More information about the Devel mailing list