[Devel] [PATCH RHEL7 COMMIT] ve/cgroup: devices -- Modify exception list for docker sake

Konstantin Khorenko khorenko at odin.com
Wed May 6 09:34:38 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.4.10
------>
commit b41a8db9cdf3a598c9abe35cb968b0ab476e8eeb
Author: Cyrill Gorcunov <gorcunov at odin.com>
Date:   Wed May 6 20:34:38 2015 +0400

    ve/cgroup: devices -- Modify exception list for docker sake
    
    When docker starts up it modifies nested device cgroups. The devices it needs
    to operate with are almost the same as those we already have in our exception
    list, except:
    
     1) Add ACC_MKNOD for every device we have
    
    	This is a harmless operation, done simply to make docker happy.
    
     2) Set ACC_MKNOD for devices created for the container
        via set_device_perms_ve. At the moment this is important
        for VT use inside the container.
    
     3) Add MISC_MAJOR:200 for tun device
    
    	Tun/tap is safe to use inside container as far as I know.
    
        p.s. khorenko@ approved this kind of change in pcs7.
    
     4) For some reason docker requires write access to /dev/random;
        grant it (since we prohibit writing to /dev/random from
        inside a ve at the kernel level, it's safe to do).
    
    v2:
     - Use ns_capable(CAP_VE_SYS_ADMIN) instead of plain capable(CAP_SYS_ADMIN)
       for docker's sake. Note that the vanilla kernel no longer has any can_attach
       helper, but to keep the patch smaller let's keep it. ns_capable should
       be enough for security; after all, the user in a container may attach only
       its own tasks.
    
    v3:
     - Use nsown_capable.
    
    v4:
     - Switch back to the plain capable() test. It turned out that the vanilla
       kernel has no cap test in devcgroup_can_attach (nor does it
       have this helper), while nsown_capable looks to be too relaxed.
       So I think we could use plain capable() as we do in the PCS6 kernel,
       while at the same time requiring CAP_VE_SYS_ADMIN to be present inside
       the container.
    
    Signed-off-by: Cyrill Gorcunov <gorcunov at odin.com>
    Acked-by: Konstantin Khorenko <khorenko at odin.com>
    
    CC: Vladimir Davydov <vdavydov at odin.com>
    CC: Pavel Emelyanov <xemul at odin.com>
    CC: Andrey Vagin <avagin at odin.com>
---
 security/device_cgroup.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 53adb00..31024f7 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -16,6 +16,7 @@
 #include <uapi/linux/vzcalluser.h>
 #include <linux/major.h>
 #include <linux/module.h>
+#include <linux/capability.h>
 
 #define ACC_MKNOD 1
 #define ACC_READ  2
@@ -80,7 +81,7 @@ static int devcgroup_can_attach(struct cgroup *new_cgrp,
 {
 	struct task_struct *task = cgroup_taskset_first(set);
 
-	if (current != task && !capable(CAP_SYS_ADMIN))
+	if (current != task && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -662,7 +663,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	struct cgroup *p = devcgroup->css.cgroup;
 	struct dev_cgroup *parent = NULL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN))
 		return -EPERM;
 
 	if (p->parent)
@@ -984,21 +985,22 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 #ifdef CONFIG_VE
 
 static struct dev_exception_item default_whitelist_items[] = {
-	{ ~0,				~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD },
-	{ ~0,				~0, DEV_BLOCK, ACC_HIDDEN | ACC_MKNOD },
-	{ UNIX98_PTY_MASTER_MAJOR,	~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ UNIX98_PTY_SLAVE_MAJOR,	~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ PTY_MASTER_MAJOR,		~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ PTY_SLAVE_MAJOR,		~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ MEM_MAJOR,	/* null */	3, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ MEM_MAJOR,	/* zero */	5, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ MEM_MAJOR,	/* full */	7, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ TTYAUX_MAJOR,	/* tty */	0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ TTYAUX_MAJOR,	/* console */	1, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ TTYAUX_MAJOR,	/* ptmx */	2, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ MEM_MAJOR,	/* random */	8, DEV_CHAR, ACC_HIDDEN | ACC_READ },
-	{ MEM_MAJOR,	/* urandom */	9, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE },
-	{ MEM_MAJOR,	/* kmsg */	11, DEV_CHAR, ACC_HIDDEN | ACC_WRITE },
+	{ ~0,				~0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD },
+	{ ~0,				~0,	DEV_BLOCK,	ACC_HIDDEN | ACC_MKNOD },
+	{ UNIX98_PTY_MASTER_MAJOR,	~0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE },
+	{ UNIX98_PTY_SLAVE_MAJOR,	~0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE },
+	{ PTY_MASTER_MAJOR,		~0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE },
+	{ PTY_SLAVE_MAJOR,		~0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE },
+	{ MEM_MAJOR,			3,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* null */
+	{ MEM_MAJOR,			5,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* zero */
+	{ MEM_MAJOR,			7,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* full */
+	{ TTYAUX_MAJOR,			0,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* tty */
+	{ TTYAUX_MAJOR,			1,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* console */
+	{ TTYAUX_MAJOR,			2,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* ptmx */
+	{ MEM_MAJOR,			8,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* random */
+	{ MEM_MAJOR,			9,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* urandom */
+	{ MEM_MAJOR,			11,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_WRITE },            /* kmsg */
+	{ MISC_MAJOR,			200,	DEV_CHAR,	ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* tun */
 };
 
 static LIST_HEAD(default_whitelist);
@@ -1069,7 +1071,7 @@ int devcgroup_set_perms_ve(struct cgroup *cgroup,
 	else
 		return -EINVAL;
 
-	new.access = decode_ve_perms(mask);
+	new.access = decode_ve_perms(mask) | (mask ? ACC_MKNOD : 0);
 	new.major = new.minor = ~0;
 
 	switch (type & VE_USE_MASK) {



More information about the Devel mailing list