[Devel] [RFC v14][PATCH 21/54] Define subtree flag and unpriv_allowed sysctl
Oren Laadan
orenl at cs.columbia.edu
Tue Apr 28 16:23:51 PDT 2009
Define a sysctl 'ckpt_unpriv_allowed' which determines whether all
checkpoints and restarts require CAP_SYS_ADMIN. If it is 1 (the
default), unprivileged users may checkpoint and restart, and the
regular permission checks are relied upon to prevent privilege
escalation. Setting it to 0 requires CAP_SYS_ADMIN for every checkpoint
and restart, which prevents unprivileged users from exploiting any
privilege escalation bugs.
Define a CHECKPOINT_SUBTREE flag for sys_checkpoint() which allows
checkpointing a subtree of processes. Without this flag, the syscall
expects to checkpoint an entire container (in the sense of a pid
namespace), starting with the container init task.
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
checkpoint/checkpoint.c | 4 ++++
checkpoint/restart.c | 2 +-
checkpoint/sys.c | 17 +++++++++++++++--
include/linux/checkpoint_types.h | 12 +++++++++++-
kernel/sysctl.c | 19 +++++++++++++++++++
5 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 0299046..6305e5d 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -423,6 +423,10 @@ static int get_container(struct ckpt_ctx *ctx, pid_t pid)
ctx->root_nsproxy = nsproxy;
ctx->root_init = is_container_init(task);
+ /* FIX: does this error code make sense here? */
+ if (!(ctx->flags & CHECKPOINT_SUBTREE) && !ctx->root_init)
+ return -EBUSY;
+
return 0;
out:
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index edc89ba..e5a29fb 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -287,7 +287,7 @@ static int restore_read_header(struct ckpt_ctx *ctx)
h->minor != ((LINUX_VERSION_CODE >> 8) & 0xff) ||
h->patch != ((LINUX_VERSION_CODE) & 0xff))
goto out;
- if (h->flags & ~CKPT_CTX_CHECKPOINT)
+ if (h->flags & ~(CKPT_CTX_CHECKPOINT | CKPT_USER_FLAGS))
goto out;
if (h->uts_release_len != sizeof(uts->release) ||
h->uts_version_len != sizeof(uts->version) ||
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index a613748..e3f7012 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -21,6 +21,13 @@
#include <linux/checkpoint.h>
/*
+ * ckpt_unpriv_allowed - sysctl controlled; if 0, sys_checkpoint() and
+ * sys_restart() fail with -EPERM unless the caller has CAP_SYS_ADMIN.
+ */
+int ckpt_unpriv_allowed = 1; /* default: yes */
+
+
+/*
* Helpers to write(read) from(to) kernel space to(from) the checkpoint
* image file descriptor (similar to how a core-dump is performed).
*
@@ -296,10 +303,13 @@ asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags)
struct ckpt_ctx *ctx;
int ret;
- /* no flags for now */
- if (flags)
+ /* check user flags */
+ if (flags & ~CKPT_USER_FLAGS)
return -EINVAL;
+ if (!ckpt_unpriv_allowed && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
if (pid == 0)
pid = current->pid;
ctx = ckpt_ctx_alloc(fd, flags | CKPT_CTX_CHECKPOINT);
@@ -334,6 +344,9 @@ asmlinkage long sys_restart(int crid, int fd, unsigned long flags)
if (flags)
return -EINVAL;
+ if (!ckpt_unpriv_allowed && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
/* FIXME: for now, we use 'crid' as a pid */
pid = (pid_t) crid;
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 85eb184..09d3238 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -10,6 +10,13 @@
* distribution for more details.
*/
+#define CKPT_VERSION 1
+
+#define CHECKPOINT_SUBTREE 0x4
+
+
+#ifdef __KERNEL__
+
struct ckpt_ctx;
#include <linux/list.h>
@@ -19,7 +26,6 @@ struct ckpt_ctx;
#include <linux/sched.h>
#include <asm/atomic.h>
-#define CKPT_VERSION 1
struct ckpt_ctx {
int crid; /* unique checkpoint id */
@@ -67,5 +73,9 @@ struct ckpt_ctx {
#define CKPT_CTX_CHECKPOINT 0x1
#define CKPT_CTX_RESTART 0x2
+#define CKPT_USER_FLAGS (CHECKPOINT_SUBTREE)
+
+
+#endif /* __KERNEL__ */
#endif /* _LINUX_CHECKPOINT_TYPES_H_ */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e3d2c7d..21f9c48 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -192,6 +192,10 @@ int sysctl_legacy_va_layout;
extern int prove_locking;
extern int lock_stat;
+#ifdef CONFIG_CHECKPOINT
+extern int ckpt_unpriv_allowed;
+#endif
+
/* The default sysctl tables: */
static struct ctl_table root_table[] = {
@@ -910,6 +914,20 @@ static struct ctl_table kern_table[] = {
.child = slow_work_sysctls,
},
#endif
+#ifdef CONFIG_CHECKPOINT
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ckpt_unpriv_allowed",
+ .data = &ckpt_unpriv_allowed,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
@@ -1302,6 +1320,7 @@ static struct ctl_table vm_table[] = {
.proc_handler = &scan_unevictable_handler,
},
#endif
+
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt
--
1.5.4.3
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list