[CRIU] [PATCH 4/4] Move dump of credentials to parasite

Sophie Blee-Goldman ableegoldman at google.com
Fri Aug 8 22:23:09 PDT 2014


Moves the dump of credentials to the parasite, in case the process
lives in a user namespace and has different uids, gids, or
capabilities inside it.

Signed-off-by: Sophie Blee-Goldman <ableegoldman at google.com>

diff --git a/arch/arm/syscall.def b/arch/arm/syscall.def
index c539e3e..5df83cb 100644
--- a/arch/arm/syscall.def
+++ b/arch/arm/syscall.def
@@ -54,11 +54,14 @@ readlink			78	85	(const char *path, char *buf, int bufsize)
 umask				166	60	(int mask)
 getgroups			158	205	(int gsize, unsigned int *groups)
 setresuid			147	164	(int uid, int euid, int suid)
+getresuid			148	165	(unsigned int *uid, unsigned int *euid, unsigned int *suid)
 setresgid			149	170	(int gid, int egid, int sgid)
+getresgid			150	171	(unsigned int *gid, unsigned int *egid, unsigned int *sgid)
 getpgid				155	132	(pid_t pid)
 setfsuid			151	138	(int fsuid)
 setfsgid			152	139	(int fsgid)
 getsid				156	147	(void)
+capget				90	184	(struct cap_header *h, struct cap_data *d)
 capset				91	185	(struct cap_header *h, struct cap_data *d)
 rt_sigqueueinfo			138	178	(pid_t pid, int sig, siginfo_t *info)
 setpriority			140	97	(int which, int who, int nice)
diff --git a/arch/x86/syscall-x86-64.def b/arch/x86/syscall-x86-64.def
index 878cf79..112366b 100644
--- a/arch/x86/syscall-x86-64.def
+++ b/arch/x86/syscall-x86-64.def
@@ -54,11 +54,14 @@ __NR_readlink		89		sys_readlink		(const char *path, char *buf, int bufsize)
 __NR_umask		95		sys_umask		(int mask)
 __NR_getgroups		115		sys_getgroups		(int gsize, unsigned int *groups)
 __NR_setresuid		117		sys_setresuid		(int uid, int euid, int suid)
+__NR_getresuid		118		sys_getresuid		(unsigned int *uid, unsigned int *euid, unsigned int *suid)
 __NR_setresgid		119		sys_setresgid		(int gid, int egid, int sgid)
+__NR_getresgid		120		sys_getresgid		(unsigned int *gid, unsigned int *egid, unsigned int *sgid)
 __NR_getpgid		121		sys_getpgid		(pid_t pid)
 __NR_setfsuid		122		sys_setfsuid		(int fsuid)
 __NR_setfsgid		123		sys_setfsgid		(int fsgid)
 __NR_getsid		124		sys_getsid		(void)
+__NR_capget		125		sys_capget		(struct cap_header *h, struct cap_data *d)
 __NR_capset		126		sys_capset		(struct cap_header *h, struct cap_data *d)
 __NR_rt_sigqueueinfo	129		sys_rt_sigqueueinfo	(pid_t pid, int sig, siginfo_t *info)
 __NR_sigaltstack	131		sys_sigaltstack		(const void *uss, void *uoss)
diff --git a/cr-dump.c b/cr-dump.c
index 1700d9d..b4bc8a8 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -497,8 +497,7 @@ err:
 }
 
 static int dump_task_creds(struct parasite_ctl *ctl,
-			   const struct cr_fdset *fds,
-			   struct proc_status_creds *cr)
+			   const struct cr_fdset *fds)
 {
 	CredsEntry ce = CREDS_ENTRY__INIT;
 
@@ -506,26 +505,6 @@ static int dump_task_creds(struct parasite_ctl *ctl,
 	pr_info("Dumping creds for %d)\n", ctl->pid.real);
 	pr_info("----------------------------------------\n");
 
-	ce.uid   = cr->uids[0];
-	ce.gid   = cr->gids[0];
-	ce.euid  = cr->uids[1];
-	ce.egid  = cr->gids[1];
-	ce.suid  = cr->uids[2];
-	ce.sgid  = cr->gids[2];
-	ce.fsuid = cr->uids[3];
-	ce.fsgid = cr->gids[3];
-
-	BUILD_BUG_ON(CR_CAP_SIZE != PROC_CAP_SIZE);
-
-	ce.n_cap_inh = CR_CAP_SIZE;
-	ce.cap_inh = cr->cap_inh;
-	ce.n_cap_prm = CR_CAP_SIZE;
-	ce.cap_prm = cr->cap_prm;
-	ce.n_cap_eff = CR_CAP_SIZE;
-	ce.cap_eff = cr->cap_eff;
-	ce.n_cap_bnd = CR_CAP_SIZE;
-	ce.cap_bnd = cr->cap_bnd;
-
 	if (parasite_dump_creds(ctl, &ce) < 0)
 		return -1;
 
@@ -1608,7 +1587,7 @@ static int dump_one_task(struct pstree_item *item)
 		goto err_cure;
 	}
 
-	ret = dump_task_creds(parasite_ctl, cr_fdset, &cr);
+	ret = dump_task_creds(parasite_ctl, cr_fdset);
 	if (ret) {
 		pr_err("Dump creds (pid: %d) failed with %d\n", pid, ret);
 		goto err;
diff --git a/include/parasite.h b/include/parasite.h
index af81d85..a6a60bb 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -166,10 +166,24 @@ struct parasite_dump_misc {
  * Calculate how long we can make the groups array in parasite_dump_creds
  * and still fit the struct in one page
  */
-#define PARASITE_MAX_GROUPS	\
-	((PAGE_SIZE - 2 * sizeof(unsigned int)) / sizeof(unsigned int))
+#define PARASITE_MAX_GROUPS							\
+	((PAGE_SIZE								\
+	  - sizeof(unsigned int)		/* cap_last_cap */		\
+	  - 8 * sizeof(unsigned int)		/* uids + gids */		\
+	  - 4 * CR_CAP_SIZE * sizeof(u32)	/* cap_{inh,prm,eff,bnd} */	\
+	  - 2 * sizeof(unsigned int)		/* secbits, ngroups */		\
+	 ) / sizeof(unsigned int))		/* groups; fully parenthesized */
 
 struct parasite_dump_creds {
+	unsigned int		cap_last_cap;
+	unsigned int		uids[4];
+	unsigned int		gids[4];
+
+	u32			cap_inh[CR_CAP_SIZE];
+	u32			cap_prm[CR_CAP_SIZE];
+	u32			cap_eff[CR_CAP_SIZE];
+	u32			cap_bnd[CR_CAP_SIZE];
+
 	unsigned int		secbits;
 	unsigned int		ngroups;
 	unsigned int		groups[PARASITE_MAX_GROUPS];
diff --git a/include/prctl.h b/include/prctl.h
index b815b96..70db7b9 100644
--- a/include/prctl.h
+++ b/include/prctl.h
@@ -7,6 +7,9 @@
 #ifndef PR_GET_NAME
 # define PR_GET_NAME		16
 #endif
+#ifndef PR_CAPBSET_READ
+# define PR_CAPBSET_READ	23
+#endif
 #ifndef PR_CAPBSET_DROP
 # define PR_CAPBSET_DROP	24
 #endif
diff --git a/kerndat.c b/kerndat.c
index 0fee20b..3c87f6c 100644
--- a/kerndat.c
+++ b/kerndat.c
@@ -229,6 +229,8 @@ int kerndat_init(void)
 		ret = kerndat_get_dirty_track();
 	if (!ret)
 		ret = init_zero_page_pfn();
+	if (!ret)
+		ret = get_last_cap();
 
 	return ret;
 }
diff --git a/parasite-syscall.c b/parasite-syscall.c
index dad2570..7e52108 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -738,9 +738,29 @@ int parasite_dump_creds(struct parasite_ctl *ctl, CredsEntry *ce)
 	BUILD_BUG_ON(sizeof(*pc) > PAGE_SIZE);
 
 	pc = parasite_args(ctl, struct parasite_dump_creds);
+	pc->cap_last_cap = kern_last_cap;
+
 	if (parasite_execute_daemon(PARASITE_CMD_DUMP_CREDS, ctl) < 0)
 		return -1;
 
+	ce->uid   = pc->uids[0];
+	ce->gid   = pc->gids[0];
+	ce->euid  = pc->uids[1];
+	ce->egid  = pc->gids[1];
+	ce->suid  = pc->uids[2];
+	ce->sgid  = pc->gids[2];
+	ce->fsuid = pc->uids[3];
+	ce->fsgid = pc->gids[3];
+
+	ce->n_cap_inh = CR_CAP_SIZE;
+	ce->cap_inh = pc->cap_inh;
+	ce->n_cap_prm = CR_CAP_SIZE;
+	ce->cap_prm = pc->cap_prm;
+	ce->n_cap_eff = CR_CAP_SIZE;
+	ce->cap_eff = pc->cap_eff;
+	ce->n_cap_bnd = CR_CAP_SIZE;
+	ce->cap_bnd = pc->cap_bnd;
+
 	ce->secbits = pc->secbits;
 	ce->n_groups = pc->ngroups;
 
diff --git a/pie/parasite.c b/pie/parasite.c
index 92e7708..948718f 100644
--- a/pie/parasite.c
+++ b/pie/parasite.c
@@ -2,6 +2,7 @@
 #include <errno.h>
 #include <signal.h>
 #include <linux/limits.h>
+#include <linux/capability.h>
 #include <sys/mount.h>
 #include <stdarg.h>
 #include <sys/ioctl.h>
@@ -177,7 +178,59 @@ static int dump_misc(struct parasite_dump_misc *args)
 
 static int dump_creds(struct parasite_dump_creds *args)
 {
-	int ret;
+	int ret, i, j;
+	struct cap_data data[_LINUX_CAPABILITY_U32S_3];
+	struct cap_header hdr = {_LINUX_CAPABILITY_VERSION_3, 0};
+
+	ret = sys_getresuid(&args->uids[0], &args->uids[1], &args->uids[2]);
+	if (ret < 0) {
+		pr_err("Error calling getresuid (%d)\n", ret);
+		return -1;
+	}
+
+	ret = sys_getresgid(&args->gids[0], &args->gids[1], &args->gids[2]);
+	if (ret < 0) {
+		pr_err("Error calling getresgid (%d)\n", ret);
+		return -1;
+	}
+
+	/*
+	 * There is no getfsuid syscall, but setfsuid returns the existing
+	 * fsuid on failure, so just call it with an invalid uid.
+	 */
+	args->uids[3] = sys_setfsuid(-1);
+	args->gids[3] = sys_setfsgid(-1);
+
+	ret = sys_capget(&hdr, data);
+	if (ret < 0) {
+		pr_err("Unable to get capabilities: %d\n", ret);
+		return -1;
+	}
+
+	/*
+	 * Loop through the capability constants until we reach cap_last_cap.
+	 * The cap_bnd set is stored as a bitmask comprised of CR_CAP_SIZE number of
+	 * 32-bit uints, hence the inner loop from 0 to 32.
+	 */
+	for (i = 0; i < CR_CAP_SIZE; i++) {
+		args->cap_eff[i] = data[i].eff;
+		args->cap_prm[i] = data[i].prm;
+		args->cap_inh[i] = data[i].inh;
+		args->cap_bnd[i] = 0;
+
+		for (j = 0; j < 32; j++) {
+			if (j + i * 32 > args->cap_last_cap)
+				break;
+			ret = sys_prctl(PR_CAPBSET_READ, j + i * 32, 0, 0, 0);
+			if (ret < 0) {
+				pr_err("Unable to read capability %d: %d\n",
+					j + i * 32, ret);
+				return -1;
+			}
+			if (ret)
+				args->cap_bnd[i] |= (1 << j);
+		}
+	}
 
 	args->secbits = sys_prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
 
diff --git a/security.c b/security.c
index a801005..8a991d0 100644
--- a/security.c
+++ b/security.c
@@ -157,6 +157,12 @@ bool may_dump(struct proc_status_creds *creds)
 		check_caps(creds->cap_inh, creds->cap_eff, creds->cap_prm);
 }
 
+/*
+ * TODO: The check_uids/check_gids calls may incorrectly fail if criu is
+ * started by a non-root user on a process in a user namespace, whose
+ * uid/gid matches the caller's outside the namespace but is mapped to a
+ * different uid/gid inside it.
+ */
 bool may_restore(CredsEntry *creds)
 {
 	return check_uids(creds->uid, creds->euid, creds->suid) &&
-- 
2.0.0.526.g5318336



More information about the CRIU mailing list