[CRIU] [PATCH 9/9 v2] creds: restore -- Implement per-thread dump of credentials

Cyrill Gorcunov gorcunov at gmail.com
Mon Dec 21 03:08:41 PST 2015


This as well as restore requires several steps to reach per-thread
support during dump stage

 - @creds area to be fetched from the parasite is embedded into
   parasite_dump_structure

 - when test for task to be dumpable we no longer compare caps
   because we now allow them to be different (and I renamed
   proc_status_creds_eq to proc_status_creds_dumpable for this
   sake)

 - have to extend dump_thread_common to support dumping of
   creds (we call for dump_thread_common in several places,
   in particular when we need to fetch misc params we don't
   need creds, here @creds option comes into the play)

 - after this patch no creds-X.img file be generated anymore,
   I guess we might drop it off with time from descriptors

https://jira.sw.ru/browse/PSBM-41416

v2:
 - In dump_task_creds() don't mangle the call for parasite_dump_creds
   and collect_lsm_profile
 - PARASITE_MAX_GROUPS takes parasite_dump_thread into account because
   dump_thread_common now serves two cases: for plain misc parameters
   fetching and for creds as well (depending on the context)
 - when test for dumpable we still require the seccomp filters
   to match, they can be different and we need to support such
   configuration too but not in this series

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---

I still need update a test-case for this, I'm working on it
once read I send it out (meanwhile I've been testing the
series with container running openswan which uses threads
with different caps)

 cr-dump.c            | 37 +++++++++++-------------
 include/parasite.h   | 58 +++++++++++++++++++-------------------
 include/proc_parse.h |  3 +-
 parasite-syscall.c   | 79 +++++++++++++++++++++++++++++++---------------------
 pie/parasite.c       | 71 ++++++++++++++++++++++++----------------------
 proc_parse.c         | 50 +++++++++++++++++++++++++++++----
 ptrace.c             |  2 +-
 7 files changed, 178 insertions(+), 122 deletions(-)

diff --git a/cr-dump.c b/cr-dump.c
index e76071cb1a1c..28aca1883e11 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -510,22 +510,17 @@ err:
 	return ret;
 }
 
-static int dump_task_creds(struct parasite_ctl *ctl,
-			   const struct cr_imgset *fds)
+static int dump_task_creds(struct parasite_ctl *ctl, pid_t pid, CredsEntry *creds)
 {
-	CredsEntry ce = CREDS_ENTRY__INIT;
+	pr_info("Dumping creds for %d\n", pid);
 
-	pr_info("\n");
-	pr_info("Dumping creds for %d)\n", ctl->pid.real);
-	pr_info("----------------------------------------\n");
-
-	if (parasite_dump_creds(ctl, &ce) < 0)
+	if (parasite_dump_creds(ctl, creds) < 0)
 		return -1;
 
-	if (collect_lsm_profile(ctl->pid.real, &ce) < 0)
+	if (collect_lsm_profile(ctl->pid.real, creds) < 0)
 		return -1;
 
-	return pb_write_one(img_from_set(fds, CR_FD_CREDS), &ce, PB_CREDS);
+	return 0;
 }
 
 static int get_task_futex_robust_list(pid_t pid, ThreadCoreEntry *info)
@@ -690,13 +685,15 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
 	return ret;
 }
 
-static int dump_task_core_all(struct pstree_item *item,
-		const struct proc_pid_stat *stat,
-		const struct parasite_dump_misc *misc,
-		const struct cr_imgset *cr_imgset)
+static int dump_task_core_all(struct parasite_ctl *ctl,
+			      struct pstree_item *item,
+			      const struct proc_pid_stat *stat,
+			      const struct parasite_dump_misc *misc,
+			      const struct cr_imgset *cr_imgset)
 {
 	struct cr_img *img;
 	CoreEntry *core = item->core[0];
+	CredsEntry *ce = core->thread_core->creds;
 	pid_t pid = item->pid.real;
 	int ret = -1;
 	struct proc_status_creds *creds;
@@ -743,6 +740,10 @@ static int dump_task_core_all(struct pstree_item *item,
 	if (ret)
 		goto err;
 
+	ret = dump_task_creds(ctl, pid, ce);
+	if (ret)
+		goto err;
+
 	img = img_from_set(cr_imgset, CR_FD_CORE);
 	ret = pb_write_one(img, core, PB_CORE);
 	if (ret < 0)
@@ -1326,18 +1327,12 @@ static int dump_one_task(struct pstree_item *item)
 		goto err_cure;
 	}
 
-	ret = dump_task_core_all(item, &pps_buf, &misc, cr_imgset);
+	ret = dump_task_core_all(parasite_ctl, item, &pps_buf, &misc, cr_imgset);
 	if (ret) {
 		pr_err("Dump core (pid: %d) failed with %d\n", pid, ret);
 		goto err_cure;
 	}
 
-	ret = dump_task_creds(parasite_ctl, cr_imgset);
-	if (ret) {
-		pr_err("Dump creds (pid: %d) failed with %d\n", pid, ret);
-		goto err;
-	}
-
 	ret = parasite_stop_daemon(parasite_ctl);
 	if (ret) {
 		pr_err("Can't cure (pid: %d) from parasite\n", pid);
diff --git a/include/parasite.h b/include/parasite.h
index f884bb5baeb4..d3aedf204849 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -151,40 +151,14 @@ static inline int posix_timers_dump_size(int timer_n)
 	return sizeof(int) + sizeof(struct posix_timer) * timer_n;
 }
 
-struct parasite_dump_thread {
-	unsigned int		*tid_addr;
-	pid_t			tid;
-	tls_t			tls;
-	stack_t			sas;
-	int			pdeath_sig;
-};
-
-/*
- * Misc sfuff, that is too small for separate file, but cannot
- * be read w/o using parasite
- */
-
-struct parasite_dump_misc {
-	unsigned long		brk;
-
-	u32 pid;
-	u32 sid;
-	u32 pgid;
-	u32 umask;
-
-	struct parasite_dump_thread	ti;
-
-	int dumpable;
-};
-
 /*
  * Calculate how long we can make the groups array in parasite_dump_creds
  * and still fit the struct in one page
  */
-#define PARASITE_MAX_GROUPS							\
-	(PAGE_SIZE -								\
-	 offsetof(struct parasite_dump_creds, groups)				\
-	) / sizeof(unsigned int)		/* groups */
+#define PARASITE_MAX_GROUPS						\
+	((PAGE_SIZE - sizeof(struct parasite_dump_thread) -		\
+	  offsetof(struct parasite_dump_creds, groups)) /		\
+	 sizeof(unsigned int))
 
 struct parasite_dump_creds {
 	unsigned int		cap_last_cap;
@@ -214,6 +188,30 @@ struct parasite_dump_creds {
 	unsigned int		groups[0];
 };
 
+struct parasite_dump_thread {
+	unsigned int			*tid_addr;
+	pid_t				tid;
+	tls_t				tls;
+	stack_t				sas;
+	int				pdeath_sig;
+	struct parasite_dump_creds	creds[0];
+};
+
+/*
+ * Misc sfuff, that is too small for separate file, but cannot
+ * be read w/o using parasite
+ */
+
+struct parasite_dump_misc {
+	unsigned long			brk;
+	u32				pid;
+	u32				sid;
+	u32				pgid;
+	u32				umask;
+	int				dumpable;
+	struct parasite_dump_thread	ti;
+};
+
 static inline void copy_sas(ThreadSasEntry *dst, const stack_t *src)
 {
 	dst->ss_sp = encode_pointer(src->ss_sp);
diff --git a/include/proc_parse.h b/include/proc_parse.h
index f83bb7a3d0f7..2aff4f14db9f 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -103,7 +103,8 @@ struct proc_status_creds {
 	u32			cap_bnd[PROC_CAP_SIZE];
 };
 
-bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2);
+bool proc_status_creds_dumpable(struct proc_status_creds *parent,
+				struct proc_status_creds *child);
 
 typedef int (*mount_fn_t)(struct mount_info *mi, const char *src, const
 			  char *fstype, unsigned long mountflags);
diff --git a/parasite-syscall.c b/parasite-syscall.c
index bcd9922c4221..6f67cbb31ee8 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -532,12 +532,49 @@ err:
 	return -1;
 }
 
+static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c)
+{
+	BUILD_BUG_ON(sizeof(ce->groups[0]) != sizeof(c->groups[0]));
+	BUILD_BUG_ON(sizeof(ce->cap_inh[0]) != sizeof(c->cap_inh[0]));
+	BUILD_BUG_ON(sizeof(ce->cap_prm[0]) != sizeof(c->cap_prm[0]));
+	BUILD_BUG_ON(sizeof(ce->cap_eff[0]) != sizeof(c->cap_eff[0]));
+	BUILD_BUG_ON(sizeof(ce->cap_bnd[0]) != sizeof(c->cap_bnd[0]));
+
+	BUG_ON(ce->n_cap_inh != CR_CAP_SIZE);
+	BUG_ON(ce->n_cap_prm != CR_CAP_SIZE);
+	BUG_ON(ce->n_cap_eff != CR_CAP_SIZE);
+	BUG_ON(ce->n_cap_bnd != CR_CAP_SIZE);
+
+	memcpy(ce->cap_inh, c->cap_inh, sizeof(c->cap_inh[0]) * CR_CAP_SIZE);
+	memcpy(ce->cap_prm, c->cap_prm, sizeof(c->cap_prm[0]) * CR_CAP_SIZE);
+	memcpy(ce->cap_eff, c->cap_eff, sizeof(c->cap_eff[0]) * CR_CAP_SIZE);
+	memcpy(ce->cap_bnd, c->cap_bnd, sizeof(c->cap_bnd[0]) * CR_CAP_SIZE);
+
+	ce->secbits	= c->secbits;
+	ce->n_groups	= c->ngroups;
+
+	ce->groups	= xmemdup(c->groups, sizeof(c->groups[0]) * c->ngroups);
+
+	ce->uid		= c->uids[0];
+	ce->gid		= c->gids[0];
+	ce->euid	= c->uids[1];
+	ce->egid	= c->gids[1];
+	ce->suid	= c->uids[2];
+	ce->sgid	= c->gids[2];
+	ce->fsuid	= c->uids[3];
+	ce->fsgid	= c->gids[3];
+
+	return ce->groups ? 0 : -ENOMEM;
+}
+
 int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
 				struct pid *tid, CoreEntry *core)
 {
 	struct parasite_dump_thread *args;
 	pid_t pid = tid->real;
 	ThreadCoreEntry *tc = core->thread_core;
+	CredsEntry *creds = tc->creds;
+	struct parasite_dump_creds *pc;
 	int ret;
 	struct thread_ctx octx;
 
@@ -545,6 +582,9 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
 
 	args = parasite_args(ctl, struct parasite_dump_thread);
 
+	pc = args->creds;
+	pc->cap_last_cap = kdat.last_cap;
+
 	ret = get_thread_ctx(pid, &octx);
 	if (ret)
 		return -1;
@@ -552,6 +592,7 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
 	tc->has_blk_sigset = true;
 	memcpy(&tc->blk_sigset, &octx.sigmask, sizeof(k_rtsigset_t));
 
+
 	ret = parasite_execute_trap_by_pid(PARASITE_CMD_DUMP_THREAD, ctl,
 			pid, ctl->r_thread_stack, &octx);
 	if (ret) {
@@ -559,6 +600,12 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
 		return -1;
 	}
 
+	ret = alloc_groups_copy_creds(creds, pc);
+	if (ret) {
+		pr_err("Can't copy creds for thread %d\n", pid);
+		return -1;
+	}
+
 	ret = get_task_regs(pid, octx.regs, core);
 	if (ret) {
 		pr_err("Can't obtain regs for thread %d\n", pid);
@@ -742,37 +789,7 @@ int parasite_dump_creds(struct parasite_ctl *ctl, CredsEntry *ce)
 	if (parasite_execute_daemon(PARASITE_CMD_DUMP_CREDS, ctl) < 0)
 		return -1;
 
-	ce->n_cap_inh = CR_CAP_SIZE;
-	ce->cap_inh = pc->cap_inh;
-	ce->n_cap_prm = CR_CAP_SIZE;
-	ce->cap_prm = pc->cap_prm;
-	ce->n_cap_eff = CR_CAP_SIZE;
-	ce->cap_eff = pc->cap_eff;
-	ce->n_cap_bnd = CR_CAP_SIZE;
-	ce->cap_bnd = pc->cap_bnd;
-
-	ce->secbits = pc->secbits;
-	ce->n_groups = pc->ngroups;
-
-	/*
-	 * Achtung! We leak the parasite args pointer to the caller.
-	 * It's not safe in general, but in our case is OK, since the
-	 * latter doesn't go to parasite before using the data in it.
-	 */
-
-	BUILD_BUG_ON(sizeof(ce->groups[0]) != sizeof(pc->groups[0]));
-	ce->groups = pc->groups;
-
-	ce->uid   = pc->uids[0];
-	ce->gid   = pc->gids[0];
-	ce->euid  = pc->uids[1];
-	ce->egid  = pc->gids[1];
-	ce->suid  = pc->uids[2];
-	ce->sgid  = pc->gids[2];
-	ce->fsuid = pc->uids[3];
-	ce->fsgid = pc->gids[3];
-
-	return 0;
+	return alloc_groups_copy_creds(ce, pc);
 }
 
 int parasite_drain_fds_seized(struct parasite_ctl *ctl,
diff --git a/pie/parasite.c b/pie/parasite.c
index a39c035a7f71..2391737a752d 100644
--- a/pie/parasite.c
+++ b/pie/parasite.c
@@ -145,38 +145,6 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args)
 	return ret;
 }
 
-static int dump_thread_common(struct parasite_dump_thread *ti)
-{
-	int ret;
-
-	arch_get_tls(&ti->tls);
-	ret = sys_prctl(PR_GET_TID_ADDRESS, (unsigned long) &ti->tid_addr, 0, 0, 0);
-	if (ret)
-		goto out;
-
-	ret = sys_sigaltstack(NULL, &ti->sas);
-	if (ret)
-		goto out;
-
-	ret = sys_prctl(PR_GET_PDEATHSIG, (unsigned long)&ti->pdeath_sig, 0, 0, 0);
-out:
-	return ret;
-}
-
-static int dump_misc(struct parasite_dump_misc *args)
-{
-	args->brk = sys_brk(0);
-
-	args->pid = sys_getpid();
-	args->sid = sys_getsid();
-	args->pgid = sys_getpgid(0);
-	args->umask = sys_umask(0);
-	sys_umask(args->umask); /* never fails */
-	args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
-
-	return dump_thread_common(&args->ti);
-}
-
 static int dump_creds(struct parasite_dump_creds *args)
 {
 	int ret, i, j;
@@ -259,6 +227,43 @@ grps_err:
 	return -1;
 }
 
+static int dump_thread_common(struct parasite_dump_thread *ti, bool creds)
+{
+	int ret;
+
+	arch_get_tls(&ti->tls);
+	ret = sys_prctl(PR_GET_TID_ADDRESS, (unsigned long) &ti->tid_addr, 0, 0, 0);
+	if (ret)
+		goto out;
+
+	ret = sys_sigaltstack(NULL, &ti->sas);
+	if (ret)
+		goto out;
+
+	ret = sys_prctl(PR_GET_PDEATHSIG, (unsigned long)&ti->pdeath_sig, 0, 0, 0);
+	if (ret)
+		goto out;
+
+	if (creds)
+		ret = dump_creds(ti->creds);
+out:
+	return ret;
+}
+
+static int dump_misc(struct parasite_dump_misc *args)
+{
+	args->brk = sys_brk(0);
+
+	args->pid = sys_getpid();
+	args->sid = sys_getsid();
+	args->pgid = sys_getpgid(0);
+	args->umask = sys_umask(0);
+	sys_umask(args->umask); /* never fails */
+	args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
+
+	return dump_thread_common(&args->ti, false);
+}
+
 static int drain_fds(struct parasite_drain_fd *args)
 {
 	int ret;
@@ -274,7 +279,7 @@ static int drain_fds(struct parasite_drain_fd *args)
 static int dump_thread(struct parasite_dump_thread *args)
 {
 	args->tid = sys_gettid();
-	return dump_thread_common(args);
+	return dump_thread_common(args, true);
 }
 
 static char proc_mountpoint[] = "proc.crtools";
diff --git a/proc_parse.c b/proc_parse.c
index 2eef21a8d169..648f1aaecc6a 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -2221,13 +2221,53 @@ int aufs_parse(struct mount_info *new)
 	return ret;
 }
 
-bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2)
+bool proc_status_creds_dumpable(struct proc_status_creds *parent,
+				struct proc_status_creds *child)
 {
-	/* FIXME: this is a little too strict, we should do semantic comparison
-	 * of seccomp filters instead of forcing them to be exactly identical.
-	 * It's not unsafe, though, so let's be lazy for now.
+	const size_t size = sizeof(struct proc_status_creds) -
+			offsetof(struct proc_status_creds, cap_inh);
+
+	/*
+	 * The comparision rules are the following
+	 *
+	 *  - CAPs can be different
+	 *  - seccomp filters should be passed via
+	 *    semantic comparision (FIXME) but for
+	 *    now we require them to be exactly
+	 *    identical
+	 *  - the rest of members must match
 	 */
-	return memcmp(o1, o2, sizeof(struct proc_status_creds)) == 0;
+
+	if (memcmp(parent, child, size)) {
+		if (!pr_quelled(LOG_DEBUG)) {
+			pr_debug("Creds undumpable (parent:child)\n"
+				 "  uids:               %d:%d %d:%d %d:%d %d:%d\n"
+				 "  gids:               %d:%d %d:%d %d:%d %d:%d\n"
+				 "  state:              %d:%d"
+				 "  ppid:               %d:%d\n"
+				 "  sigpnd:             %llu:%llu\n"
+				 "  shdpnd:             %llu:%llu\n"
+				 "  seccomp_mode:       %d:%d\n"
+				 "  last_filter:        %u:%u\n",
+				 parent->uids[0], child->uids[0],
+				 parent->uids[1], child->uids[1],
+				 parent->uids[2], child->uids[2],
+				 parent->uids[3], child->uids[3],
+				 parent->gids[0], child->gids[0],
+				 parent->gids[1], child->gids[1],
+				 parent->gids[2], child->gids[2],
+				 parent->gids[3], child->gids[3],
+				 parent->state, child->state,
+				 parent->ppid, child->ppid,
+				 parent->sigpnd, child->sigpnd,
+				 parent->shdpnd, child->shdpnd,
+				 parent->seccomp_mode, child->seccomp_mode,
+				 parent->last_filter, child->last_filter);
+		}
+		return false;
+	}
+
+	return true;
 }
 
 int parse_children(pid_t pid, pid_t **_c, int *_n)
diff --git a/ptrace.c b/ptrace.c
index 3f24afb22736..510377e77e0a 100644
--- a/ptrace.c
+++ b/ptrace.c
@@ -247,7 +247,7 @@ try_again:
 
 		**creds = cr;
 
-	} else if (!proc_status_creds_eq(*creds, &cr)) {
+	} else if (!proc_status_creds_dumpable(*creds, &cr)) {
 		pr_err("creds don't match %d %d\n", pid, ppid);
 		goto err;
 	}
-- 
2.5.0



More information about the CRIU mailing list