[CRIU] [PATCH 9/9] creds: restore -- Implement per-thread dump of credentials

Pavel Emelyanov xemul at parallels.com
Fri Dec 18 06:21:14 PST 2015


On 12/17/2015 12:14 PM, Cyrill Gorcunov wrote:
> This as well as restore requires several steps to reach per-thread
> support during dump stage
> 
>  - @creds area to be fetched from the parasite is embedded into
>    parasite_dump_structure
> 
>  - when test for task to be dumpable we no longer compare caps
>    because we now allow them to be different (and I renamed
>    proc_status_creds_eq to proc_status_creds_dumpable for this
>    sake)
> 
>  - have to extend dump_thread_common to support dumping of
>    creds (we call for dump_thread_common in several places,
>    in particular when we need to fetch misc params we don't
>    need creds, here @creds option comes into the play)
> 
>  - after this patch no creds-X.img file be generated anymore,
>    I guess we might drop it off with time from descriptors
> 
> https://jira.sw.ru/browse/PSBM-41416
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  cr-dump.c            | 41 ++++++++++++---------------
>  include/parasite.h   | 58 +++++++++++++++++++-------------------
>  include/proc_parse.h |  3 +-
>  parasite-syscall.c   | 79 +++++++++++++++++++++++++++++++---------------------
>  pie/parasite.c       | 71 ++++++++++++++++++++++++----------------------
>  proc_parse.c         | 50 +++++++++++++++++++++++++++++----
>  ptrace.c             |  2 +-
>  7 files changed, 180 insertions(+), 124 deletions(-)
> 
> diff --git a/cr-dump.c b/cr-dump.c
> index e76071cb1a1c..d321d8f2f0fe 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -510,22 +510,17 @@ err:
>  	return ret;
>  }
>  
> -static int dump_task_creds(struct parasite_ctl *ctl,
> -			   const struct cr_imgset *fds)
> +static int dump_task_creds(struct parasite_ctl *ctl, pid_t pid, CredsEntry *creds)
>  {
> -	CredsEntry ce = CREDS_ENTRY__INIT;
> +	pr_info("Dumping creds for %d\n", pid);
>  
> -	pr_info("\n");
> -	pr_info("Dumping creds for %d)\n", ctl->pid.real);
> -	pr_info("----------------------------------------\n");
> -
> -	if (parasite_dump_creds(ctl, &ce) < 0)
> -		return -1;
> -
> -	if (collect_lsm_profile(ctl->pid.real, &ce) < 0)
> +	if (parasite_dump_creds(ctl, creds) < 0 ||
> +	    collect_lsm_profile(ctl->pid.real, creds) < 0) {
> +		pr_err("Dump creds (pid: %d) failed\n",pid);
>  		return -1;
> +	}

What's the reason for screwing up this code? Why two if-s were bad? Please,
if you don't like the codeflow, fix it in separate patch, don't mix functional
changes with cosmetic ones.

>  
> -	return pb_write_one(img_from_set(fds, CR_FD_CREDS), &ce, PB_CREDS);
> +	return 0;
>  }
>  
>  static int get_task_futex_robust_list(pid_t pid, ThreadCoreEntry *info)
> @@ -690,13 +685,15 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread
>  	return ret;
>  }
>  
> -static int dump_task_core_all(struct pstree_item *item,
> -		const struct proc_pid_stat *stat,
> -		const struct parasite_dump_misc *misc,
> -		const struct cr_imgset *cr_imgset)
> +static int dump_task_core_all(struct parasite_ctl *ctl,
> +			      struct pstree_item *item,
> +			      const struct proc_pid_stat *stat,
> +			      const struct parasite_dump_misc *misc,
> +			      const struct cr_imgset *cr_imgset)
>  {
>  	struct cr_img *img;
>  	CoreEntry *core = item->core[0];
> +	CredsEntry *ce = core->thread_core->creds;
>  	pid_t pid = item->pid.real;
>  	int ret = -1;
>  	struct proc_status_creds *creds;
> @@ -743,6 +740,10 @@ static int dump_task_core_all(struct pstree_item *item,
>  	if (ret)
>  		goto err;
>  
> +	ret = dump_task_creds(ctl, pid, ce);
> +	if (ret)
> +		goto err;
> +
>  	img = img_from_set(cr_imgset, CR_FD_CORE);
>  	ret = pb_write_one(img, core, PB_CORE);
>  	if (ret < 0)
> @@ -1326,18 +1327,12 @@ static int dump_one_task(struct pstree_item *item)
>  		goto err_cure;
>  	}
>  
> -	ret = dump_task_core_all(item, &pps_buf, &misc, cr_imgset);
> +	ret = dump_task_core_all(parasite_ctl, item, &pps_buf, &misc, cr_imgset);
>  	if (ret) {
>  		pr_err("Dump core (pid: %d) failed with %d\n", pid, ret);
>  		goto err_cure;
>  	}
>  
> -	ret = dump_task_creds(parasite_ctl, cr_imgset);
> -	if (ret) {
> -		pr_err("Dump creds (pid: %d) failed with %d\n", pid, ret);
> -		goto err;
> -	}
> -
>  	ret = parasite_stop_daemon(parasite_ctl);
>  	if (ret) {
>  		pr_err("Can't cure (pid: %d) from parasite\n", pid);
> diff --git a/include/parasite.h b/include/parasite.h
> index f884bb5baeb4..d3aedf204849 100644
> --- a/include/parasite.h
> +++ b/include/parasite.h
> @@ -151,40 +151,14 @@ static inline int posix_timers_dump_size(int timer_n)
>  	return sizeof(int) + sizeof(struct posix_timer) * timer_n;
>  }
>  
> -struct parasite_dump_thread {
> -	unsigned int		*tid_addr;
> -	pid_t			tid;
> -	tls_t			tls;
> -	stack_t			sas;
> -	int			pdeath_sig;
> -};
> -
> -/*
> - * Misc sfuff, that is too small for separate file, but cannot
> - * be read w/o using parasite
> - */
> -
> -struct parasite_dump_misc {
> -	unsigned long		brk;
> -
> -	u32 pid;
> -	u32 sid;
> -	u32 pgid;
> -	u32 umask;
> -
> -	struct parasite_dump_thread	ti;
> -
> -	int dumpable;
> -};
> -
>  /*
>   * Calculate how long we can make the groups array in parasite_dump_creds
>   * and still fit the struct in one page
>   */
> -#define PARASITE_MAX_GROUPS							\
> -	(PAGE_SIZE -								\
> -	 offsetof(struct parasite_dump_creds, groups)				\
> -	) / sizeof(unsigned int)		/* groups */
> +#define PARASITE_MAX_GROUPS						\
> +	((PAGE_SIZE - sizeof(struct parasite_dump_thread) -		\
> +	  offsetof(struct parasite_dump_creds, groups)) /		\
> +	 sizeof(unsigned int))
>  
>  struct parasite_dump_creds {
>  	unsigned int		cap_last_cap;
> @@ -214,6 +188,30 @@ struct parasite_dump_creds {
>  	unsigned int		groups[0];
>  };
>  
> +struct parasite_dump_thread {
> +	unsigned int			*tid_addr;
> +	pid_t				tid;
> +	tls_t				tls;
> +	stack_t				sas;
> +	int				pdeath_sig;
> +	struct parasite_dump_creds	creds[0];
> +};
> +
> +/*
> + * Misc sfuff, that is too small for separate file, but cannot
> + * be read w/o using parasite
> + */
> +
> +struct parasite_dump_misc {
> +	unsigned long			brk;
> +	u32				pid;
> +	u32				sid;
> +	u32				pgid;
> +	u32				umask;
> +	int				dumpable;
> +	struct parasite_dump_thread	ti;
> +};

What's the reason for code move?

> +
>  static inline void copy_sas(ThreadSasEntry *dst, const stack_t *src)
>  {
>  	dst->ss_sp = encode_pointer(src->ss_sp);
> diff --git a/include/proc_parse.h b/include/proc_parse.h
> index f83bb7a3d0f7..2aff4f14db9f 100644
> --- a/include/proc_parse.h
> +++ b/include/proc_parse.h
> @@ -103,7 +103,8 @@ struct proc_status_creds {
>  	u32			cap_bnd[PROC_CAP_SIZE];
>  };
>  
> -bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2);
> +bool proc_status_creds_dumpable(struct proc_status_creds *parent,
> +				struct proc_status_creds *child);
>  
>  typedef int (*mount_fn_t)(struct mount_info *mi, const char *src, const
>  			  char *fstype, unsigned long mountflags);
> diff --git a/parasite-syscall.c b/parasite-syscall.c
> index bcd9922c4221..6f67cbb31ee8 100644
> --- a/parasite-syscall.c
> +++ b/parasite-syscall.c
> @@ -532,12 +532,49 @@ err:
>  	return -1;
>  }
>  
> +static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c)
> +{
> +	BUILD_BUG_ON(sizeof(ce->groups[0]) != sizeof(c->groups[0]));
> +	BUILD_BUG_ON(sizeof(ce->cap_inh[0]) != sizeof(c->cap_inh[0]));
> +	BUILD_BUG_ON(sizeof(ce->cap_prm[0]) != sizeof(c->cap_prm[0]));
> +	BUILD_BUG_ON(sizeof(ce->cap_eff[0]) != sizeof(c->cap_eff[0]));
> +	BUILD_BUG_ON(sizeof(ce->cap_bnd[0]) != sizeof(c->cap_bnd[0]));
> +
> +	BUG_ON(ce->n_cap_inh != CR_CAP_SIZE);
> +	BUG_ON(ce->n_cap_prm != CR_CAP_SIZE);
> +	BUG_ON(ce->n_cap_eff != CR_CAP_SIZE);
> +	BUG_ON(ce->n_cap_bnd != CR_CAP_SIZE);
> +
> +	memcpy(ce->cap_inh, c->cap_inh, sizeof(c->cap_inh[0]) * CR_CAP_SIZE);
> +	memcpy(ce->cap_prm, c->cap_prm, sizeof(c->cap_prm[0]) * CR_CAP_SIZE);
> +	memcpy(ce->cap_eff, c->cap_eff, sizeof(c->cap_eff[0]) * CR_CAP_SIZE);
> +	memcpy(ce->cap_bnd, c->cap_bnd, sizeof(c->cap_bnd[0]) * CR_CAP_SIZE);
> +
> +	ce->secbits	= c->secbits;
> +	ce->n_groups	= c->ngroups;
> +
> +	ce->groups	= xmemdup(c->groups, sizeof(c->groups[0]) * c->ngroups);
> +
> +	ce->uid		= c->uids[0];
> +	ce->gid		= c->gids[0];
> +	ce->euid	= c->uids[1];
> +	ce->egid	= c->gids[1];
> +	ce->suid	= c->uids[2];
> +	ce->sgid	= c->gids[2];
> +	ce->fsuid	= c->uids[3];
> +	ce->fsgid	= c->gids[3];
> +
> +	return ce->groups ? 0 : -ENOMEM;
> +}
> +
>  int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
>  				struct pid *tid, CoreEntry *core)
>  {
>  	struct parasite_dump_thread *args;
>  	pid_t pid = tid->real;
>  	ThreadCoreEntry *tc = core->thread_core;
> +	CredsEntry *creds = tc->creds;
> +	struct parasite_dump_creds *pc;
>  	int ret;
>  	struct thread_ctx octx;
>  
> @@ -545,6 +582,9 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
>  
>  	args = parasite_args(ctl, struct parasite_dump_thread);
>  
> +	pc = args->creds;
> +	pc->cap_last_cap = kdat.last_cap;

This is per-thread constant, leave it in per-task.

> +
>  	ret = get_thread_ctx(pid, &octx);
>  	if (ret)
>  		return -1;
> @@ -552,6 +592,7 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
>  	tc->has_blk_sigset = true;
>  	memcpy(&tc->blk_sigset, &octx.sigmask, sizeof(k_rtsigset_t));
>  
> +

Trash.

>  	ret = parasite_execute_trap_by_pid(PARASITE_CMD_DUMP_THREAD, ctl,
>  			pid, ctl->r_thread_stack, &octx);
>  	if (ret) {
> @@ -559,6 +600,12 @@ int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
>  		return -1;
>  	}
>  
> +	ret = alloc_groups_copy_creds(creds, pc);
> +	if (ret) {
> +		pr_err("Can't copy creds for thread %d\n", pid);
> +		return -1;
> +	}
> +
>  	ret = get_task_regs(pid, octx.regs, core);
>  	if (ret) {
>  		pr_err("Can't obtain regs for thread %d\n", pid);
> @@ -742,37 +789,7 @@ int parasite_dump_creds(struct parasite_ctl *ctl, CredsEntry *ce)
>  	if (parasite_execute_daemon(PARASITE_CMD_DUMP_CREDS, ctl) < 0)
>  		return -1;
>  
> -	ce->n_cap_inh = CR_CAP_SIZE;
> -	ce->cap_inh = pc->cap_inh;
> -	ce->n_cap_prm = CR_CAP_SIZE;
> -	ce->cap_prm = pc->cap_prm;
> -	ce->n_cap_eff = CR_CAP_SIZE;
> -	ce->cap_eff = pc->cap_eff;
> -	ce->n_cap_bnd = CR_CAP_SIZE;
> -	ce->cap_bnd = pc->cap_bnd;
> -
> -	ce->secbits = pc->secbits;
> -	ce->n_groups = pc->ngroups;
> -
> -	/*
> -	 * Achtung! We leak the parasite args pointer to the caller.
> -	 * It's not safe in general, but in our case is OK, since the
> -	 * latter doesn't go to parasite before using the data in it.
> -	 */
> -
> -	BUILD_BUG_ON(sizeof(ce->groups[0]) != sizeof(pc->groups[0]));
> -	ce->groups = pc->groups;
> -
> -	ce->uid   = pc->uids[0];
> -	ce->gid   = pc->gids[0];
> -	ce->euid  = pc->uids[1];
> -	ce->egid  = pc->gids[1];
> -	ce->suid  = pc->uids[2];
> -	ce->sgid  = pc->gids[2];
> -	ce->fsuid = pc->uids[3];
> -	ce->fsgid = pc->gids[3];
> -
> -	return 0;
> +	return alloc_groups_copy_creds(ce, pc);
>  }
>  
>  int parasite_drain_fds_seized(struct parasite_ctl *ctl,
> diff --git a/pie/parasite.c b/pie/parasite.c
> index a39c035a7f71..2391737a752d 100644
> --- a/pie/parasite.c
> +++ b/pie/parasite.c
> @@ -145,38 +145,6 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args)
>  	return ret;
>  }
>  
> -static int dump_thread_common(struct parasite_dump_thread *ti)
> -{
> -	int ret;
> -
> -	arch_get_tls(&ti->tls);
> -	ret = sys_prctl(PR_GET_TID_ADDRESS, (unsigned long) &ti->tid_addr, 0, 0, 0);
> -	if (ret)
> -		goto out;
> -
> -	ret = sys_sigaltstack(NULL, &ti->sas);
> -	if (ret)
> -		goto out;
> -
> -	ret = sys_prctl(PR_GET_PDEATHSIG, (unsigned long)&ti->pdeath_sig, 0, 0, 0);
> -out:
> -	return ret;
> -}
> -
> -static int dump_misc(struct parasite_dump_misc *args)
> -{
> -	args->brk = sys_brk(0);
> -
> -	args->pid = sys_getpid();
> -	args->sid = sys_getsid();
> -	args->pgid = sys_getpgid(0);
> -	args->umask = sys_umask(0);
> -	sys_umask(args->umask); /* never fails */
> -	args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
> -
> -	return dump_thread_common(&args->ti);
> -}
> -
>  static int dump_creds(struct parasite_dump_creds *args)
>  {
>  	int ret, i, j;
> @@ -259,6 +227,43 @@ grps_err:
>  	return -1;
>  }
>  
> +static int dump_thread_common(struct parasite_dump_thread *ti, bool creds)
> +{
> +	int ret;
> +
> +	arch_get_tls(&ti->tls);
> +	ret = sys_prctl(PR_GET_TID_ADDRESS, (unsigned long) &ti->tid_addr, 0, 0, 0);
> +	if (ret)
> +		goto out;
> +
> +	ret = sys_sigaltstack(NULL, &ti->sas);
> +	if (ret)
> +		goto out;
> +
> +	ret = sys_prctl(PR_GET_PDEATHSIG, (unsigned long)&ti->pdeath_sig, 0, 0, 0);
> +	if (ret)
> +		goto out;
> +
> +	if (creds)
> +		ret = dump_creds(ti->creds);
> +out:
> +	return ret;
> +}
> +
> +static int dump_misc(struct parasite_dump_misc *args)
> +{
> +	args->brk = sys_brk(0);
> +
> +	args->pid = sys_getpid();
> +	args->sid = sys_getsid();
> +	args->pgid = sys_getpgid(0);
> +	args->umask = sys_umask(0);
> +	sys_umask(args->umask); /* never fails */
> +	args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
> +
> +	return dump_thread_common(&args->ti, false);
> +}

What's the reason for code move?

> +
>  static int drain_fds(struct parasite_drain_fd *args)
>  {
>  	int ret;
> @@ -274,7 +279,7 @@ static int drain_fds(struct parasite_drain_fd *args)
>  static int dump_thread(struct parasite_dump_thread *args)
>  {
>  	args->tid = sys_gettid();
> -	return dump_thread_common(args);
> +	return dump_thread_common(args, true);

Why true here and false above?

>  }
>  
>  static char proc_mountpoint[] = "proc.crtools";
> diff --git a/proc_parse.c b/proc_parse.c
> index 2eef21a8d169..648f1aaecc6a 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -2221,13 +2221,53 @@ int aufs_parse(struct mount_info *new)
>  	return ret;
>  }
>  
> -bool proc_status_creds_eq(struct proc_status_creds *o1, struct proc_status_creds *o2)
> +bool proc_status_creds_dumpable(struct proc_status_creds *parent,
> +				struct proc_status_creds *child)
>  {
> -	/* FIXME: this is a little too strict, we should do semantic comparison
> -	 * of seccomp filters instead of forcing them to be exactly identical.
> -	 * It's not unsafe, though, so let's be lazy for now.
> +	const size_t size = sizeof(struct proc_status_creds) -
> +			offsetof(struct proc_status_creds, cap_inh);
> +
> +	/*
> +	 * The comparision rules are the following
> +	 *
> +	 *  - CAPs can be different
> +	 *  - seccomp filters should be passed via
> +	 *    semantic comparision (FIXME) but for
> +	 *    now we require them to be exactly
> +	 *    identical

I don't get this part of comment. You _just_ force filters to match (btw,
can different threads have different filters?).

> +	 *  - the rest of members must match
>  	 */
> -	return memcmp(o1, o2, sizeof(struct proc_status_creds)) == 0;
> +
> +	if (memcmp(parent, child, size)) {
> +		if (!pr_quelled(LOG_DEBUG)) {
> +			pr_debug("Creds undumpable (parent:child)\n"
> +				 "  uids:               %d:%d %d:%d %d:%d %d:%d\n"
> +				 "  gids:               %d:%d %d:%d %d:%d %d:%d\n"
> +				 "  state:              %d:%d"
> +				 "  ppid:               %d:%d\n"
> +				 "  sigpnd:             %llu:%llu\n"
> +				 "  shdpnd:             %llu:%llu\n"
> +				 "  seccomp_mode:       %d:%d\n"
> +				 "  last_filter:        %u:%u\n",
> +				 parent->uids[0], child->uids[0],
> +				 parent->uids[1], child->uids[1],
> +				 parent->uids[2], child->uids[2],
> +				 parent->uids[3], child->uids[3],
> +				 parent->gids[0], child->gids[0],
> +				 parent->gids[1], child->gids[1],
> +				 parent->gids[2], child->gids[2],
> +				 parent->gids[3], child->gids[3],
> +				 parent->state, child->state,
> +				 parent->ppid, child->ppid,
> +				 parent->sigpnd, child->sigpnd,
> +				 parent->shdpnd, child->shdpnd,
> +				 parent->seccomp_mode, child->seccomp_mode,
> +				 parent->last_filter, child->last_filter);
> +		}
> +		return false;
> +	}
> +
> +	return true;
>  }
>  
>  int parse_children(pid_t pid, pid_t **_c, int *_n)
> diff --git a/ptrace.c b/ptrace.c
> index 3f24afb22736..510377e77e0a 100644
> --- a/ptrace.c
> +++ b/ptrace.c
> @@ -247,7 +247,7 @@ try_again:
>  
>  		**creds = cr;
>  
> -	} else if (!proc_status_creds_eq(*creds, &cr)) {
> +	} else if (!proc_status_creds_dumpable(*creds, &cr)) {
>  		pr_err("creds don't match %d %d\n", pid, ppid);
>  		goto err;
>  	}
> 



More information about the CRIU mailing list