[CRIU] [PATCH 8/9] creds: restore -- Implement per-thread restore of credentials

Fri Dec 18 06:14:34 PST 2015

> @@ -2823,6 +2756,175 @@ out:
>  extern void __gcov_flush(void) __attribute__((weak));
>  void __gcov_flush(void) {}
>  
> +static void rst_reloc_creds(struct thread_restore_args *thread_args,
> +			    unsigned long *creds_pos_next)
> +{
> +	struct thread_creds_args *args;
> +
> +	if (unlikely(!*creds_pos_next))
> +		return;
> +
> +	args = rst_mem_remap_ptr(*creds_pos_next, RM_PRIVATE);
> +
> +	if (args->lsm_profile)
> +		args->lsm_profile = rst_mem_remap_ptr(args->mem_lsm_profile_pos, RM_PRIVATE);
> +	if (args->groups)
> +		args->groups = rst_mem_remap_ptr(args->mem_groups_pos, RM_PRIVATE);
> +
> +	*creds_pos_next = args->mem_pos_next;
> +	thread_args->creds_args = args;
> +}
> +
> +static struct thread_creds_args *
> +rst_prep_creds_args(struct thread_creds_args *prev, CredsEntry *ce)
> +{
> +	unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
> +	struct thread_creds_args *args;
> +
> +	if (!verify_cap_size(ce)) {
> +		pr_err("Caps size mismatch %d %d %d %d\n",
> +		       (int)ce->n_cap_inh, (int)ce->n_cap_eff,
> +		       (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (!may_restore(ce))
> +		return ERR_PTR(-EINVAL);
> +
> +	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
> +	if (!args)
> +		return ERR_PTR(-ENOMEM);
> +
> +	args->cap_last_cap = kdat.last_cap;

This value is the same for every thread (it is copied from kdat), so leave it on task_args.

> +	memcpy(&args->creds, ce, sizeof(args->creds));
> +
> +	if (ce->lsm_profile || opts.lsm_supplied) {
> +		char *rendered, *profile;
> +
> +		profile = ce->lsm_profile;
> +		if (opts.lsm_supplied)
> +			profile = opts.lsm_profile;
> +
> +		if (validate_lsm(profile) < 0)
> +			return ERR_PTR(-EINVAL);
> +
> +		if (profile) {
> +			size_t lsm_profile_len;
> +
> +			if (render_lsm_profile(profile, &rendered))
> +				return ERR_PTR(-EINVAL);
> +
> +			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
> +			lsm_profile_len = strlen(rendered);
> +			args->lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
> +			if (!args->lsm_profile) {
> +				xfree(rendered);
> +				return ERR_PTR(-ENOMEM);
> +			}
> +
> +			strncpy(args->lsm_profile, rendered, lsm_profile_len);
> +			xfree(rendered);
> +		}
> +	} else {
> +		args->lsm_profile = NULL;
> +		args->mem_lsm_profile_pos = 0;
> +	}
> +
> +	/*
> +	 * Zap fields which we cant use.
> +	 */
> +	args->creds.cap_inh = NULL;
> +	args->creds.cap_eff = NULL;
> +	args->creds.cap_prm = NULL;
> +	args->creds.cap_bnd = NULL;
> +	args->creds.groups = NULL;
> +	args->creds.lsm_profile = NULL;
> +
> +	memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
> +	memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
> +	memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
> +	memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
> +
> +	if (ce->n_groups) {
> +		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
> +		args->groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
> +		if (!args->groups)
> +			return ERR_PTR(-ENOMEM);
> +		memcpy(args->groups, ce->groups, ce->n_groups * sizeof(u32));
> +	} else {
> +		args->groups = NULL;
> +		args->mem_groups_pos = 0;
> +	}
> +
> +	args->mem_pos_next = 0;
> +
> +	if (prev)
> +		prev->mem_pos_next = this_pos;
> +	return args;
> +}
> +

> +static int rst_prep_creds(pid_t pid, CoreEntry *core, unsigned long *creds_pos)
> +{
> +	size_t i;
> +
> +	/*
> +	 * This is _really_ very old image
> +	 * format where @thread_core were not
> +	 * present. It means we don't have
> +	 * creds either, just ignore and exit
> +	 * early.
> +	 */
> +	if (unlikely(!core->thread_core)) {
> +		*creds_pos = 0;
> +		return 0;
> +	}
> +
> +	*creds_pos = rst_mem_cpos(RM_PRIVATE);
> +
> +	/*
> +	 * Old format: one Creds per task carried in own image file.
> +	 */
> +	if (!core->thread_core->creds)
> +		return rst_prep_creds_from_img(pid);

This would produce only one creds object at creds_pos, while the loop
below would produce several of them. But the thread_args code further
below scans (or should scan) creds_pos as an array, which would only work
for the "new" case.

> +
> +	for (i = 0; i < current->nr_threads; i++) {
> +		CredsEntry *ce = current->core[i]->thread_core->creds;
> +		struct thread_creds_args *args = NULL;
> +
> +		args = rst_prep_creds_args(args, ce);

The args passed as the function argument is always NULL here, just as it is
in the legacy function above. Is this the intended behavior?

> +		if (IS_ERR(args))
> +			return PTR_ERR(args);
> +	}
> +
> +	return 0;
> +}
> +
>  static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  {
>  	void *mem = MAP_FAILED;

> @@ -3124,6 +3184,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	 * Fill up per-thread data.
>  	 */
>  	for (i = 0; i < current->nr_threads; i++) {
> +		unsigned long creds_pos_next = creds_pos;
>  		CoreEntry *tcore;
>  		struct rt_sigframe *sigframe;
>  
> @@ -3157,6 +3218,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  		thread_args[i].clear_tid_addr	= CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
>  		core_get_tls(tcore, &thread_args[i].tls);
>  
> +		rst_reloc_creds(&thread_args[i], &creds_pos_next);

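The creds_pos_next variable is effectively write-only here: it is
re-initialized from creds_pos on every loop iteration, so the value that
rst_reloc_creds() stores into it is never used. Why do threads get
different creds after all?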

> +
>  		if (tcore->thread_core) {
>  			thread_args[i].has_futex	= true;
>  			thread_args[i].futex_rla	= tcore->thread_core->futex_rla;

> @@ -884,7 +895,7 @@ long __export_restore_task(struct task_restore_args *args)
>  	log_set_fd(args->logfd);
>  	log_set_loglevel(args->loglevel);
>  
> -	cap_last_cap = args->cap_last_cap;
> +	cap_last_cap = args->t->creds_args->cap_last_cap;

The global cap_last_cap becomes unused after this patch.

>  
>  	pr_info("Switched to the restorer %d\n", my_pid);
>