[CRIU] [PATCH 8/9] creds: restore -- Implement per-thread restore of credentials
Tycho Andersen
tycho.andersen at canonical.com
Thu Dec 17 07:56:35 PST 2015
On Thu, Dec 17, 2015 at 12:14:16PM +0300, Cyrill Gorcunov wrote:
> Because the creds parameters are to be passed inside pie/restorer
> code but are read before the thread_restore_args and task_restore_args
> structures are allocated, we need a small trick: prepare the
> creds in several stages
>
> - collect all creds data into separate private memory blobs
> - once all memory needed for the restorer is allocated, we relocate
> the pointers in these blocks and set
> thread_restore_args::creds_args to the appropriate
> address
> - the restorer works as usual and sets up creds parameters as before
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> cr-restore.c | 297 ++++++++++++++++++++++++++++++++---------------------
> include/restorer.h | 29 ++++--
> pie/restorer.c | 33 ++++--
> 3 files changed, 224 insertions(+), 135 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index aade3bc0c6a1..bd77eb3744cb 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -2460,73 +2460,6 @@ static inline int verify_cap_size(CredsEntry *ce)
> (ce->n_cap_prm == CR_CAP_SIZE) && (ce->n_cap_bnd == CR_CAP_SIZE));
> }
>
> -static CredsEntry *read_creds(int pid)
> -{
> - int ret;
> - struct cr_img *img;
> - CredsEntry *ce = NULL;
> -
> - img = open_image(CR_FD_CREDS, O_RSTR, pid);
> - if (!img)
> - return NULL;
> -
> - ret = pb_read_one(img, &ce, PB_CREDS);
> - close_image(img);
> -
> - if (ret < 0) {
> - creds_entry__free_unpacked(ce, NULL);
> - return NULL;
> - }
> -
> - if (!verify_cap_size(ce)) {
> - pr_err("Caps size mismatch %d %d %d %d\n",
> - (int)ce->n_cap_inh, (int)ce->n_cap_eff,
> - (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
> - creds_entry__free_unpacked(ce, NULL);
> - return NULL;
> - }
> -
> - if (!may_restore(ce)) {
> - creds_entry__free_unpacked(ce, NULL);
> - return NULL;
> - }
> -
> - return ce;
> -}
> -
> -static int prepare_creds(CredsEntry *ce, struct task_restore_args *args)
> -{
> - args->creds = *ce;
> - args->creds.cap_inh = args->cap_inh;
> - memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
> - args->creds.cap_eff = args->cap_eff;
> - memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
> - args->creds.cap_prm = args->cap_prm;
> - memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
> - args->creds.cap_bnd = args->cap_bnd;
> - memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
> -
> - /*
> - * We can set supplementary groups here. This won't affect any
> - * permission checks for us (we're still root) and will not be
> - * reset by subsequent creds changes in restorer.
> - */
> -
> - BUILD_BUG_ON(sizeof(*ce->groups) != sizeof(gid_t));
> - if (setgroups(ce->n_groups, ce->groups) < 0) {
> - pr_perror("Can't set supplementary groups");
> - return -1;
> - }
> -
> - creds_entry__free_unpacked(ce, NULL);
> -
> - args->cap_last_cap = kdat.last_cap;
> -
> - /* XXX -- validate creds here? */
> -
> - return 0;
> -}
> -
> static int prepare_mm(pid_t pid, struct task_restore_args *args)
> {
> int exe_fd, i, ret = -1;
> @@ -2823,6 +2756,175 @@ out:
> extern void __gcov_flush(void) __attribute__((weak));
> void __gcov_flush(void) {}
>
> +static void rst_reloc_creds(struct thread_restore_args *thread_args,
> + unsigned long *creds_pos_next)
> +{
> + struct thread_creds_args *args;
> +
> + if (unlikely(!*creds_pos_next))
> + return;
> +
> + args = rst_mem_remap_ptr(*creds_pos_next, RM_PRIVATE);
> +
> + if (args->lsm_profile)
> + args->lsm_profile = rst_mem_remap_ptr(args->mem_lsm_profile_pos, RM_PRIVATE);
> + if (args->groups)
> + args->groups = rst_mem_remap_ptr(args->mem_groups_pos, RM_PRIVATE);
> +
> + *creds_pos_next = args->mem_pos_next;
> + thread_args->creds_args = args;
> +}
> +
> +static struct thread_creds_args *
> +rst_prep_creds_args(struct thread_creds_args *prev, CredsEntry *ce)
> +{
> + unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
> + struct thread_creds_args *args;
> +
> + if (!verify_cap_size(ce)) {
> + pr_err("Caps size mismatch %d %d %d %d\n",
> + (int)ce->n_cap_inh, (int)ce->n_cap_eff,
> + (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + if (!may_restore(ce))
> + return ERR_PTR(-EINVAL);
> +
> + args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
> + if (!args)
> + return ERR_PTR(-ENOMEM);
> +
> + args->cap_last_cap = kdat.last_cap;
> + memcpy(&args->creds, ce, sizeof(args->creds));
> +
> + if (ce->lsm_profile || opts.lsm_supplied) {
> + char *rendered, *profile;
> +
> + profile = ce->lsm_profile;
> + if (opts.lsm_supplied)
> + profile = opts.lsm_profile;
> +
> + if (validate_lsm(profile) < 0)
> + return ERR_PTR(-EINVAL);
> +
> + if (profile) {
> + size_t lsm_profile_len;
> +
> + if (render_lsm_profile(profile, &rendered))
> + return ERR_PTR(-EINVAL);
> +
> + args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
> + lsm_profile_len = strlen(rendered);
> + args->lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
> + if (!args->lsm_profile) {
> + xfree(rendered);
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + strncpy(args->lsm_profile, rendered, lsm_profile_len);
> + xfree(rendered);
> + }
> + } else {
> + args->lsm_profile = NULL;
> + args->mem_lsm_profile_pos = 0;
> + }
> +
> + /*
> + * Zap fields which we can't use.
> + */
> + args->creds.cap_inh = NULL;
> + args->creds.cap_eff = NULL;
> + args->creds.cap_prm = NULL;
> + args->creds.cap_bnd = NULL;
> + args->creds.groups = NULL;
> + args->creds.lsm_profile = NULL;
> +
> + memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
> + memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
> + memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
> + memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
> +
> + if (ce->n_groups) {
> + args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
> + args->groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
> + if (!args->groups)
> + return ERR_PTR(-ENOMEM);
> + memcpy(args->groups, ce->groups, ce->n_groups * sizeof(u32));
> + } else {
> + args->groups = NULL;
> + args->mem_groups_pos = 0;
> + }
> +
> + args->mem_pos_next = 0;
> +
> + if (prev)
> + prev->mem_pos_next = this_pos;
> + return args;
> +}
> +
> +static int rst_prep_creds_from_img(pid_t pid)
> +{
> + CredsEntry *ce = NULL;
> + struct cr_img *img;
> + int ret;
> +
> + img = open_image(CR_FD_CREDS, O_RSTR, pid);
> + if (!img)
> + return -ENOENT;
> +
> + ret = pb_read_one(img, &ce, PB_CREDS);
> + close_image(img);
> +
> + if (ret > 0) {
> + struct thread_creds_args *args = NULL;
> +
> + args = rst_prep_creds_args(NULL, ce);
> + if (IS_ERR(args))
> + ret = PTR_ERR(args);
> + else
> + ret = 0;
> + }
> + creds_entry__free_unpacked(ce, NULL);
> + return ret;
> +}
> +
> +static int rst_prep_creds(pid_t pid, CoreEntry *core, unsigned long *creds_pos)
> +{
> + size_t i;
> +
> + /*
> + * This is a _really_ old image
> + * format where @thread_core was not
> + * present. It means we don't have
> + * creds either, so just ignore and exit
> + * early.
> + */
> + if (unlikely(!core->thread_core)) {
> + *creds_pos = 0;
> + return 0;
> + }
> +
> + *creds_pos = rst_mem_cpos(RM_PRIVATE);
> +
> + /*
> + * Old format: one Creds per task carried in own image file.
> + */
> + if (!core->thread_core->creds)
> + return rst_prep_creds_from_img(pid);
I guess in this case we have to open the image once per thread, which
we try to avoid doing in other places. However, since this is the
"legacy" case, I think it's probably ok to just leave it like this.
On a similar note, the linked list with mem_pos_next seemed a little
weird, but I don't have a better suggestion :(
Tycho
More information about the CRIU
mailing list