[CRIU] [PATCH 8/9] creds: restore -- Implement per-thread restore of credentials
Cyrill Gorcunov
gorcunov at openvz.org
Thu Dec 17 01:14:16 PST 2015
Because the creds parameters are to be passed into the pie/restorer
code but are read before the thread_restore_args and task_restore_args
structures are allocated, we need a small trick and prepare the
creds in several stages:
- collect all creds data into separate private memory blobs
- once all memory needed for the restorer is allocated, we relocate
the pointers in these blobs and set
thread_restore_args::creds_args to the appropriate
address
- the restorer works as usual and sets up the creds parameters as before
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
cr-restore.c | 297 ++++++++++++++++++++++++++++++++---------------------
include/restorer.h | 29 ++++--
pie/restorer.c | 33 ++++--
3 files changed, 224 insertions(+), 135 deletions(-)
diff --git a/cr-restore.c b/cr-restore.c
index aade3bc0c6a1..bd77eb3744cb 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -2460,73 +2460,6 @@ static inline int verify_cap_size(CredsEntry *ce)
(ce->n_cap_prm == CR_CAP_SIZE) && (ce->n_cap_bnd == CR_CAP_SIZE));
}
-static CredsEntry *read_creds(int pid)
-{
- int ret;
- struct cr_img *img;
- CredsEntry *ce = NULL;
-
- img = open_image(CR_FD_CREDS, O_RSTR, pid);
- if (!img)
- return NULL;
-
- ret = pb_read_one(img, &ce, PB_CREDS);
- close_image(img);
-
- if (ret < 0) {
- creds_entry__free_unpacked(ce, NULL);
- return NULL;
- }
-
- if (!verify_cap_size(ce)) {
- pr_err("Caps size mismatch %d %d %d %d\n",
- (int)ce->n_cap_inh, (int)ce->n_cap_eff,
- (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
- creds_entry__free_unpacked(ce, NULL);
- return NULL;
- }
-
- if (!may_restore(ce)) {
- creds_entry__free_unpacked(ce, NULL);
- return NULL;
- }
-
- return ce;
-}
-
-static int prepare_creds(CredsEntry *ce, struct task_restore_args *args)
-{
- args->creds = *ce;
- args->creds.cap_inh = args->cap_inh;
- memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
- args->creds.cap_eff = args->cap_eff;
- memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
- args->creds.cap_prm = args->cap_prm;
- memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
- args->creds.cap_bnd = args->cap_bnd;
- memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
-
- /*
- * We can set supplementary groups here. This won't affect any
- * permission checks for us (we're still root) and will not be
- * reset by subsequent creds changes in restorer.
- */
-
- BUILD_BUG_ON(sizeof(*ce->groups) != sizeof(gid_t));
- if (setgroups(ce->n_groups, ce->groups) < 0) {
- pr_perror("Can't set supplementary groups");
- return -1;
- }
-
- creds_entry__free_unpacked(ce, NULL);
-
- args->cap_last_cap = kdat.last_cap;
-
- /* XXX -- validate creds here? */
-
- return 0;
-}
-
static int prepare_mm(pid_t pid, struct task_restore_args *args)
{
int exe_fd, i, ret = -1;
@@ -2823,6 +2756,175 @@ out:
extern void __gcov_flush(void) __attribute__((weak));
void __gcov_flush(void) {}
+/*
+ * Attach the creds blob located at position *creds_pos_next of the
+ * RM_PRIVATE restorer memory to @thread_args.
+ *
+ * The blob address and the lsm_profile/groups pointers stored inside it
+ * are recomputed with rst_mem_remap_ptr() from their saved positions.
+ * On return *creds_pos_next holds the blob's mem_pos_next value. A zero
+ * position means "no blob": nothing is touched in that case.
+ */
+static void rst_reloc_creds(struct thread_restore_args *thread_args,
+			    unsigned long *creds_pos_next)
+{
+	struct thread_creds_args *creds;
+	unsigned long pos = *creds_pos_next;
+
+	/* Nothing was prepared for this thread. */
+	if (unlikely(pos == 0))
+		return;
+
+	creds = rst_mem_remap_ptr(pos, RM_PRIVATE);
+
+	/* Only pointers that were actually filled in get re-pointed. */
+	if (creds->lsm_profile)
+		creds->lsm_profile = rst_mem_remap_ptr(creds->mem_lsm_profile_pos, RM_PRIVATE);
+	if (creds->groups)
+		creds->groups = rst_mem_remap_ptr(creds->mem_groups_pos, RM_PRIVATE);
+
+	/* Hand the follow-up position back to the caller, then publish the blob. */
+	*creds_pos_next = creds->mem_pos_next;
+	thread_args->creds_args = creds;
+}
+
+/*
+ * Build a thread_creds_args blob for @ce in RM_PRIVATE restorer memory.
+ *
+ * @ce is checked with verify_cap_size() and may_restore(), then copied
+ * into the blob together with the four capability sets, the rendered LSM
+ * profile (taken from opts.lsm_profile when opts.lsm_supplied is set,
+ * otherwise from @ce) and the supplementary groups. For lsm_profile and
+ * groups both the current pointer and the rst_mem position are stored;
+ * rst_reloc_creds() later recomputes the pointers from the positions.
+ *
+ * If @prev is not NULL, its mem_pos_next is set to the position of the
+ * new blob, chaining the blobs together.
+ *
+ * Returns the new blob, ERR_PTR(-EINVAL) on validation or LSM failure,
+ * or ERR_PTR(-ENOMEM) when an allocation fails.
+ */
+static struct thread_creds_args *
+rst_prep_creds_args(struct thread_creds_args *prev, CredsEntry *ce)
+{
+	unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
+	struct thread_creds_args *args;
+
+	if (!verify_cap_size(ce)) {
+		pr_err("Caps size mismatch %d %d %d %d\n",
+		       (int)ce->n_cap_inh, (int)ce->n_cap_eff,
+		       (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!may_restore(ce))
+		return ERR_PTR(-EINVAL);
+
+	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
+	if (!args)
+		return ERR_PTR(-ENOMEM);
+
+	args->cap_last_cap = kdat.last_cap;
+	memcpy(&args->creds, ce, sizeof(args->creds));
+
+	/*
+	 * Default to "no LSM profile". This also covers the case where
+	 * opts.lsm_supplied is set but the resulting profile is NULL;
+	 * otherwise rst_reloc_creds() would test an uninitialized pointer.
+	 */
+	args->lsm_profile = NULL;
+	args->mem_lsm_profile_pos = 0;
+
+	if (ce->lsm_profile || opts.lsm_supplied) {
+		char *rendered, *profile;
+
+		/* A profile supplied via options overrides the dumped one. */
+		profile = ce->lsm_profile;
+		if (opts.lsm_supplied)
+			profile = opts.lsm_profile;
+
+		if (validate_lsm(profile) < 0)
+			return ERR_PTR(-EINVAL);
+
+		if (profile) {
+			size_t lsm_profile_len;
+
+			if (render_lsm_profile(profile, &rendered))
+				return ERR_PTR(-EINVAL);
+
+			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
+			lsm_profile_len = strlen(rendered);
+			args->lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
+			if (!args->lsm_profile) {
+				xfree(rendered);
+				return ERR_PTR(-ENOMEM);
+			}
+
+			/*
+			 * Copy the terminating NUL as well: strncpy() bounded
+			 * by the string length would leave it out.
+			 */
+			memcpy(args->lsm_profile, rendered, lsm_profile_len + 1);
+			xfree(rendered);
+		}
+	}
+
+	/*
+	 * Zap the pointer fields copied from @ce: they reference memory
+	 * owned by @ce, so the blob carries its own copies instead.
+	 */
+	args->creds.cap_inh = NULL;
+	args->creds.cap_eff = NULL;
+	args->creds.cap_prm = NULL;
+	args->creds.cap_bnd = NULL;
+	args->creds.groups = NULL;
+	args->creds.lsm_profile = NULL;
+
+	memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
+	memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
+	memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
+	memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));
+
+	if (ce->n_groups) {
+		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
+		args->groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
+		if (!args->groups)
+			return ERR_PTR(-ENOMEM);
+		memcpy(args->groups, ce->groups, ce->n_groups * sizeof(u32));
+	} else {
+		args->groups = NULL;
+		args->mem_groups_pos = 0;
+	}
+
+	/* This blob is the tail of the chain until a later one links to it. */
+	args->mem_pos_next = 0;
+
+	/*
+	 * NOTE(review): @prev and @args are raw pointers into RM_PRIVATE
+	 * memory -- confirm rst_mem_alloc() never relocates earlier
+	 * allocations, otherwise they must be re-derived from positions.
+	 */
+	if (prev)
+		prev->mem_pos_next = this_pos;
+	return args;
+}
+
+/*
+ * Old image format: read the single CredsEntry stored in the per-task
+ * CR_FD_CREDS image of @pid and turn it into a creds blob with
+ * rst_prep_creds_args().
+ *
+ * Returns 0 on success, -ENOENT if the image cannot be opened, the
+ * pb_read_one() result if it is not positive, or the negative error
+ * code reported by rst_prep_creds_args().
+ */
+static int rst_prep_creds_from_img(pid_t pid)
+{
+	CredsEntry *ce = NULL;
+	struct cr_img *img;
+	int ret;
+
+	img = open_image(CR_FD_CREDS, O_RSTR, pid);
+	if (!img)
+		return -ENOENT;
+
+	ret = pb_read_one(img, &ce, PB_CREDS);
+	close_image(img);
+
+	/*
+	 * NOTE(review): a zero return from pb_read_one() falls through as
+	 * success without any blob being prepared -- confirm this is the
+	 * intended handling of an empty image.
+	 */
+	if (ret > 0) {
+		struct thread_creds_args *args;
+
+		args = rst_prep_creds_args(NULL, ce);
+		ret = IS_ERR(args) ? PTR_ERR(args) : 0;
+	}
+
+	/*
+	 * @ce stays NULL when nothing was unpacked; do not hand NULL to
+	 * the generated free routine.
+	 */
+	if (ce)
+		creds_entry__free_unpacked(ce, NULL);
+	return ret;
+}
+
+/*
+ * Prepare the creds blobs for the current task in RM_PRIVATE memory.
+ *
+ * On return *creds_pos is the rst_mem position at which the first blob
+ * starts, or 0 when @core carries no thread_core at all.
+ *
+ * Returns 0 on success or a negative error code from
+ * rst_prep_creds_from_img() / rst_prep_creds_args().
+ */
+static int rst_prep_creds(pid_t pid, CoreEntry *core, unsigned long *creds_pos)
+{
+	struct thread_creds_args *prev = NULL;
+	size_t i;
+
+	/*
+	 * This is a _really_ old image format in which @thread_core is
+	 * not present. It means we don't have creds either, so just
+	 * ignore it and exit early.
+	 */
+	if (unlikely(!core->thread_core)) {
+		*creds_pos = 0;
+		return 0;
+	}
+
+	*creds_pos = rst_mem_cpos(RM_PRIVATE);
+
+	/*
+	 * Old format: one Creds per task, carried in its own image file.
+	 */
+	if (!core->thread_core->creds)
+		return rst_prep_creds_from_img(pid);
+
+	/*
+	 * New format: one Creds per thread. The previous blob must live
+	 * across iterations so that every new blob is linked to it via
+	 * mem_pos_next; resetting it inside the loop would leave the
+	 * chain unlinked.
+	 *
+	 * NOTE(review): @prev is a raw pointer into RM_PRIVATE memory --
+	 * confirm rst_mem_alloc() keeps earlier allocations in place.
+	 */
+	for (i = 0; i < current->nr_threads; i++) {
+		CredsEntry *ce = current->core[i]->thread_core->creds;
+		struct thread_creds_args *args;
+
+		args = rst_prep_creds_args(prev, ce);
+		if (IS_ERR(args))
+			return PTR_ERR(args);
+		prev = args;
+	}
+
+	return 0;
+}
+
static int sigreturn_restore(pid_t pid, CoreEntry *core)
{
void *mem = MAP_FAILED;
@@ -2850,10 +2952,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
unsigned long aio_rings;
MmEntry *mm = rsti(current)->mm;
- char *lsm = NULL;
- int lsm_profile_len = 0;
- unsigned long lsm_pos = 0;
-
int n_seccomp_filters = 0;
unsigned long seccomp_filter_pos = 0;
@@ -2861,7 +2959,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
struct vm_area_list *vmas = &rsti(current)->vmas;
int i;
- CredsEntry *creds;
+ unsigned long creds_pos = 0;
pr_info("Restore via sigreturn\n");
@@ -2925,6 +3023,13 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
goto err_nv;
/*
+ * Read creds info for every thread and allocate memory
+ * needed so we can use this data inside restorer.
+ */
+ if (rst_prep_creds(pid, core, &creds_pos))
+ goto err_nv;
+
+ /*
* We're about to search for free VM area and inject the restorer blob
* into it. No irrelevent mmaps/mremaps beyond this point, otherwise
* this unwanted mapping might get overlapped by the restorer.
@@ -2934,45 +3039,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
if (ret < 0)
goto err;
- creds = read_creds(pid);
- if (!creds)
- goto err;
-
- if (creds->lsm_profile || opts.lsm_supplied) {
- char *rendered, *profile;
- int ret;
-
- profile = creds->lsm_profile;
- if (opts.lsm_supplied)
- profile = opts.lsm_profile;
-
- if (validate_lsm(profile) < 0)
- return -1;
-
- if (profile) {
- ret = render_lsm_profile(profile, &rendered);
- if (ret < 0) {
- goto err_nv;
- }
-
- lsm_pos = rst_mem_cpos(RM_PRIVATE);
- lsm_profile_len = strlen(rendered);
- lsm = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
- if (!lsm) {
- xfree(rendered);
- goto err_nv;
- }
-
- strncpy(lsm, rendered, lsm_profile_len);
- xfree(rendered);
- }
-
- }
-
if (seccomp_filters_get_rst_pos(core, &n_seccomp_filters, &seccomp_filter_pos) < 0)
goto err;
-
rst_mem_size = rst_mem_lock();
restore_bootstrap_len = restorer_len + args_len + rst_mem_size;
@@ -3048,10 +3117,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
goto err;
}
- ret = prepare_creds(creds, task_args);
- if (ret < 0)
- goto err;
-
/*
* Get a reference to shared memory area which is
* used to signal if shmem restoration complete
@@ -3102,11 +3167,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
if (core->tc->has_seccomp_mode)
task_args->seccomp_mode = core->tc->seccomp_mode;
- if (lsm)
- task_args->creds.lsm_profile = rst_mem_remap_ptr(lsm_pos, RM_PRIVATE);
- else
- task_args->creds.lsm_profile = NULL;
-
/*
* Arguments for task restoration.
*/
@@ -3124,6 +3184,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
* Fill up per-thread data.
*/
for (i = 0; i < current->nr_threads; i++) {
+ unsigned long creds_pos_next = creds_pos;
CoreEntry *tcore;
struct rt_sigframe *sigframe;
@@ -3157,6 +3218,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
core_get_tls(tcore, &thread_args[i].tls);
+ rst_reloc_creds(&thread_args[i], &creds_pos_next);
+
if (tcore->thread_core) {
thread_args[i].has_futex = true;
thread_args[i].futex_rla = tcore->thread_core->futex_rla;
diff --git a/include/restorer.h b/include/restorer.h
index 74be81dacb9b..4570909f10b8 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -72,6 +72,26 @@ struct task_restore_core_args;
* simpler, force both _args alignment be 64 bytes.
*/
+struct thread_creds_args { /* Per-thread credentials blob handed to restore_creds() in the restorer. */
+ CredsEntry creds; /* Copy of the image entry; its pointer fields are set to NULL by rst_prep_creds_args(). */
+
+ unsigned int cap_last_cap; /* Taken from kdat.last_cap; bounds the capability bounding-set drop loop. */
+
+ u32 cap_inh[CR_CAP_SIZE]; /* Copy of the entry's cap_inh words. */
+ u32 cap_prm[CR_CAP_SIZE]; /* Copy of the entry's cap_prm words. */
+ u32 cap_eff[CR_CAP_SIZE]; /* Copy of the entry's cap_eff words. */
+ u32 cap_bnd[CR_CAP_SIZE]; /* Copy of the entry's cap_bnd words. */
+
+ unsigned int secbits; /* NOTE(review): not assigned by the code visible in this patch -- confirm whether it is used. */
+ char *lsm_profile; /* Rendered LSM profile string, or NULL when there is none. */
+ unsigned int *groups; /* Copy of the supplementary group IDs (creds.n_groups entries), or NULL. */
+
+ unsigned long mem_lsm_profile_pos; /* rst_mem position of lsm_profile, used to recompute the pointer. */
+ unsigned long mem_groups_pos; /* rst_mem position of groups, used to recompute the pointer. */
+
+ unsigned long mem_pos_next; /* rst_mem position of the next blob in the chain, 0 if none. */
+};
+
struct thread_restore_args {
struct restore_mem_zone mem_zone;
@@ -93,6 +113,8 @@ struct thread_restore_args {
unsigned int siginfo_n;
int pdeath_sig;
+
+ struct thread_creds_args *creds_args;
} __aligned(64);
struct task_restore_args {
@@ -153,13 +175,6 @@ struct task_restore_args {
struct itimerval itimers[3];
- CredsEntry creds;
- u32 cap_inh[CR_CAP_SIZE];
- u32 cap_prm[CR_CAP_SIZE];
- u32 cap_eff[CR_CAP_SIZE];
- u32 cap_bnd[CR_CAP_SIZE];
- u32 cap_last_cap;
-
MmEntry mm;
auxv_t mm_saved_auxv[AT_VECTOR_SIZE];
u32 mm_saved_auxv_size;
diff --git a/pie/restorer.c b/pie/restorer.c
index 4665c5d78872..1cce88d4c322 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -121,8 +121,9 @@ static int lsm_set_label(char *label, int procfd)
return 0;
}
-static int restore_creds(CredsEntry *ce, int procfd)
+static int restore_creds(struct thread_creds_args *args, int procfd)
{
+ CredsEntry *ce = &args->creds;
int b, i, ret;
struct cap_header hdr;
struct cap_data data[_LINUX_CAPABILITY_U32S_3];
@@ -132,6 +133,17 @@ static int restore_creds(CredsEntry *ce, int procfd)
*/
/*
+ * Setup supplementary group IDs early.
+ */
+ if (args->groups) {
+ ret = sys_setgroups(ce->n_groups, args->groups);
+ if (ret) {
+ pr_err("Can't setup supplementary group IDs: %d\n", ret);
+ return -1;
+ }
+ }
+
+ /*
* First -- set the SECURE_NO_SETUID_FIXUP bit not to
* lose caps bits when changing xids.
*/
@@ -190,9 +202,9 @@ static int restore_creds(CredsEntry *ce, int procfd)
for (b = 0; b < CR_CAP_SIZE; b++) {
for (i = 0; i < 32; i++) {
- if (b * 32 + i > cap_last_cap)
+ if (b * 32 + i > args->cap_last_cap)
break;
- if (ce->cap_bnd[b] & (1 << i))
+ if (args->cap_bnd[b] & (1 << i))
/* already set */
continue;
ret = sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0);
@@ -215,9 +227,9 @@ static int restore_creds(CredsEntry *ce, int procfd)
BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE);
for (i = 0; i < CR_CAP_SIZE; i++) {
- data[i].eff = ce->cap_eff[i];
- data[i].prm = ce->cap_prm[i];
- data[i].inh = ce->cap_inh[i];
+ data[i].eff = args->cap_eff[i];
+ data[i].prm = args->cap_prm[i];
+ data[i].inh = args->cap_inh[i];
}
ret = sys_capset(&hdr, data);
@@ -226,9 +238,8 @@ static int restore_creds(CredsEntry *ce, int procfd)
return -1;
}
- if (lsm_set_label(ce->lsm_profile, procfd) < 0)
+ if (lsm_set_label(args->lsm_profile, procfd) < 0)
return -1;
-
return 0;
}
@@ -443,7 +454,7 @@ long __export_restore_thread(struct thread_restore_args *args)
if (restore_thread_common(rt_sigframe, args))
goto core_restore_end;
- ret = restore_creds(&args->ta->creds, args->ta->proc_fd);
+ ret = restore_creds(args->creds_args, args->ta->proc_fd);
if (ret)
goto core_restore_end;
@@ -884,7 +895,7 @@ long __export_restore_task(struct task_restore_args *args)
log_set_fd(args->logfd);
log_set_loglevel(args->loglevel);
- cap_last_cap = args->cap_last_cap;
+ cap_last_cap = args->t->creds_args->cap_last_cap;
pr_info("Switched to the restorer %d\n", my_pid);
@@ -1262,7 +1273,7 @@ long __export_restore_task(struct task_restore_args *args)
* thus restore* creds _after_ all of the above.
*/
- ret = restore_creds(&args->creds, args->proc_fd);
+ ret = restore_creds(args->t->creds_args, args->proc_fd);
ret = ret || restore_dumpable_flag(&args->mm);
ret = ret || restore_pdeath_sig(args->t);
--
2.5.0
More information about the CRIU
mailing list