[CRIU] [PATCH 06/10] crtools: restore pending signals (v2)
Pavel Emelyanov
xemul at parallels.com
Thu Mar 14 12:12:33 EDT 2013
On 03/05/2013 06:54 PM, Andrey Vagin wrote:
> Read siginfo-s from images and send them to itself by sigqueueinfo.
>
> v2: Don't remap task_args and thread_args
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-restore.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> include/restorer.h | 6 +++
> pie/restorer.c | 54 ++++++++++++++++++++++++++-
> 3 files changed, 163 insertions(+), 3 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 1b7ee8f..3ead7c6 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -61,6 +61,7 @@
> #include "protobuf/itimer.pb-c.h"
> #include "protobuf/vma.pb-c.h"
> #include "protobuf/rlimit.pb-c.h"
> +#include "protobuf/siginfo.pb-c.h"
>
> #include "asm/restore.h"
>
> @@ -1636,6 +1637,58 @@ static int prepare_rlimits(int pid, struct task_restore_core_args *ta)
> return ret;
> }
>
> +static int open_signal_image(int type, pid_t pid, siginfo_t **ptr,
> + unsigned long *size, int *nr)
> +{
> + int fd, ret, n;
> +
> + fd = open_image_ro(type, pid);
> + if (fd < 0)
> + return -1;
> +
> + n = 0;
> +
> + while (1) {
> + SiginfoEntry *sie;
> + siginfo_t *info;
> +
> + ret = pb_read_one_eof(fd, &sie, PB_SIGINFO);
> + if (ret <= 0)
> + break;
> + if (sie->siginfo.len != sizeof(siginfo_t)) {
> + pr_err("Unknown image format");
> + BUG();
> + }
> + info = (siginfo_t *) sie->siginfo.data;
> +
> + if ((*nr + 1) * sizeof(siginfo_t) > *size) {
> + unsigned long new_size = *size + PAGE_SIZE;
> +
> + if (*ptr == NULL)
> + *ptr = mmap(NULL, new_size, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANON, 0, 0);
> + else
> + *ptr = mremap(*ptr, *size, new_size, MREMAP_MAYMOVE);
> + if (*ptr == MAP_FAILED) {
> + pr_perror("Can't allocate memory for siginfo-s\n");
> + return -1;
fd is not closed here
> + }
> +
> + *size = new_size;
> + }
> +
> + memcpy(*ptr + *nr, info, sizeof(*info));
> + (*nr)++;
> + n++;
> +
> + siginfo_entry__free_unpacked(sie, NULL);
> + }
> +
> + close(fd);
> +
> + return ret ? : n;
> +}
> +
> extern void __gcov_flush(void) __attribute__((weak));
> void __gcov_flush(void) {}
>
> @@ -1654,6 +1707,11 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>
> struct task_restore_core_args *task_args;
> struct thread_restore_args *thread_args;
> + siginfo_t *siginfo_chunk = NULL;
> + int siginfo_nr = 0;
> + int siginfo_shared_nr = 0;
> + int *siginfo_priv_nr;
> + unsigned long siginfo_size = 0;
>
> struct vm_area_list self_vmas;
> int i;
> @@ -1688,12 +1746,38 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> current->nr_threads,
> KBYTES(restore_thread_vma_len));
>
> + siginfo_priv_nr = xmalloc(sizeof(int) * current->nr_threads);
> + if (siginfo_priv_nr == NULL)
> + goto err;
> +
> + ret = open_signal_image(CR_FD_SIGNAL, pid, &siginfo_chunk,
> + &siginfo_size, &siginfo_nr);
> + if (ret < 0) {
> + if (errno != ENOENT) /* backward compatiblity */
> + goto err;
The ENOENT check should be in open_signal_image
> + ret = 0;
> + }
> + siginfo_shared_nr = ret;
> +
> + for (i = 0; i < current->nr_threads; i++) {
> + ret = open_signal_image(CR_FD_PSIGNAL,
> + current->threads[i].virt, &siginfo_chunk,
> + &siginfo_size, &siginfo_nr);
> + if (ret < 0) {
> + if (errno != ENOENT) /* backward compatiblity */
> + goto err;
> + ret = 0;
> + }
> + siginfo_priv_nr[i] = ret;
> + }
> +
> restore_bootstrap_len = restorer_len +
> restore_task_vma_len +
> restore_thread_vma_len +
> SHMEMS_SIZE + TASK_ENTRIES_SIZE +
> self_vmas_len + vmas_len +
> - rst_tcp_socks_size;
> + rst_tcp_socks_size +
> + siginfo_size;
>
> /*
> * Restorer is a blob (code + args) that will get mapped in some
> @@ -1744,6 +1828,21 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> task_args = mem;
> thread_args = mem + restore_task_vma_len;
>
> + mem += restore_task_vma_len + restore_thread_vma_len;
> + if (siginfo_chunk) {
> + siginfo_chunk = mremap(siginfo_chunk, siginfo_size, siginfo_size,
> + MREMAP_FIXED | MREMAP_MAYMOVE, mem);
> + if (siginfo_chunk == MAP_FAILED) {
> + pr_perror("mremap");
> + goto err;
> + }
> + }
> +
> + task_args->siginfo_size = siginfo_size;
> + task_args->siginfo_nr = siginfo_shared_nr;
> + task_args->siginfo = siginfo_chunk;
> + siginfo_chunk += task_args->siginfo_nr;
> +
> /*
> * Get a reference to shared memory area which is
> * used to signal if shmem restoration complete
> @@ -1755,7 +1854,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> * address.
> */
>
> - mem += restore_task_vma_len + restore_thread_vma_len;
> + mem += siginfo_size;
> ret = shmem_remap(rst_shmems, mem, SHMEMS_SIZE);
> if (ret < 0)
> goto err;
> @@ -1809,6 +1908,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> CoreEntry *tcore;
>
> thread_args[i].pid = current->threads[i].virt;
> + thread_args[i].siginfo_nr = siginfo_priv_nr[i];
> + thread_args[i].siginfo = siginfo_chunk;
> + siginfo_chunk += thread_args[i].siginfo_nr;
>
> /* skip self */
> if (thread_args[i].pid == pid) {
> diff --git a/include/restorer.h b/include/restorer.h
> index 15dade8..336e28d 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -82,6 +82,9 @@ struct thread_restore_args {
> fpu_state_t fpu_state;
>
> u32 tls;
> +
> + siginfo_t *siginfo;
> + unsigned int siginfo_nr;
> } __aligned(sizeof(long));
>
> struct task_restore_core_args {
> @@ -101,6 +104,9 @@ struct task_restore_core_args {
> struct task_entries *task_entries;
> VmaEntry *self_vmas;
> VmaEntry *tgt_vmas;
> + siginfo_t *siginfo;
> + unsigned int siginfo_nr;
> + unsigned long siginfo_size;
> unsigned int nr_vmas;
> unsigned long premmapped_addr;
> unsigned long premmapped_len;
> diff --git a/pie/restorer.c b/pie/restorer.c
> index 54b9b83..d88fc7b 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -155,6 +155,7 @@ static void restore_rlims(struct task_restore_core_args *ta)
> }
> }
>
> +static int restore_signals(siginfo_t *info, int nr, int group);
Why fwd declaration? Why not put the code here?
> static int restore_thread_common(struct rt_sigframe *sigframe,
> struct thread_restore_args *args)
> {
> @@ -210,8 +211,11 @@ long __export_restore_thread(struct thread_restore_args *args)
> pr_info("%ld: Restored\n", sys_gettid());
>
> restore_finish_stage(CR_STATE_RESTORE);
> - restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);
>
> + if (restore_signals(args->siginfo, args->siginfo_nr, 0))
> + goto core_restore_end;
> +
> + restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);
> futex_dec_and_wake(&thread_inprogress);
>
> new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
> @@ -219,6 +223,7 @@ long __export_restore_thread(struct thread_restore_args *args)
>
> core_restore_end:
> pr_err("Restorer abnormal termination for %ld\n", sys_getpid());
> + futex_abort_and_wake(&task_entries->nr_in_progress);
> sys_exit_group(1);
> return -1;
> }
> @@ -378,6 +383,36 @@ static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
> return 0;
> }
>
> +static int restore_signals(siginfo_t *ptr, int nr, int group)
For symmetry with dump this should be "bool group".
> +{
> + int ret, i;
> + k_rtsigset_t to_block;
> +
> + ksigfillset(&to_block);
> + ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
Signals should have been blocked already. No?
> + if (ret) {
> + pr_err("Unable to block signals %d", ret);
> + return -1;
> + }
> +
> + for (i = 0; i < nr; i++) {
> + siginfo_t *info = ptr + i;
> +
> + pr_info("Restore signal %d group %d\n", info->si_signo, group);
> + if (group)
> + ret = sys_rt_sigqueueinfo(sys_getpid(), info->si_signo, info);
> + else
> + ret = sys_rt_tgsigqueueinfo(sys_getpid(),
> + sys_gettid(), info->si_signo, info);
> + if (ret) {
> + pr_err("Unable to send siginfo %d %x with code %d\n",
> + info->si_signo, info->si_code, ret);
> + return -1;;
> + }
> + }
> +
> + return 0;
> +}
> /*
> * The main routine to restore task via sigreturn.
> * This one is very special, we never return there
> @@ -694,6 +729,14 @@ long __export_restore_task(struct task_restore_core_args *args)
>
> sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(rt_sigset_t));
>
> + ret = restore_signals(args->siginfo, args->siginfo_nr, 1);
> + if (ret)
> + goto core_restore_end;
> +
> + ret = restore_signals(args->t->siginfo, args->t->siginfo_nr, 0);
> + if (ret)
> + goto core_restore_end;
Why isn't this in restore_thread_common, shared with per-thread siginfos restore?
> +
> futex_set_and_wake(&thread_inprogress, args->nr_threads);
>
> restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);
> @@ -701,6 +744,14 @@ long __export_restore_task(struct task_restore_core_args *args)
> /* Wait until children stop to use args->task_entries */
> futex_wait_while_gt(&thread_inprogress, 1);
>
> + if (args->siginfo_size) {
> + ret = sys_munmap(args->siginfo, args->siginfo_size);
> + if (ret < 0) {
> + pr_err("Can't unmap signals %ld\n", ret);
> + goto core_restore_failed;
> + }
> + }
> +
> rst_tcp_socks_all(args->rst_tcp_socks, args->rst_tcp_socks_size);
>
> log_set_fd(-1);
> @@ -740,6 +791,7 @@ long __export_restore_task(struct task_restore_core_args *args)
> ARCH_RT_SIGRETURN(new_sp);
>
> core_restore_end:
> + futex_abort_and_wake(&task_entries->nr_in_progress);
This is new here. Why?
> pr_err("Restorer fail %ld\n", sys_getpid());
> sys_exit_group(1);
> return -1;
>
More information about the CRIU
mailing list