[CRIU] [PATCH] criu: dump and restore cpu affinity of each thread

Alexander Mihalicyn alexander at mihalicyn.com
Thu Nov 26 12:45:41 MSK 2020


Hi!

Thanks for your contribution to the CRIU project!
Could you please post your patchset as a pull request on GitHub instead?
https://github.com/checkpoint-restore/criu/pulls

Regards,
Alex

On Thu, Nov 26, 2020 at 12:32 PM Sang Yan <sangyan at huawei.com> wrote:
>
> CRIU should dump and restore the CPU affinity of
> each thread and process.
>
> Add a thread_cpuallow_entry field to
> thread_core_entry to save the CPU affinity info.
>
> Restore it after the threads are restored but before they start running.
>
> Signed-off-by: Sang Yan <sangyan at huawei.com>
> ---
>  compel/arch/arm/plugins/std/syscalls/syscall.def   |  1 +
>  .../ppc64/plugins/std/syscalls/syscall-ppc64.tbl   |  1 +
>  .../s390/plugins/std/syscalls/syscall-s390.tbl     |  1 +
>  .../arch/x86/plugins/std/syscalls/syscall_32.tbl   |  1 +
>  .../arch/x86/plugins/std/syscalls/syscall_64.tbl   |  1 +
>  criu/cr-dump.c                                     | 14 +++++++++++
>  criu/cr-restore.c                                  | 22 ++++++++++++++++
>  criu/include/restorer.h                            |  3 +++
>  criu/pie/restorer.c                                | 29 ++++++++++++++++++++++
>  criu/pstree.c                                      |  7 ++++++
>  images/core.proto                                  |  5 ++++
>  11 files changed, 85 insertions(+)
>
> diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
> index f7ebc85..1c70388 100644
> --- a/compel/arch/arm/plugins/std/syscalls/syscall.def
> +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
> @@ -116,3 +116,4 @@ fsopen                              430     430     (char *fsname, unsigned int flags)
>  fsconfig                       431     431     (int fd, unsigned int cmd, const char *key, const char *value, int aux)
>  fsmount                                432     432     (int fd, unsigned int flags, unsigned int attr_flags)
>  clone3                         435     435     (struct clone_args *uargs, size_t size)
> +sched_setaffinity              122     241     (int fd, size_t cpusetsize, const cpu_set_t *mask)
> diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
> index 1afaf1e..460daf8 100644
> --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
> +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
> @@ -112,3 +112,4 @@ __NR_fsopen         430             sys_fsopen              (char *fsname, unsigned int flags)
>  __NR_fsconfig          431             sys_fsconfig            (int fd, unsigned int cmd, const char *key, const char *value, int aux)
>  __NR_fsmount           432             sys_fsmount             (int fd, unsigned int flags, unsigned int attr_flags)
>  __NR_clone3            435             sys_clone3              (struct clone_args *uargs, size_t size)
> +__NR_sched_setaffinity 222             sys_sched_setaffinity   (int fd, size_t cpusetsize, const cpu_set_t *mask)
> diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
> index ae6fdb5..c0bba39 100644
> --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
> +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
> @@ -112,3 +112,4 @@ __NR_fsopen         430             sys_fsopen              (char *fsname, unsigned int flags)
>  __NR_fsconfig          431             sys_fsconfig            (int fd, unsigned int cmd, const char *key, const char *value, int aux)
>  __NR_fsmount           432             sys_fsmount             (int fd, unsigned int flags, unsigned int attr_flags)
>  __NR_clone3            435             sys_clone3              (struct clone_args *uargs, size_t size)
> +__NR_sched_setaffinity 239             sys_sched_setaffinity   (int fd, size_t cpusetsize, const cpu_set_t *mask)
> diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
> index 7a48711..29c13e3 100644
> --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
> +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
> @@ -63,6 +63,7 @@ __NR_mincore          218             sys_mincore             (void *addr, unsigned long size, unsigned char *
>  __NR_madvise           219             sys_madvise             (unsigned long start, size_t len, int behavior)
>  __NR_gettid            224             sys_gettid              (void)
>  __NR_futex             240             sys_futex               (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
> +__NR_sched_setaffinity 241             sys_sched_setaffinity   (int fd, size_t cpusetsize, const cpu_set_t *mask)
>  __NR_set_thread_area   243             sys_set_thread_area     (user_desc_t *info)
>  __NR_get_thread_area   244             sys_get_thread_area     (user_desc_t *info)
>  __NR_io_setup          245             sys_io_setup            (unsigned nr_reqs, aio_context_t *ctx32p)
> diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
> index 6667c07..74f5482 100644
> --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
> +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
> @@ -73,6 +73,7 @@ __NR_mount                    165             sys_mount               (char *dev_nmae, char *dir_name, char *type, unsign
>  __NR_umount2                   166             sys_umount2             (char *name, int flags)
>  __NR_gettid                    186             sys_gettid              (void)
>  __NR_futex                     202             sys_futex               (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
> +__NR_sched_setaffinity         203             sys_sched_setaffinity   (int fd, size_t cpusetsize, const cpu_set_t *mask)
>  __NR_set_thread_area           205             sys_set_thread_area     (user_desc_t *info)
>  __NR_io_setup                  206             sys_io_setup            (unsigned nr_events, aio_context_t *ctx)
>  __NR_io_getevents              208             sys_io_getevents        (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
> index 193a49c..6ffd526 100644
> --- a/criu/cr-dump.c
> +++ b/criu/cr-dump.c
> @@ -140,6 +140,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
>  {
>         int ret;
>         struct sched_param sp;
> +       cpu_set_t cpumask;
>
>         BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */
>
> @@ -185,6 +186,19 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
>         tc->has_sched_nice = true;
>         tc->sched_nice = ret;
>
> +       pr_info("\tdumping cpu_allowed for %d\n", pid);
> +       ret = syscall(__NR_sched_getaffinity, pid, sizeof(cpumask), &cpumask);
> +       if (ret < 0) {
> +               pr_perror("Can't get sched affinity for %d", pid);
> +               return -1;
> +       }
> +       memcpy(tc->cpu_allowed->cpumask, &cpumask, __CPU_SETSIZE);
> +       pr_info("\t 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
> +               tc->cpu_allowed->cpumask[3],
> +               tc->cpu_allowed->cpumask[2],
> +               tc->cpu_allowed->cpumask[1],
> +               tc->cpu_allowed->cpumask[0]);
> +
>         return 0;
>  }
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 8af2e29..375eb54 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -118,6 +118,7 @@ static int prepare_restorer_blob(void);
>  static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core);
>  static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core);
>  static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core);
> +static int prepare_alloweds(int pid, struct task_restore_args *ta, CoreEntry *leader_core);
>
>  /*
>   * Architectures can overwrite this function to restore registers that are not
> @@ -922,6 +923,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>         if (prepare_signals(pid, ta, core))
>                 return -1;
>
> +       if (prepare_alloweds(pid, ta, core))
> +               return -1;
> +
>         if (prepare_posix_timers(pid, ta, core))
>                 return -1;
>
> @@ -3225,6 +3229,23 @@ out:
>         return ret;
>  }
>
> +static int prepare_alloweds(int pid, struct task_restore_args *ta, CoreEntry *leader_core)
> +{
> +       int i;
> +       cpu_set_t *cpumaks;
> +
> +       ta->cpualloweds = (cpu_set_t *)rst_mem_align_cpos(RM_PRIVATE);
> +
> +       for (i = 0; i < current->nr_threads; i++) {
> +               cpumaks = rst_mem_alloc(sizeof(cpu_set_t), RM_PRIVATE);
> +               if (!cpumaks)
> +                       return -1;
> +
> +               memcpy(cpumaks, current->core[i]->thread_core->cpu_allowed->cpumask, sizeof(cpu_set_t));
> +       }
> +       return 0;
> +}
> +
>  extern void __gcov_flush(void) __attribute__((weak));
>  void __gcov_flush(void) {}
>
> @@ -3684,6 +3705,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>         RST_MEM_FIXUP_PPTR(task_args->timerfd);
>         RST_MEM_FIXUP_PPTR(task_args->posix_timers);
>         RST_MEM_FIXUP_PPTR(task_args->siginfo);
> +       RST_MEM_FIXUP_PPTR(task_args->cpualloweds);
>         RST_MEM_FIXUP_PPTR(task_args->rlims);
>         RST_MEM_FIXUP_PPTR(task_args->helpers);
>         RST_MEM_FIXUP_PPTR(task_args->zombies);
> diff --git a/criu/include/restorer.h b/criu/include/restorer.h
> index dfb4e6b..67df9f5 100644
> --- a/criu/include/restorer.h
> +++ b/criu/include/restorer.h
> @@ -1,6 +1,7 @@
>  #ifndef __CR_RESTORER_H__
>  #define __CR_RESTORER_H__
>
> +#include <sched.h>
>  #include <signal.h>
>  #include <limits.h>
>  #include <sys/resource.h>
> @@ -162,6 +163,8 @@ struct task_restore_args {
>         siginfo_t                       *siginfo;
>         unsigned int                    siginfo_n;
>
> +       cpu_set_t                       *cpualloweds;
> +
>         struct rst_tcp_sock             *tcp_socks;
>         unsigned int                    tcp_socks_n;
>
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index b3d7e2b..833b6bb 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -432,6 +432,31 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group)
>         return 0;
>  }
>
> +static int restore_cpuallowed(struct task_restore_args *args)
> +{
> +       int i;
> +       int pid;
> +       int ret;
> +       cpu_set_t *cpumask;
> +
> +       for (i = 0; i < args->nr_threads; i++) {
> +               pid = args->thread_args[i].pid;
> +               cpumask = &args->cpualloweds[i];
> +               pr_info("Restoring %d cpu_allowed %lx, %lx, %lx, %lx\n", pid,
> +                       cpumask->__bits[3],
> +                       cpumask->__bits[2],
> +                       cpumask->__bits[1],
> +                       cpumask->__bits[0]);
> +               ret = sys_sched_setaffinity(pid, sizeof(cpu_set_t), cpumask);
> +               if (ret) {
> +                       pr_err("\t Restore %d cpumask failed.\n", pid);
> +                       return ret;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
>  static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args)
>  {
>         unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0;
> @@ -1900,6 +1925,10 @@ long __export_restore_task(struct task_restore_args *args)
>         if (ret)
>                 goto core_restore_end;
>
> +       ret = restore_cpuallowed(args);
> +       if (ret)
> +               goto core_restore_end;
> +
>         restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
>
>         rst_tcp_socks_all(args);
> diff --git a/criu/pstree.c b/criu/pstree.c
> index f1513dc..d338377 100644
> --- a/criu/pstree.c
> +++ b/criu/pstree.c
> @@ -58,11 +58,13 @@ CoreEntry *core_entry_alloc(int th, int tsk)
>                 CredsEntry *ce = NULL;
>
>                 sz += sizeof(ThreadCoreEntry) + sizeof(ThreadSasEntry) + sizeof(CredsEntry);
> +               sz += sizeof(ThreadCpuallowEntry);
>
>                 sz += CR_CAP_SIZE * sizeof(ce->cap_inh[0]);
>                 sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]);
>                 sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]);
>                 sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]);
> +               sz += __CPU_SETSIZE;
>                 /*
>                  * @groups are dynamic and allocated
>                  * on demand.
> @@ -127,6 +129,11 @@ CoreEntry *core_entry_alloc(int th, int tsk)
>                         ce->cap_eff     = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0]));
>                         ce->cap_bnd     = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0]));
>
> +                       core->thread_core->cpu_allowed = xptr_pull(&m, ThreadCpuallowEntry);
> +                       thread_cpuallow_entry__init(core->thread_core->cpu_allowed);
> +                       core->thread_core->cpu_allowed->n_cpumask = __CPU_SETSIZE / sizeof(uint64_t);
> +                       core->thread_core->cpu_allowed->cpumask = xptr_pull_s(&m, __CPU_SETSIZE);
> +
>                         if (arch_alloc_thread_info(core)) {
>                                 xfree(core);
>                                 core = NULL;
> diff --git a/images/core.proto b/images/core.proto
> index 9e9e393..d9788fd 100644
> --- a/images/core.proto
> +++ b/images/core.proto
> @@ -81,6 +81,10 @@ message thread_sas_entry {
>         required uint32                 ss_flags        = 3;
>  }
>
> +message thread_cpuallow_entry {
> +       repeated uint64                 cpumask         = 1;
> +}
> +
>  message thread_core_entry {
>         required uint64                 futex_rla       = 1;
>         required uint32                 futex_rla_len   = 2;
> @@ -99,6 +103,7 @@ message thread_core_entry {
>
>         optional string                 comm            = 13;
>         optional uint64                 blk_sigset_extended     = 14;
> +       required thread_cpuallow_entry  cpu_allowed     = 15;
>  }
>
>  message task_rlimits_entry {
> --
> 2.9.5
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list