[CRIU] [PATCH] criu: dump and restore cpu affinity of each thread
Sang Yan
sangyan at huawei.com
Thu Nov 26 12:17:56 MSK 2020
CRIU should dump and restore the CPU affinity of every
process and thread.
Add a thread_cpuallow_entry field to thread_core_entry
to save the CPU affinity mask of each thread.
Restore it after the threads have been restored but before they resume running.
Signed-off-by: Sang Yan <sangyan at huawei.com>
---
compel/arch/arm/plugins/std/syscalls/syscall.def | 1 +
.../ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 +
.../s390/plugins/std/syscalls/syscall-s390.tbl | 1 +
.../arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 +
.../arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 +
criu/cr-dump.c | 14 +++++++++++
criu/cr-restore.c | 22 ++++++++++++++++
criu/include/restorer.h | 3 +++
criu/pie/restorer.c | 29 ++++++++++++++++++++++
criu/pstree.c | 7 ++++++
images/core.proto | 5 ++++
11 files changed, 85 insertions(+)
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
index f7ebc85..1c70388 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
@@ -116,3 +116,4 @@ fsopen 430 430 (char *fsname, unsigned int flags)
fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux)
fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags)
clone3 435 435 (struct clone_args *uargs, size_t size)
+sched_setaffinity 122 241 (int pid, size_t cpusetsize, const cpu_set_t *mask)
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
index 1afaf1e..460daf8 100644
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
@@ -112,3 +112,4 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_sched_setaffinity 222 sys_sched_setaffinity (int pid, size_t cpusetsize, const cpu_set_t *mask)
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
index ae6fdb5..c0bba39 100644
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
@@ -112,3 +112,4 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
+__NR_sched_setaffinity 239 sys_sched_setaffinity (int pid, size_t cpusetsize, const cpu_set_t *mask)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
index 7a48711..29c13e3 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
@@ -63,6 +63,7 @@ __NR_mincore 218 sys_mincore (void *addr, unsigned long size, unsigned char *
__NR_madvise 219 sys_madvise (unsigned long start, size_t len, int behavior)
__NR_gettid 224 sys_gettid (void)
__NR_futex 240 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+__NR_sched_setaffinity 241 sys_sched_setaffinity (int pid, size_t cpusetsize, const cpu_set_t *mask)
__NR_set_thread_area 243 sys_set_thread_area (user_desc_t *info)
__NR_get_thread_area 244 sys_get_thread_area (user_desc_t *info)
__NR_io_setup 245 sys_io_setup (unsigned nr_reqs, aio_context_t *ctx32p)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
index 6667c07..74f5482 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
@@ -73,6 +73,7 @@ __NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsign
__NR_umount2 166 sys_umount2 (char *name, int flags)
__NR_gettid 186 sys_gettid (void)
__NR_futex 202 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
+__NR_sched_setaffinity 203 sys_sched_setaffinity (int pid, size_t cpusetsize, const cpu_set_t *mask)
__NR_set_thread_area 205 sys_set_thread_area (user_desc_t *info)
__NR_io_setup 206 sys_io_setup (unsigned nr_events, aio_context_t *ctx)
__NR_io_getevents 208 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 193a49c..6ffd526 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -140,6 +140,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
{
int ret;
struct sched_param sp;
+ cpu_set_t cpumask;
BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */
@@ -185,6 +186,19 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
tc->has_sched_nice = true;
tc->sched_nice = ret;
+	pr_info("\tdumping cpu_allowed for %d\n", pid);
+	/*
+	 * The raw syscall (unlike the libc wrapper) only writes as many bytes
+	 * as the kernel's cpumask occupies and returns that count, so clear
+	 * the whole set first to keep the unwritten tail deterministic.
+	 */
+	memset(&cpumask, 0, sizeof(cpumask));
+	ret = syscall(__NR_sched_getaffinity, pid, sizeof(cpumask), &cpumask);
+	if (ret < 0) {
+		pr_perror("Can't get sched affinity for %d", pid);
+		return -1;
+	}
+	/*
+	 * __CPU_SETSIZE is the number of CPUs (bits) a cpu_set_t can hold, not
+	 * its size in bytes; copying that many bytes would read far beyond the
+	 * on-stack cpumask. Zero the image buffer (n_cpumask words) and copy
+	 * exactly one cpu_set_t into its head.
+	 */
+	memset(tc->cpu_allowed->cpumask, 0,
+	       tc->cpu_allowed->n_cpumask * sizeof(tc->cpu_allowed->cpumask[0]));
+	memcpy(tc->cpu_allowed->cpumask, &cpumask, sizeof(cpumask));
+	/* The words are uint64_t; cast so the format is right on 32-bit too. */
+	pr_info("\t 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+		(unsigned long long)tc->cpu_allowed->cpumask[3],
+		(unsigned long long)tc->cpu_allowed->cpumask[2],
+		(unsigned long long)tc->cpu_allowed->cpumask[1],
+		(unsigned long long)tc->cpu_allowed->cpumask[0]);
+
return 0;
}
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 8af2e29..375eb54 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -118,6 +118,7 @@ static int prepare_restorer_blob(void);
static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core);
static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core);
static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core);
+static int prepare_alloweds(int pid, struct task_restore_args *ta, CoreEntry *leader_core);
/*
* Architectures can overwrite this function to restore registers that are not
@@ -922,6 +923,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
if (prepare_signals(pid, ta, core))
return -1;
+ if (prepare_alloweds(pid, ta, core))
+ return -1;
+
if (prepare_posix_timers(pid, ta, core))
return -1;
@@ -3225,6 +3229,23 @@ out:
return ret;
}
+/*
+ * Stage the saved CPU affinity mask of every thread of the current task
+ * in RM_PRIVATE restorer memory, one cpu_set_t per thread in thread order.
+ * ta->cpualloweds is set to the position obtained before the first
+ * allocation, so the restorer can index it as cpualloweds[i]
+ * (NOTE(review): this relies on consecutive rst_mem_alloc() calls being
+ * laid out back to back -- confirm).
+ *
+ * @pid is used for diagnostics only; @leader_core is unused, the masks
+ * are taken from current->core[].
+ *
+ * Returns 0 on success, -1 if a mask is missing/too short in the image
+ * or if restorer memory cannot be allocated.
+ */
+static int prepare_alloweds(int pid, struct task_restore_args *ta, CoreEntry *leader_core)
+{
+	int i;
+
+	ta->cpualloweds = (cpu_set_t *)rst_mem_align_cpos(RM_PRIVATE);
+
+	for (i = 0; i < current->nr_threads; i++) {
+		ThreadCpuallowEntry *entry = current->core[i]->thread_core->cpu_allowed;
+		cpu_set_t *cpumask;
+
+		/*
+		 * The entry comes from the image: make sure it exists and
+		 * carries at least one full cpu_set_t before copying from it.
+		 */
+		if (!entry || entry->n_cpumask * sizeof(entry->cpumask[0]) < sizeof(cpu_set_t)) {
+			pr_err("Missing or short cpu_allowed for thread %d of task %d\n", i, pid);
+			return -1;
+		}
+
+		cpumask = rst_mem_alloc(sizeof(cpu_set_t), RM_PRIVATE);
+		if (!cpumask)
+			return -1;
+
+		memcpy(cpumask, entry->cpumask, sizeof(cpu_set_t));
+	}
+	return 0;
+}
+
extern void __gcov_flush(void) __attribute__((weak));
void __gcov_flush(void) {}
@@ -3684,6 +3705,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
RST_MEM_FIXUP_PPTR(task_args->timerfd);
RST_MEM_FIXUP_PPTR(task_args->posix_timers);
RST_MEM_FIXUP_PPTR(task_args->siginfo);
+ RST_MEM_FIXUP_PPTR(task_args->cpualloweds);
RST_MEM_FIXUP_PPTR(task_args->rlims);
RST_MEM_FIXUP_PPTR(task_args->helpers);
RST_MEM_FIXUP_PPTR(task_args->zombies);
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index dfb4e6b..67df9f5 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -1,6 +1,7 @@
#ifndef __CR_RESTORER_H__
#define __CR_RESTORER_H__
+#include <sched.h>
#include <signal.h>
#include <limits.h>
#include <sys/resource.h>
@@ -162,6 +163,8 @@ struct task_restore_args {
siginfo_t *siginfo;
unsigned int siginfo_n;
+ cpu_set_t *cpualloweds;
+
struct rst_tcp_sock *tcp_socks;
unsigned int tcp_socks_n;
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index b3d7e2b..833b6bb 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -432,6 +432,31 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group)
return 0;
}
+/*
+ * Apply the saved CPU affinity mask to each of the args->nr_threads
+ * threads: thread i gets args->cpualloweds[i], addressed by the id kept
+ * in args->thread_args[i].pid.
+ *
+ * Returns 0 when every mask was applied, otherwise the non-zero result
+ * of the first failing sys_sched_setaffinity() call.
+ */
+static int restore_cpuallowed(struct task_restore_args *args)
+{
+	int idx;
+
+	for (idx = 0; idx < args->nr_threads; idx++) {
+		cpu_set_t *mask = &args->cpualloweds[idx];
+		int tid = args->thread_args[idx].pid;
+		int err;
+
+		/* Only the four lowest words of the mask are logged. */
+		pr_info("Restoring %d cpu_allowed %lx, %lx, %lx, %lx\n", tid,
+			mask->__bits[3],
+			mask->__bits[2],
+			mask->__bits[1],
+			mask->__bits[0]);
+
+		err = sys_sched_setaffinity(tid, sizeof(cpu_set_t), mask);
+		if (err) {
+			pr_err("\t Restore %d cpumask failed.\n", tid);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args)
{
unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0;
@@ -1900,6 +1925,10 @@ long __export_restore_task(struct task_restore_args *args)
if (ret)
goto core_restore_end;
+ ret = restore_cpuallowed(args);
+ if (ret)
+ goto core_restore_end;
+
restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
rst_tcp_socks_all(args);
diff --git a/criu/pstree.c b/criu/pstree.c
index f1513dc..d338377 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -58,11 +58,13 @@ CoreEntry *core_entry_alloc(int th, int tsk)
CredsEntry *ce = NULL;
sz += sizeof(ThreadCoreEntry) + sizeof(ThreadSasEntry) + sizeof(CredsEntry);
+ sz += sizeof(ThreadCpuallowEntry);
sz += CR_CAP_SIZE * sizeof(ce->cap_inh[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]);
+ sz += __CPU_SETSIZE;
/*
* @groups are dynamic and allocated
* on demand.
@@ -127,6 +129,11 @@ CoreEntry *core_entry_alloc(int th, int tsk)
ce->cap_eff = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0]));
ce->cap_bnd = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0]));
+ core->thread_core->cpu_allowed = xptr_pull(&m, ThreadCpuallowEntry);
+ thread_cpuallow_entry__init(core->thread_core->cpu_allowed);
+ core->thread_core->cpu_allowed->n_cpumask = __CPU_SETSIZE / sizeof(uint64_t);
+ core->thread_core->cpu_allowed->cpumask = xptr_pull_s(&m, __CPU_SETSIZE);
+
if (arch_alloc_thread_info(core)) {
xfree(core);
core = NULL;
diff --git a/images/core.proto b/images/core.proto
index 9e9e393..d9788fd 100644
--- a/images/core.proto
+++ b/images/core.proto
@@ -81,6 +81,10 @@ message thread_sas_entry {
required uint32 ss_flags = 3;
}
+/*
+ * CPU affinity mask of a single thread. The words are a raw copy of the
+ * cpu_set_t obtained with sched_getaffinity() at dump time.
+ */
+message thread_cpuallow_entry {
+ repeated uint64 cpumask = 1;
+}
+
message thread_core_entry {
required uint64 futex_rla = 1;
required uint32 futex_rla_len = 2;
@@ -99,6 +103,7 @@ message thread_core_entry {
optional string comm = 13;
optional uint64 blk_sigset_extended = 14;
+ required thread_cpuallow_entry cpu_allowed = 15;
}
message task_rlimits_entry {
--
2.9.5
More information about the CRIU
mailing list