[CRIU] [PATCHv3 28/30] restore/x86: call int80 for compat sigaction restore
Dmitry Safonov
dsafonov at virtuozzo.com
Mon Jun 27 11:16:04 PDT 2016
The kernel patch "x86/signal: add SA_{X32,IA32}_ABI sa_flags" makes the ABI
used for signal delivery follow the ABI of the sigaction syscall that
installed the handler, instead of the per-thread TIF_IA32 flag.
So, to have signals delivered with the compat ABI, we need to call
sigaction through a raw int80 call.
This patch restores signals with int80: both the cr-restore part and the PIE
restorer's part live in sigaction_compat.c, which is compiled for the criu
binary and for the restorer PIE.
The PIE part is needed only for setting the SIGCHLD handler; the other
signal handlers are set in cr-restore (as before).
Cc: Cyrill Gorcunov <gorcunov at openvz.org>
Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
---
v3: add missing commit description
criu/arch/aarch64/include/asm/restorer.h | 5 ++
criu/arch/arm/include/asm/restorer.h | 5 ++
criu/arch/ppc64/include/asm/restorer.h | 5 ++
criu/arch/x86/Makefile | 2 +
criu/arch/x86/include/asm/restorer.h | 12 +++
criu/arch/x86/include/asm/types.h | 7 ++
criu/arch/x86/restorer.c | 10 +--
criu/arch/x86/sigaction_compat.c | 68 +++++++++++++++
criu/arch/x86/sigaction_compat_pie.c | 1 +
criu/cr-restore.c | 144 ++++++++++++++++++++++++++-----
criu/include/parasite-compat.h | 9 +-
criu/parasite-syscall.c | 7 +-
criu/pie/Makefile | 2 +
criu/pie/restorer.c | 15 +++-
images/sa.proto | 1 +
15 files changed, 257 insertions(+), 36 deletions(-)
create mode 100644 criu/arch/x86/sigaction_compat.c
create mode 120000 criu/arch/x86/sigaction_compat_pie.c
diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
index 19f459a0b08f..80f358c46a42 100644
--- a/criu/arch/aarch64/include/asm/restorer.h
+++ b/criu/arch/aarch64/include/asm/restorer.h
@@ -125,4 +125,9 @@ static inline int ptrace_flush_breakpoints(pid_t pid)
return 0;
}
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
#endif
diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h
index e17a80e7a971..305311799e9a 100644
--- a/criu/arch/arm/include/asm/restorer.h
+++ b/criu/arch/arm/include/asm/restorer.h
@@ -167,4 +167,9 @@ static inline int ptrace_flush_breakpoints(pid_t pid)
return 0;
}
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
#endif
diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h
index e1c08ef17eeb..e9d171a7b1df 100644
--- a/criu/arch/ppc64/include/asm/restorer.h
+++ b/criu/arch/ppc64/include/asm/restorer.h
@@ -131,4 +131,9 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe,
*/
unsigned long sys_shmat(int shmid, const void *shmaddr, int shmflg);
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
#endif /*__CR_ASM_RESTORER_H__*/
diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile
index 5db577340a7d..d8e8251845bf 100644
--- a/criu/arch/x86/Makefile
+++ b/criu/arch/x86/Makefile
@@ -4,3 +4,5 @@ ccflags-y += -iquote $(obj) -iquote $(SRC_DIR) -iquote $(obj)/include -iquote $
obj-y += cpu.o
obj-y += crtools.o
+obj-y += sigaction_compat.o
+obj-y += call32.o
diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
index 2ab68bad3d70..65af6cd86405 100644
--- a/criu/arch/x86/include/asm/restorer.h
+++ b/criu/arch/x86/include/asm/restorer.h
@@ -96,8 +96,20 @@ static inline void __always_unused __check_compat_sigset_t(void)
{
BUILD_BUG_ON(sizeof(compat_sigset_t) != sizeof(k_rtsigset_t));
}
+
+#define CONFIG_COMPAT
+extern void *alloc_compat_syscall_stack(void);
+extern void free_compat_syscall_stack(void *mem);
+extern unsigned long call32_from_64(void *stack, void *func);
+
+extern int arch_compat_rt_sigaction(void *stack32, int sig,
+ rt_sigaction_t_compat *act);
#else
#define rt_sigframe_ia32 rt_sigframe
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
#endif
typedef struct compat_sigaltstack {
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
index 7e3bb3c71bdb..bb9d3a0c6119 100644
--- a/criu/arch/x86/include/asm/types.h
+++ b/criu/arch/x86/include/asm/types.h
@@ -49,6 +49,13 @@ typedef struct {
} rt_sigaction_t;
typedef struct {
+ u32 rt_sa_handler;
+ u32 rt_sa_flags;
+ u32 rt_sa_restorer;
+ k_rtsigset_t rt_sa_mask;
+} rt_sigaction_t_compat;
+
+typedef struct {
unsigned int entry_number;
unsigned int base_addr;
unsigned int limit;
diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c
index d45c009e87ce..f16bdcaa5b68 100644
--- a/criu/arch/x86/restorer.c
+++ b/criu/arch/x86/restorer.c
@@ -33,8 +33,6 @@ int restore_nonsigframe_gpregs(UserX86RegsEntry *r)
return 0;
}
-extern unsigned long call32_from_64(void *stack, void *func);
-
asm ( " .pushsection .text \n"
" .global restore_set_thread_area \n"
" .code32 \n"
@@ -54,10 +52,8 @@ static int prepare_stack32(void)
if (stack32)
return 0;
- stack32 = (void*)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (stack32 == MAP_FAILED) {
- stack32 = NULL;
+ stack32 = alloc_compat_syscall_stack();
+ if (!stack32) {
pr_err("Failed to allocate stack for 32-bit TLS restore\n");
return -1;
}
@@ -91,5 +87,5 @@ void restore_tls(tls_t *ptls)
}
if (stack32)
- sys_munmap(stack32, PAGE_SIZE);
+ free_compat_syscall_stack(stack32);
}
diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c
new file mode 100644
index 000000000000..ef6d13c9101f
--- /dev/null
+++ b/criu/arch/x86/sigaction_compat.c
@@ -0,0 +1,68 @@
+#include "asm/restorer.h"
+#include "asm/fpu.h"
+#include "asm/string.h"
+
+#include <sys/mman.h>
+
+#ifdef CR_NOGLIBC
+# include "syscall.h"
+#else
+# define sys_mmap mmap
+# define sys_munmap munmap
+# ifndef __NR32_rt_sigaction
+# define __NR32_rt_sigaction 174
+# endif
+#endif
+#include "log.h"
+#include "cpu.h"
+
+void *alloc_compat_syscall_stack(void)
+{
+ void *mem = (void*)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ if (mem == MAP_FAILED)
+ return 0;
+ return mem;
+}
+
+void free_compat_syscall_stack(void *mem)
+{
+ sys_munmap(mem, PAGE_SIZE);
+}
+
+asm ( " .pushsection .text \n"
+ " .global restore_rt_sigaction \n"
+ " .code32 \n"
+ "restore_rt_sigaction: \n"
+ " mov %edx, %esi \n"
+ " mov $0, %edx \n"
+ " movl $"__stringify(__NR32_rt_sigaction)",%eax \n"
+ " int $0x80 \n"
+ " ret \n"
+ " .popsection \n"
+ " .code64");
+extern char restore_rt_sigaction;
+
+/*
+ * Call raw rt_sigaction syscall through int80 - so that the ABI the kernel
+ * chooses to deliver this signal with is i386.
+ */
+int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
+{
+ int ret;
+
+ /*
+ * To be sure that the sigaction pointer lies under 4G,
+ * copy it to the bottom of the stack.
+ */
+ builtin_memcpy(stack32, act, sizeof(rt_sigaction_t_compat));
+
+ asm volatile ("\t movl %%ebx,%%ebx\n" : :"b"(sig)); /* signum */
+ asm volatile ("\t movl %%ecx,%%ecx\n" : :"c"(stack32)); /* act */
+ asm volatile ("\t movl %%edx,%%edx\n" : :"d"(sizeof(act->rt_sa_mask)));
+ call32_from_64(stack32 + PAGE_SIZE, &restore_rt_sigaction);
+ asm volatile ("\t movl %%eax,%0\n" : "=r"(ret));
+ return ret;
+}
+
diff --git a/criu/arch/x86/sigaction_compat_pie.c b/criu/arch/x86/sigaction_compat_pie.c
new file mode 120000
index 000000000000..009ac3a87e71
--- /dev/null
+++ b/criu/arch/x86/sigaction_compat_pie.c
@@ -0,0 +1 @@
+sigaction_compat.c
\ No newline at end of file
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 8b21d27aa2ed..a8071ce62c83 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -276,7 +276,15 @@ err:
}
static rt_sigaction_t sigchld_act;
+/*
+ * If the parent's sigaction has SIGKILL blocked (which is nonsense),
+ * this parent action is invalid and shouldn't be inherited.
+ * This is used to mark parent_act* entries as no longer valid.
+ */
static rt_sigaction_t parent_act[SIGMAX];
+#ifdef CONFIG_COMPAT
+static rt_sigaction_t_compat parent_act_compat[SIGMAX];
+#endif
static bool sa_inherited(int sig, rt_sigaction_t *sa)
{
@@ -288,6 +296,10 @@ static bool sa_inherited(int sig, rt_sigaction_t *sa)
pa = &parent_act[sig];
+ /* Skip a parent sigaction that was marked invalid */
+ if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+ return false;
+
for (i = 0; i < _KNSIG_WORDS; i++)
if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
return false;
@@ -297,26 +309,10 @@ static bool sa_inherited(int sig, rt_sigaction_t *sa)
pa->rt_sa_restorer == sa->rt_sa_restorer;
}
-/* Returns number of restored signals, -1 or negative errno on fail */
-static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
+static int restore_native_sigaction(int sig, SaEntry *e)
{
rt_sigaction_t act;
- SaEntry *e;
- int ret = 0;
-
- BUG_ON(sig == SIGKILL || sig == SIGSTOP);
-
- ret = pb_read_one_eof(img, &e, PB_SIGACT);
- if (ret == 0) {
- if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
- pr_err("Unexpected EOF %d\n", sig);
- return -1;
- }
- pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
- return -1;
- }
- if (ret < 0)
- return ret;
+ int ret;
ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction));
ASSIGN_TYPED(act.rt_sa_flags, e->flags);
@@ -324,8 +320,6 @@ static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
- sa_entry__free_unpacked(e, NULL);
-
if (sig == SIGCHLD) {
sigchld_act = act;
return 0;
@@ -345,10 +339,116 @@ static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
}
parent_act[sig - 1] = act;
+ /* Mark SIGKILL as blocked, which makes the compat sigaction invalid */
+#ifdef CONFIG_COMPAT
+ parent_act_compat[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+#endif
return 1;
}
+static void *stack32;
+
+#ifdef CONFIG_COMPAT
+static bool sa_compat_inherited(int sig, rt_sigaction_t_compat *sa)
+{
+ rt_sigaction_t_compat *pa;
+ int i;
+
+ if (current == root_item)
+ return false;
+
+ pa = &parent_act_compat[sig];
+
+ /* Skip a parent sigaction that was marked invalid */
+ if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+ return false;
+
+ for (i = 0; i < _KNSIG_WORDS; i++)
+ if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
+ return false;
+
+ return pa->rt_sa_handler == sa->rt_sa_handler &&
+ pa->rt_sa_flags == sa->rt_sa_flags &&
+ pa->rt_sa_restorer == sa->rt_sa_restorer;
+}
+
+static int restore_compat_sigaction(int sig, SaEntry *e)
+{
+ rt_sigaction_t_compat act;
+ int ret;
+
+ ASSIGN_TYPED(act.rt_sa_handler, (u32)e->sigaction);
+ ASSIGN_TYPED(act.rt_sa_flags, e->flags);
+ ASSIGN_TYPED(act.rt_sa_restorer, (u32)e->restorer);
+ BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
+ memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
+
+ if (sig == SIGCHLD) {
+ memcpy(&sigchld_act, &act, sizeof(rt_sigaction_t_compat));
+ return 0;
+ }
+
+ if (sa_compat_inherited(sig - 1, &act))
+ return 1;
+
+ if (!stack32) {
+ stack32 = alloc_compat_syscall_stack();
+ if (!stack32)
+ return -1;
+ }
+
+ ret = arch_compat_rt_sigaction(stack32, sig, &act);
+ if (ret < 0) {
+ pr_err("Can't restore compat sigaction: %d\n", ret);
+ return ret;
+ }
+
+ parent_act_compat[sig - 1] = act;
+ /* Mark SIGKILL as blocked, which makes the native sigaction invalid */
+ parent_act[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+
+ return 1;
+}
+#else
+static int restore_compat_sigaction(int sig, SaEntry *e)
+{
+ return -1;
+}
+#endif
+
+/* Returns number of restored signals, -1 or negative errno on fail */
+static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
+{
+ bool sigaction_is_compat;
+ SaEntry *e;
+ int ret = 0;
+
+ BUG_ON(sig == SIGKILL || sig == SIGSTOP);
+
+ ret = pb_read_one_eof(img, &e, PB_SIGACT);
+ if (ret == 0) {
+ if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
+ pr_err("Unexpected EOF %d\n", sig);
+ return -1;
+ }
+ pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
+ return -1;
+ }
+ if (ret < 0)
+ return ret;
+
+ sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
+ if (sigaction_is_compat)
+ ret = restore_compat_sigaction(sig, e);
+ else
+ ret = restore_native_sigaction(sig, e);
+
+ sa_entry__free_unpacked(e, NULL);
+
+ return ret;
+}
+
static int prepare_sigactions(void)
{
int pid = current->pid.virt;
@@ -380,6 +480,10 @@ static int prepare_sigactions(void)
SIGMAX - 3 /* KILL, STOP and CHLD */);
close_image(img);
+ if (stack32) {
+ free_compat_syscall_stack(stack32);
+ stack32 = NULL;
+ }
return ret;
}
diff --git a/criu/include/parasite-compat.h b/criu/include/parasite-compat.h
index a0629a950558..f17455ecf472 100644
--- a/criu/include/parasite-compat.h
+++ b/criu/include/parasite-compat.h
@@ -7,12 +7,9 @@
#include "images/core.pb-c.h"
-typedef struct {
- u32 rt_sa_handler;
- u32 rt_sa_flags;
- u32 rt_sa_restorer;
- k_rtsigset_t rt_sa_mask;
-} rt_sigaction_t_compat;
+#if !defined(CONFIG_X86_64) && !defined(CONFIG_X86_32)
+#define rt_sigaction_t_compat rt_sigaction_t
+#endif
struct parasite_dump_sa_args_compat {
rt_sigaction_t_compat sas[SIGMAX];
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index f658dfd37c33..47ac71280953 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -693,8 +693,9 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_imgset *cr_
int ret, sig;
struct cr_img *img;
SaEntry se = SA_ENTRY__INIT;
+ bool native_task = seized_native(ctl);
- if (seized_native(ctl))
+ if (native_task)
args = parasite_args(ctl, struct parasite_dump_sa_args);
else
args_c = parasite_args(ctl, struct parasite_dump_sa_args_compat);
@@ -711,10 +712,12 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_imgset *cr_
if (sig == SIGSTOP || sig == SIGKILL)
continue;
- if (seized_native(ctl))
+ if (native_task)
ASSIGN_SAS(se, args);
else
ASSIGN_SAS(se, args_c);
+ se.has_compat_sigaction = true;
+ se.compat_sigaction = !native_task;
if (pb_write_one(img, &se, PB_SIGACT) < 0)
return -1;
diff --git a/criu/pie/Makefile b/criu/pie/Makefile
index fee5d3c6f911..e30293692c5e 100644
--- a/criu/pie/Makefile
+++ b/criu/pie/Makefile
@@ -15,6 +15,7 @@ restorer-obj-y += ./$(ARCH_DIR)/restorer.o
ifeq ($(ARCH),x86)
restorer-obj-e += ./$(ARCH_DIR)/syscalls-64.built-in.o
restorer-obj-y += ./$(ARCH_DIR)/call32.o
+ restorer-obj-y += ./$(ARCH_DIR)/sigaction_compat_pie.o
native-obj-y += ./$(ARCH_DIR)/parasite-head-64.o
native-obj-e += ./$(ARCH_DIR)/syscalls-64.built-in.o
@@ -68,6 +69,7 @@ define obj-export-compat-flags
LDFLAGS_$(notdir $(1)) := $(COMPAT_LDFLAGS)
endef
+$(eval $(call map,obj-export-native-flags,$(restorer-obj-y)))
$(eval $(call map,obj-export-native-flags,$(native-obj-y) native))
$(eval $(call map,obj-export-compat-flags,$(compat-obj-y) compat))
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 9e0f4e5d1641..a4a9f5e57520 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1460,7 +1460,20 @@ long __export_restore_task(struct task_restore_args *args)
goto core_restore_end;
}
- sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));
+ if (!args->compatible_mode) {
+ sys_sigaction(SIGCHLD, &args->sigchld_act,
+ NULL, sizeof(k_rtsigset_t));
+ } else {
+ void *stack = alloc_compat_syscall_stack();
+
+ if (!stack) {
+ pr_err("Failed to allocate 32-bit stack for sigaction\n");
+ goto core_restore_end;
+ }
+ arch_compat_rt_sigaction(stack, SIGCHLD,
+ (void*)&args->sigchld_act);
+ free_compat_syscall_stack(stack);
+ }
ret = restore_signals(args->siginfo, args->siginfo_n, true);
if (ret)
diff --git a/images/sa.proto b/images/sa.proto
index e5099100b8b1..fdfc6713975d 100644
--- a/images/sa.proto
+++ b/images/sa.proto
@@ -5,4 +5,5 @@ message sa_entry {
required uint64 flags = 2 [(criu).hex = true];
required uint64 restorer = 3 [(criu).hex = true];
required uint64 mask = 4 [(criu).hex = true];
+ optional bool compat_sigaction = 5;
}
--
2.9.0
More information about the CRIU
mailing list