[CRIU] [PATCHv3 28/30] restore/x86: call int80 for compat sigaction restore

Dmitry Safonov dsafonov at virtuozzo.com
Mon Jun 27 11:16:04 PDT 2016


The kernel patch "x86/signal: add SA_{X32,IA32}_ABI sa_flags" makes a
signal's ABI the same as the ABI of the sigaction syscall, instead of
following the per-thread TIF_IA32 flag.
So, to have signals delivered with the compat ABI, we need to call
sigaction through a raw int80.
This patch restores such signals with int80: both the cr-restore part and
the PIE restorer's part live in sigaction_compat.c, which is compiled for
the criu binary and for the restorer PIE.
The PIE part is needed only for setting the SIGCHLD handler; the other
signal handlers are set in cr-restore (as before).

Cc: Cyrill Gorcunov <gorcunov at openvz.org>
Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
---
v3: add the missing commit description

 criu/arch/aarch64/include/asm/restorer.h |   5 ++
 criu/arch/arm/include/asm/restorer.h     |   5 ++
 criu/arch/ppc64/include/asm/restorer.h   |   5 ++
 criu/arch/x86/Makefile                   |   2 +
 criu/arch/x86/include/asm/restorer.h     |  12 +++
 criu/arch/x86/include/asm/types.h        |   7 ++
 criu/arch/x86/restorer.c                 |  10 +--
 criu/arch/x86/sigaction_compat.c         |  68 +++++++++++++++
 criu/arch/x86/sigaction_compat_pie.c     |   1 +
 criu/cr-restore.c                        | 144 ++++++++++++++++++++++++++-----
 criu/include/parasite-compat.h           |   9 +-
 criu/parasite-syscall.c                  |   7 +-
 criu/pie/Makefile                        |   2 +
 criu/pie/restorer.c                      |  15 +++-
 images/sa.proto                          |   1 +
 15 files changed, 257 insertions(+), 36 deletions(-)
 create mode 100644 criu/arch/x86/sigaction_compat.c
 create mode 120000 criu/arch/x86/sigaction_compat_pie.c

diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
index 19f459a0b08f..80f358c46a42 100644
--- a/criu/arch/aarch64/include/asm/restorer.h
+++ b/criu/arch/aarch64/include/asm/restorer.h
@@ -125,4 +125,9 @@ static inline int ptrace_flush_breakpoints(pid_t pid)
 	return 0;
 }
 
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
 #endif
diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h
index e17a80e7a971..305311799e9a 100644
--- a/criu/arch/arm/include/asm/restorer.h
+++ b/criu/arch/arm/include/asm/restorer.h
@@ -167,4 +167,9 @@ static inline int ptrace_flush_breakpoints(pid_t pid)
 	return 0;
 }
 
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
 #endif
diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h
index e1c08ef17eeb..e9d171a7b1df 100644
--- a/criu/arch/ppc64/include/asm/restorer.h
+++ b/criu/arch/ppc64/include/asm/restorer.h
@@ -131,4 +131,9 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe,
  */
 unsigned long sys_shmat(int shmid, const void *shmaddr, int shmflg);
 
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
+
 #endif /*__CR_ASM_RESTORER_H__*/
diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile
index 5db577340a7d..d8e8251845bf 100644
--- a/criu/arch/x86/Makefile
+++ b/criu/arch/x86/Makefile
@@ -4,3 +4,5 @@ ccflags-y		+= -iquote $(obj) -iquote $(SRC_DIR) -iquote $(obj)/include -iquote $
 
 obj-y			+= cpu.o
 obj-y			+= crtools.o
+obj-y			+= sigaction_compat.o
+obj-y			+= call32.o
diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
index 2ab68bad3d70..65af6cd86405 100644
--- a/criu/arch/x86/include/asm/restorer.h
+++ b/criu/arch/x86/include/asm/restorer.h
@@ -96,8 +96,20 @@ static inline void __always_unused __check_compat_sigset_t(void)
 {
 	BUILD_BUG_ON(sizeof(compat_sigset_t) != sizeof(k_rtsigset_t));
 }
+
+#define CONFIG_COMPAT
+extern void *alloc_compat_syscall_stack(void);
+extern void free_compat_syscall_stack(void *mem);
+extern unsigned long call32_from_64(void *stack, void *func);
+
+extern int arch_compat_rt_sigaction(void *stack32, int sig,
+		rt_sigaction_t_compat *act);
 #else
 #define rt_sigframe_ia32		rt_sigframe
+static inline void *alloc_compat_syscall_stack(void) { return NULL; }
+static inline void free_compat_syscall_stack(void *stack32) { }
+static inline int
+arch_compat_rt_sigaction(void *stack, int sig, void *act) { return -1; }
 #endif
 
 typedef struct compat_sigaltstack {
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
index 7e3bb3c71bdb..bb9d3a0c6119 100644
--- a/criu/arch/x86/include/asm/types.h
+++ b/criu/arch/x86/include/asm/types.h
@@ -49,6 +49,13 @@ typedef struct {
 } rt_sigaction_t;
 
 typedef struct {
+	u32	rt_sa_handler;
+	u32	rt_sa_flags;
+	u32	rt_sa_restorer;
+	k_rtsigset_t	rt_sa_mask;
+} rt_sigaction_t_compat;
+
+typedef struct {
 	unsigned int	entry_number;
 	unsigned int	base_addr;
 	unsigned int	limit;
diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c
index d45c009e87ce..f16bdcaa5b68 100644
--- a/criu/arch/x86/restorer.c
+++ b/criu/arch/x86/restorer.c
@@ -33,8 +33,6 @@ int restore_nonsigframe_gpregs(UserX86RegsEntry *r)
 	return 0;
 }
 
-extern unsigned long call32_from_64(void *stack, void *func);
-
 asm (	"	.pushsection .text				\n"
 	"	.global restore_set_thread_area			\n"
 	"	.code32						\n"
@@ -54,10 +52,8 @@ static int prepare_stack32(void)
 	if (stack32)
 		return 0;
 
-	stack32 = (void*)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
-				MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-	if (stack32 == MAP_FAILED) {
-		stack32 = NULL;
+	stack32 = alloc_compat_syscall_stack();
+	if (!stack32) {
 		pr_err("Failed to allocate stack for 32-bit TLS restore\n");
 		return -1;
 	}
@@ -91,5 +87,5 @@ void restore_tls(tls_t *ptls)
 	}
 
 	if (stack32)
-		sys_munmap(stack32, PAGE_SIZE);
+		free_compat_syscall_stack(stack32);
 }
diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c
new file mode 100644
index 000000000000..ef6d13c9101f
--- /dev/null
+++ b/criu/arch/x86/sigaction_compat.c
@@ -0,0 +1,68 @@
+#include "asm/restorer.h"
+#include "asm/fpu.h"
+#include "asm/string.h"
+
+#include <sys/mman.h>
+
+#ifdef CR_NOGLIBC
+# include "syscall.h"
+#else
+# define sys_mmap mmap
+# define sys_munmap munmap
+# ifndef  __NR32_rt_sigaction
+#  define  __NR32_rt_sigaction 174
+# endif
+#endif
+#include "log.h"
+#include "cpu.h"
+
+void *alloc_compat_syscall_stack(void)
+{
+	void *mem = (void*)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+			MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+	if (mem == MAP_FAILED)
+		return 0;
+	return mem;
+}
+
+void free_compat_syscall_stack(void *mem)
+{
+	sys_munmap(mem, PAGE_SIZE);
+}
+
+asm (	"	.pushsection .text				\n"
+	"	.global restore_rt_sigaction			\n"
+	"	.code32						\n"
+	"restore_rt_sigaction:					\n"
+	"	mov %edx, %esi					\n"
+	"	mov $0, %edx					\n"
+	"	movl $"__stringify(__NR32_rt_sigaction)",%eax	\n"
+	"	int $0x80					\n"
+	"	ret						\n"
+	"	.popsection					\n"
+	"	.code64");
+extern char restore_rt_sigaction;
+
+/*
+ * Call the raw rt_sigaction syscall through int80, so that the ABI the
+ * kernel chooses for delivering this signal is i386.
+ */
+int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
+{
+	int ret;
+
+	/*
+	 * To be sure that the sigaction pointer lies under 4G,
+	 * copy it to the bottom of the stack.
+	 */
+	builtin_memcpy(stack32, act, sizeof(rt_sigaction_t_compat));
+
+	asm volatile ("\t movl %%ebx,%%ebx\n" : :"b"(sig));	/* signum */
+	asm volatile ("\t movl %%ecx,%%ecx\n" : :"c"(stack32));	/* act */
+	asm volatile ("\t movl %%edx,%%edx\n" : :"d"(sizeof(act->rt_sa_mask)));
+	call32_from_64(stack32 + PAGE_SIZE, &restore_rt_sigaction);
+	asm volatile ("\t movl %%eax,%0\n" : "=r"(ret));
+	return ret;
+}
+
diff --git a/criu/arch/x86/sigaction_compat_pie.c b/criu/arch/x86/sigaction_compat_pie.c
new file mode 120000
index 000000000000..009ac3a87e71
--- /dev/null
+++ b/criu/arch/x86/sigaction_compat_pie.c
@@ -0,0 +1 @@
+sigaction_compat.c
\ No newline at end of file
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 8b21d27aa2ed..a8071ce62c83 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -276,7 +276,15 @@ err:
 }
 
 static rt_sigaction_t sigchld_act;
+/*
+ * If the parent's sigaction has SIGKILL blocked (which is nonsense),
+ * this parent action is invalid and must not be inherited.
+ * This is used to mark a parent_act* entry as no longer valid.
+ */
 static rt_sigaction_t parent_act[SIGMAX];
+#ifdef CONFIG_COMPAT
+static rt_sigaction_t_compat parent_act_compat[SIGMAX];
+#endif
 
 static bool sa_inherited(int sig, rt_sigaction_t *sa)
 {
@@ -288,6 +296,10 @@ static bool sa_inherited(int sig, rt_sigaction_t *sa)
 
 	pa = &parent_act[sig];
 
+	/* Skip an invalid sigaction */
+	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+		return false;
+
 	for (i = 0; i < _KNSIG_WORDS; i++)
 		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
 			return false;
@@ -297,26 +309,10 @@ static bool sa_inherited(int sig, rt_sigaction_t *sa)
 		pa->rt_sa_restorer == sa->rt_sa_restorer;
 }
 
-/* Returns number of restored signals, -1 or negative errno on fail */
-static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
+static int restore_native_sigaction(int sig, SaEntry *e)
 {
 	rt_sigaction_t act;
-	SaEntry *e;
-	int ret = 0;
-
-	BUG_ON(sig == SIGKILL || sig == SIGSTOP);
-
-	ret = pb_read_one_eof(img, &e, PB_SIGACT);
-	if (ret == 0) {
-		if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
-			pr_err("Unexpected EOF %d\n", sig);
-			return -1;
-		}
-		pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
-		return -1;
-	}
-	if (ret < 0)
-		return ret;
+	int ret;
 
 	ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction));
 	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
@@ -324,8 +320,6 @@ static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
 	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
 	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
 
-	sa_entry__free_unpacked(e, NULL);
-
 	if (sig == SIGCHLD) {
 		sigchld_act = act;
 		return 0;
@@ -345,10 +339,116 @@ static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
 	}
 
 	parent_act[sig - 1] = act;
+	/* Mark SIGKILL as blocked, which makes the compat sigaction invalid */
+#ifdef CONFIG_COMPAT
+	parent_act_compat[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+#endif
 
 	return 1;
 }
 
+static void *stack32;
+
+#ifdef CONFIG_COMPAT
+static bool sa_compat_inherited(int sig, rt_sigaction_t_compat *sa)
+{
+	rt_sigaction_t_compat *pa;
+	int i;
+
+	if (current == root_item)
+		return false;
+
+	pa = &parent_act_compat[sig];
+
+	/* Skip an invalid sigaction */
+	if (pa->rt_sa_mask.sig[0] & (1 << SIGKILL))
+		return false;
+
+	for (i = 0; i < _KNSIG_WORDS; i++)
+		if (pa->rt_sa_mask.sig[i] != sa->rt_sa_mask.sig[i])
+			return false;
+
+	return pa->rt_sa_handler == sa->rt_sa_handler &&
+		pa->rt_sa_flags == sa->rt_sa_flags &&
+		pa->rt_sa_restorer == sa->rt_sa_restorer;
+}
+
+static int restore_compat_sigaction(int sig, SaEntry *e)
+{
+	rt_sigaction_t_compat act;
+	int ret;
+
+	ASSIGN_TYPED(act.rt_sa_handler, (u32)e->sigaction);
+	ASSIGN_TYPED(act.rt_sa_flags, e->flags);
+	ASSIGN_TYPED(act.rt_sa_restorer, (u32)e->restorer);
+	BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig));
+	memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig));
+
+	if (sig == SIGCHLD) {
+		memcpy(&sigchld_act, &act, sizeof(rt_sigaction_t_compat));
+		return 0;
+	}
+
+	if (sa_compat_inherited(sig - 1, &act))
+		return 1;
+
+	if (!stack32) {
+		stack32 = alloc_compat_syscall_stack();
+		if (!stack32)
+			return -1;
+	}
+
+	ret = arch_compat_rt_sigaction(stack32, sig, &act);
+	if (ret < 0) {
+		pr_err("Can't restore compat sigaction: %d\n", ret);
+		return ret;
+	}
+
+	parent_act_compat[sig - 1] = act;
+	/* Mark SIGKILL as blocked, which makes the native sigaction invalid */
+	parent_act[sig - 1].rt_sa_mask.sig[0] |= 1 << SIGKILL;
+
+	return 1;
+}
+#else
+static int restore_compat_sigaction(int sig, SaEntry *e)
+{
+	return -1;
+}
+#endif
+
+/* Returns number of restored signals, -1 or negative errno on fail */
+static int restore_one_sigaction(int sig, struct cr_img *img, int pid)
+{
+	bool sigaction_is_compat;
+	SaEntry *e;
+	int ret = 0;
+
+	BUG_ON(sig == SIGKILL || sig == SIGSTOP);
+
+	ret = pb_read_one_eof(img, &e, PB_SIGACT);
+	if (ret == 0) {
+		if (sig != SIGMAX_OLD + 1) { /* backward compatibility */
+			pr_err("Unexpected EOF %d\n", sig);
+			return -1;
+		}
+		pr_warn("This format of sigacts-%d.img is deprecated\n", pid);
+		return -1;
+	}
+	if (ret < 0)
+		return ret;
+
+	sigaction_is_compat = e->has_compat_sigaction && e->compat_sigaction;
+	if (sigaction_is_compat)
+		ret = restore_compat_sigaction(sig, e);
+	else
+		ret = restore_native_sigaction(sig, e);
+
+	sa_entry__free_unpacked(e, NULL);
+
+	return ret;
+}
+
 static int prepare_sigactions(void)
 {
 	int pid = current->pid.virt;
@@ -380,6 +480,10 @@ static int prepare_sigactions(void)
 			SIGMAX - 3 /* KILL, STOP and CHLD */);
 
 	close_image(img);
+	if (stack32) {
+		free_compat_syscall_stack(stack32);
+		stack32 = NULL;
+	}
 	return ret;
 }
 
diff --git a/criu/include/parasite-compat.h b/criu/include/parasite-compat.h
index a0629a950558..f17455ecf472 100644
--- a/criu/include/parasite-compat.h
+++ b/criu/include/parasite-compat.h
@@ -7,12 +7,9 @@
 
 #include "images/core.pb-c.h"
 
-typedef struct {
-	u32	rt_sa_handler;
-	u32	rt_sa_flags;
-	u32	rt_sa_restorer;
-	k_rtsigset_t	rt_sa_mask;
-} rt_sigaction_t_compat;
+#if !defined(CONFIG_X86_64) && !defined(CONFIG_X86_32)
+#define rt_sigaction_t_compat rt_sigaction_t
+#endif
 
 struct parasite_dump_sa_args_compat {
 	rt_sigaction_t_compat sas[SIGMAX];
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index f658dfd37c33..47ac71280953 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -693,8 +693,9 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_imgset *cr_
 	int ret, sig;
 	struct cr_img *img;
 	SaEntry se = SA_ENTRY__INIT;
+	bool native_task = seized_native(ctl);
 
-	if (seized_native(ctl))
+	if (native_task)
 		args = parasite_args(ctl, struct parasite_dump_sa_args);
 	else
 		args_c = parasite_args(ctl, struct parasite_dump_sa_args_compat);
@@ -711,10 +712,12 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct cr_imgset *cr_
 		if (sig == SIGSTOP || sig == SIGKILL)
 			continue;
 
-		if (seized_native(ctl))
+		if (native_task)
 			ASSIGN_SAS(se, args);
 		else
 			ASSIGN_SAS(se, args_c);
+		se.has_compat_sigaction = true;
+		se.compat_sigaction = !native_task;
 
 		if (pb_write_one(img, &se, PB_SIGACT) < 0)
 			return -1;
diff --git a/criu/pie/Makefile b/criu/pie/Makefile
index fee5d3c6f911..e30293692c5e 100644
--- a/criu/pie/Makefile
+++ b/criu/pie/Makefile
@@ -15,6 +15,7 @@ restorer-obj-y		+= ./$(ARCH_DIR)/restorer.o
 ifeq ($(ARCH),x86)
         restorer-obj-e		+= ./$(ARCH_DIR)/syscalls-64.built-in.o
         restorer-obj-y		+= ./$(ARCH_DIR)/call32.o
+        restorer-obj-y		+= ./$(ARCH_DIR)/sigaction_compat_pie.o
 
         native-obj-y		+= ./$(ARCH_DIR)/parasite-head-64.o
         native-obj-e		+= ./$(ARCH_DIR)/syscalls-64.built-in.o
@@ -68,6 +69,7 @@ define obj-export-compat-flags
         LDFLAGS_$(notdir $(1))	:= $(COMPAT_LDFLAGS)
 endef
 
+$(eval $(call map,obj-export-native-flags,$(restorer-obj-y)))
 $(eval $(call map,obj-export-native-flags,$(native-obj-y) native))
 $(eval $(call map,obj-export-compat-flags,$(compat-obj-y) compat))
 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 9e0f4e5d1641..a4a9f5e57520 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1460,7 +1460,20 @@ long __export_restore_task(struct task_restore_args *args)
 		goto core_restore_end;
 	}
 
-	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));
+	if (!args->compatible_mode) {
+		sys_sigaction(SIGCHLD, &args->sigchld_act,
+				NULL, sizeof(k_rtsigset_t));
+	} else {
+		void *stack = alloc_compat_syscall_stack();
+
+		if (!stack) {
+			pr_err("Failed to allocate 32-bit stack for sigaction\n");
+			goto core_restore_end;
+		}
+		arch_compat_rt_sigaction(stack, SIGCHLD,
+				(void*)&args->sigchld_act);
+		free_compat_syscall_stack(stack);
+	}
 
 	ret = restore_signals(args->siginfo, args->siginfo_n, true);
 	if (ret)
diff --git a/images/sa.proto b/images/sa.proto
index e5099100b8b1..fdfc6713975d 100644
--- a/images/sa.proto
+++ b/images/sa.proto
@@ -5,4 +5,5 @@ message sa_entry {
 	required uint64	flags		= 2 [(criu).hex = true];
 	required uint64	restorer	= 3 [(criu).hex = true];
 	required uint64	mask		= 4 [(criu).hex = true];
+	optional bool compat_sigaction	= 5;
 }
-- 
2.9.0



More information about the CRIU mailing list