[CRIU] [PATCH 5/5] Redesign of syscalls

Cyrill Gorcunov gorcunov at openvz.org
Tue Mar 27 09:01:42 EDT 2012


This patch fixes a long-standing bug in the syscall descriptions.
Previously the syscalls were defined inline in syscall.h, with
parameters moved as "long" using the "g" constraint, which sometimes
makes gcc emit direct addresses of memory operands instead of
relative offsets. Moreover, the way we generated the binary blobs was
slightly wrong: the blob must be a single-section .text file, and offsets
should be taken from a file where relocations have already been applied.

So now we generate the syscalls from the syscall.def template file.
This forced a redesign of the Makefiles and of the order in which files
are built, but in the end the amount of code shrinks and becomes
more readable, I think.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile                |    9 +-
 Makefile.inc            |    1 +
 Makefile.pie            |   16 +-
 Makefile.syscall        |   39 +++++
 cr-restore.c            |    8 +-
 include/syscall-codes.h |   62 -------
 include/syscall-types.h |   51 ++++++
 include/syscall.def     |   57 ++++++
 include/syscall.h       |  435 -----------------------------------------------
 parasite.c              |   17 +-
 restorer.c              |   43 ++++--
 syscall-common.S        |   16 ++
 syscalls.pl             |   51 ++++++
 13 files changed, 272 insertions(+), 533 deletions(-)
 create mode 100644 Makefile.syscall
 delete mode 100644 include/syscall-codes.h
 create mode 100644 include/syscall-types.h
 create mode 100644 include/syscall.def
 delete mode 100644 include/syscall.h
 create mode 100644 syscall-common.S
 create mode 100644 syscalls.pl

diff --git a/Makefile b/Makefile
index 33a0682..7d99109 100644
--- a/Makefile
+++ b/Makefile
@@ -46,6 +46,7 @@ OBJS		+= ipc_ns.o
 
 DEPS		+= $(patsubst %.o,%.d,$(OBJS))
 
+-include Makefile.syscall
 -include Makefile.pie
 
 all: $(PROGRAM)
@@ -62,9 +63,9 @@ all: $(PROGRAM)
 	$(E) "  CC      " $@
 	$(Q) $(CC) -S $(CFLAGS) -fverbose-asm $< -o $@
 
-$(PROGRAM): $(OBJS) | $(PIE-GEN)
+$(PROGRAM): $(OBJS) | $(SYS-OBJ) $(PIE-GEN)
 	$(E) "  LINK    " $@
-	$(Q) $(CC) $(CFLAGS) $(OBJS) $(LIBS) -o $@
+	$(Q) $(CC) $(CFLAGS) $(OBJS) $(SYS-OBJ) $(LIBS) -o $@
 
 %.d: %.c | $(PIE-GEN)
 	$(Q) $(CC) -M -MT $(patsubst %.d,%.o,$@) $(CFLAGS) $< -o $@
@@ -81,14 +82,14 @@ test: zdtm
 	$(Q) $(SH) test/zdtm.sh
 .PHONY: test
 
-rebuild:
+rebuild: cleanpie cleansyscall
 	$(E) "  FORCE-REBUILD"
 	$(Q) $(RM) -f ./*.o
 	$(Q) $(RM) -f ./*.d
 	$(Q) $(MAKE)
 .PHONY: rebuild
 
-clean: cleanpie
+clean: cleanpie cleansyscall
 	$(E) "  CLEAN"
 	$(Q) $(RM) -f ./*.o
 	$(Q) $(RM) -f ./*.d
diff --git a/Makefile.inc b/Makefile.inc
index ffa02d9..c755a38 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -19,6 +19,7 @@ NM		:= nm
 AWK		:= awk
 SH		:= sh
 MAKE		:= make
+PERL		:= perl
 
 # Additional ARCH settings for x86
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
diff --git a/Makefile.pie b/Makefile.pie
index bfeb2be..b25d160 100644
--- a/Makefile.pie
+++ b/Makefile.pie
@@ -20,13 +20,13 @@ RBLOB-LDS	:= pie.lds.S
 DEPS		+= $(patsubst %.o,%.d,$(POBJS))
 DEPS		+= $(patsubst %.o,%.d,$(ROBJS))
 
-PIEFLAGS	:= -fpic
+PIEFLAGS	:= -fpie
 
-$(POBJS): $(PSRCS)
+$(POBJS): $(PSRCS) $(SYS-OBJ)
 	$(E) "  CC      " $@
 	$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $(patsubst %.o,%.c,$@) -o $@
 
-parasite-util-net.o: util-net.c
+parasite-util-net.o: util-net.c $(SYS-OBJ)
 	$(E) "  CC      " $@
 	$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $< -o $@
 
@@ -34,21 +34,21 @@ POBJS		+= parasite-util-net.o
 
 $(PBLOB-BIN): $(PBLOB-LDS) $(POBJS)
 	$(E) "  GEN     " $@
-	$(Q) $(LD) --oformat=binary -T $(PBLOB-LDS) -o $(PBLOB-BIN) $(POBJS)
-	$(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) -o $(PBLOB-BIN).o $(POBJS)
+	$(Q) $(LD) --oformat=binary -T $(PBLOB-LDS) -o $(PBLOB-BIN) $(POBJS) $(SYS-OBJ)
+	$(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) -o $(PBLOB-BIN).o $(POBJS) $(SYS-OBJ)
 
 $(PBLOB-HDR): $(PBLOB-BIN) $(GEN-OFFSETS)
 	$(E) "  GEN     " $@
 	$(Q) $(SH) $(GEN-OFFSETS) $(PBLOB-NAME) > $@ || rm -f $@
 
-$(ROBJS): $(RSRCS)
+$(ROBJS): $(RSRCS) $(SYS-OBJ)
 	$(E) "  CC      " $@
 	$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $(patsubst %.o,%.c,$@) -o $@
 
 $(RBLOB-BIN): $(RBLOB-LDS) $(ROBJS)
 	$(E) "  GEN     " $@
-	$(Q) $(LD) --oformat=binary -T $(RBLOB-LDS) -o $(RBLOB-BIN) $(ROBJS)
-	$(Q) $(LD) --oformat=elf64-x86-64 -T $(RBLOB-LDS) -o $(RBLOB-BIN).o $(ROBJS)
+	$(Q) $(LD) --oformat=binary -T $(RBLOB-LDS) -o $(RBLOB-BIN) $(ROBJS) $(SYS-OBJ)
+	$(Q) $(LD) --oformat=elf64-x86-64 -T $(RBLOB-LDS) -o $(RBLOB-BIN).o $(ROBJS) $(SYS-OBJ)
 
 $(RBLOB-HDR): $(RBLOB-BIN) $(GEN-OFFSETS)
 	$(E) "  GEN     " $@
diff --git a/Makefile.syscall b/Makefile.syscall
new file mode 100644
index 0000000..1d7299e
--- /dev/null
+++ b/Makefile.syscall
@@ -0,0 +1,39 @@
+SYS-DEF		:= include/syscall.def
+SYS-ASM-COMMON	:= syscall-common.S
+SYS-TYPES	:= include/syscall-types.h
+# Headers written by $(SYS-GEN) alongside $(SYS-ASM).
+SYS-CODES	:= include/syscall-codes.h
+SYS-PROTO	:= include/syscall.h
+# Generated assembly stubs and the generator script.
+SYS-ASM		:= syscall.S
+SYS-GEN		:= syscalls.pl
+
+SYS-OBJ		:= $(patsubst %.S,%.o,$(SYS-ASM))
+
+SYS-FLAGS	:= -pie -Wstrict-prototypes -nostdlib -fomit-frame-pointer
+# Arguments are positional; the order must match the @ARGV unpacking in $(SYS-GEN).
+$(SYS-ASM): $(SYS-GEN) $(SYS-DEF) $(SYS-ASM-COMMON) $(SYS-TYPES)
+	$(E) "  GEN     " $@
+	$(Q) $(PERL)			\
+		$(SYS-GEN)		\
+		$(SYS-DEF)		\
+		$(SYS-CODES)		\
+		$(SYS-PROTO)		\
+		$(SYS-ASM)		\
+		$(SYS-ASM-COMMON)	\
+		$(SYS-TYPES)
+# Compile the stubs, then run them through $(LD) with the $(PBLOB-LDS) linker script.
+$(SYS-OBJ): $(SYS-ASM)
+	$(E) "  CC      " $@.prelim
+	$(Q) $(CC) -c $(CFLAGS) $(SYS-FLAGS)  $< -o $@.prelim
+	$(E) "  LD      " $@
+	$(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) $@.prelim -o $@
+# Remove the generated headers, assembly and objects.
+cleansyscall:
+	$(E) "  CLEAN SYSCALLS"
+	$(Q) $(RM) -f ./$(SYS-CODES)
+	$(Q) $(RM) -f ./$(SYS-PROTO)
+	$(Q) $(RM) -f ./$(SYS-ASM)
+	$(Q) $(RM) -f ./*.prelim
+	$(Q) $(RM) -f ./$(SYS-OBJ)
+.PHONY: cleansyscall
diff --git a/cr-restore.c b/cr-restore.c
index 22b6afb..c9f431e 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -822,7 +822,7 @@ static int prepare_sigactions(int pid)
 		 * A pure syscall is used, because glibc
 		 * sigaction overwrites se_restorer.
 		 */
-		ret = sys_sigaction(sig, &act, &oact);
+		ret = sys_sigaction(sig, &act, &oact, sizeof(rt_sigset_t));
 		if (ret == -1) {
 			pr_err("%d: Can't restore sigaction: %m\n", pid);
 			goto err;
@@ -1583,10 +1583,10 @@ static int sigreturn_restore(pid_t pid)
 	restore_thread_exec_start	= restore_code_start + restorer_blob_offset__restore_thread;
 	restore_task_exec_start		= restore_code_start + restorer_blob_offset__restore_task;
 	task_args			= restore_code_start + restore_code_len;
-	thread_args			= restore_thread_exec_start;
+	thread_args			= (void *)task_args + sizeof(*task_args);
 
-	memzero_p(task_args);
-	memzero_p(thread_args);
+	memzero(task_args, sizeof(*task_args));
+	memzero(thread_args, sizeof(*thread_args) * me->nr_threads);
 
 	/*
 	 * Code at a new place.
diff --git a/include/syscall-codes.h b/include/syscall-codes.h
deleted file mode 100644
index 86adf75..0000000
--- a/include/syscall-codes.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#ifndef CR_SYSCALL_CODES_H_
-#define CR_SYSCALL_CODES_H_
-
-#ifdef CONFIG_X86_64
-
-#define __NR_read		0
-#define __NR_write		1
-#define __NR_open		2
-#define __NR_close		3
-#define __NR_lseek		8
-#define __NR_mmap		9
-#define __NR_mprotect		10
-#define __NR_munmap		11
-#define __NR_brk		12
-#define __NR_rt_sigaction	13
-#define __NR_rt_sigprocmask	14
-#define __NR_rt_sigreturn	15
-#define __NR_mincore		27
-#define __NR_shmat		30
-#define __NR_dup		32
-#define __NR_dup2		33
-#define __NR_pause		34
-#define __NR_nanosleep		35
-#define __NR_getitimer		36
-#define __NR_setitimer		38
-#define __NR_getpid		39
-#define __NR_socket		41
-#define __NR_sendmsg		46
-#define __NR_recvmsg		47
-#define __NR_bind		49
-#define __NR_setsockopt		54
-#define __NR_getsockopt		55
-#define __NR_clone		56
-#define __NR_exit		60
-#define __NR_wait4		61
-#define __NR_kill		62
-#define __NR_flock		73
-#define __NR_unlink		87
-#define __NR_setresuid		117
-#define __NR_setresgid		119
-#define __NR_setfsuid		122
-#define __NR_setfsgid		123
-#define __NR_capset		126
-#define __NR_tgkill		131
-#define __NR__sysctl		156
-#define __NR_prctl		157
-#define __NR_arch_prctl		158
-#define __NR_gettid		186
-#define __NR_futex		202
-#define __NR_set_thread_area	205
-#define __NR_get_thread_area	211
-#define __NR_set_tid_address	218
-#define __NR_restart_syscall	219
-#define __NR_msync		227
-#define __NR_setns		308
-#define __NR_kcmp		312
-
-#else /* CONFIG_X86_64 */
-# error x86-32 bit mode not yet implemented
-#endif /* CONFIG_X86_64 */
-
-#endif /* CR_SYSCALL_CODES_H_ */
diff --git a/include/syscall-types.h b/include/syscall-types.h
new file mode 100644
index 0000000..bf86c00
--- /dev/null
+++ b/include/syscall-types.h
@@ -0,0 +1,51 @@
+/*
+ * Please add here type definitions if
+ * syscall prototypes need them.
+ *
+ * Anything else should go to plain type.h
+ */
+
+#ifndef SYSCALL_TYPES_H__
+#define SYSCALL_TYPES_H__
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <arpa/inet.h>
+
+#include "types.h"
+#include "compiler.h"
+
+#ifndef CONFIG_X86_64
+# error x86-32 bit mode not yet implemented
+#endif
+/* Argument layouts for sys_capset() as declared in syscall.def. */
+struct cap_header {
+	u32 version;
+	int pid;
+};
+
+struct cap_data {
+	u32 eff;
+	u32 prm;
+	u32 inh;
+};
+/* Forward declarations; the syscall prototypes only use pointers to these. */
+struct sockaddr;
+struct msghdr;
+struct rusage;
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID	0x20000000
+#endif
+
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS	0x04000000
+#endif
+
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC	0x08000000
+#endif
+
+#define setns	sys_setns
+
+#endif /* SYSCALL_TYPES_H__ */
diff --git a/include/syscall.def b/include/syscall.def
new file mode 100644
index 0000000..c1e3a70
--- /dev/null
+++ b/include/syscall.def
@@ -0,0 +1,57 @@
+#
+# System calls table, please make sure the table consists only of the syscalls
+# really used somewhere in the project.
+#
+# The template is (the second name and the arguments are optional if you need
+# only __NR_x defined, but no real entry point in the syscalls lib).
+#
+# name			code		name			arguments
+# -----------------------------------------------------------------------
+#
+__NR_read		0		sys_read		(int fd, void *buf, unsigned long count)
+__NR_write		1		sys_write		(int fd, const void *buf, unsigned long count)
+__NR_open		2		sys_open		(const char *filename, unsigned long flags, unsigned long mode)
+__NR_close		3		sys_close		(int fd)
+__NR_lseek		8		sys_lseek		(int fd, unsigned long offset, unsigned long origin)
+__NR_mmap		9		sys_mmap		(void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect		10		sys_mprotect		(const void *addr, unsigned long len, unsigned long prot)
+__NR_munmap		11		sys_munmap		(void *addr, unsigned long len)
+__NR_brk		12		sys_brk			(void *addr)
+__NR_rt_sigaction	13		sys_sigaction		(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask	14		sys_sigprocmask		(int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigreturn	15		sys_rt_sigreturn	(void)
+__NR_mincore		27		sys_mincore		(void *addr, unsigned long size, unsigned char *vec)
+__NR_shmat		30		sys_shmat		(int shmid, void *shmaddr, int shmflag)
+__NR_pause		34		sys_pause		(void)
+__NR_nanosleep		35		sys_nanosleep		(struct timespec *req, struct timespec *rem)
+__NR_getitimer		36		sys_getitimer		(int which, const struct itimerval *val)
+__NR_setitimer		38		sys_setitimer		(int which, const struct itimerval *val, struct itimerval *old)
+__NR_getpid		39		sys_getpid		(void)
+__NR_socket		41		sys_socket		(int domain, int type, int protocol)
+__NR_sendmsg		46		sys_sendmsg		(int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg		47		sys_recvmsg		(int sockfd, struct msghdr *msg, int flags)
+__NR_bind		49		sys_bind		(int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_setsockopt		54		sys_setsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt		55		sys_getsockopt		(int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_clone		56		sys_clone		(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid)
+__NR_exit		60		sys_exit		(unsigned long error_code)
+__NR_wait4		61		sys_waitpid		(int pid, int *status, int options, struct rusage *ru)
+__NR_kill		62		sys_kill		(long pid, int sig)
+__NR_flock		73		sys_flock		(int fd, unsigned long cmd)
+__NR_unlink		87		sys_unlink		(char *pathname)
+__NR_setresuid		117		sys_setresuid		(int uid, int euid, int suid)
+__NR_setresgid		119		sys_setresgid		(int gid, int egid, int sgid)
+__NR_setfsuid		122		sys_setfsuid		(int fsuid)
+__NR_setfsgid		123		sys_setfsgid		(int fsgid)
+__NR_capset		126		sys_capset		(struct cap_header *h, struct cap_data *d)
+__NR_personality	135		sys_personality		(unsigned int personality)
+__NR_prctl		157		sys_prctl		(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_arch_prctl		158		sys_arch_prctl		(int option, unsigned long addr)
+__NR_gettid		186		sys_gettid		(void)
+__NR_futex		202		sys_futex		(u32 *uaddr, int op, u32 val, struct timespec *utime, u32 *uaddr2, u32 val3)
+__NR_set_thread_area	205		sys_set_thread_area	(user_desc_t *info)
+__NR_get_thread_area	211		sys_get_thread_area	(user_desc_t *info)
+__NR_set_tid_address	218		sys_set_tid_address	(int *tid_addr)
+__NR_restart_syscall	219		sys_restart_syscall	(void)
+__NR_setns		308		sys_setns		(int fd, int nstype)
+__NR_kcmp		312		sys_kcmp		(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
diff --git a/include/syscall.h b/include/syscall.h
deleted file mode 100644
index 31a5d9a..0000000
--- a/include/syscall.h
+++ /dev/null
@@ -1,435 +0,0 @@
-#ifndef CR_SYSCALL_H_
-#define CR_SYSCALL_H_
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <arpa/inet.h>
-
-#include "types.h"
-#include "compiler.h"
-#include "syscall-codes.h"
-
-#ifdef CONFIG_X86_64
-
-static always_inline long syscall0(int nr)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr)
-		: "rax", "memory");
-	return ret;
-}
-
-static always_inline long syscall1(int nr, unsigned long arg0)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0)
-		: "rax", "rdi", "memory");
-	return ret;
-}
-
-static always_inline long syscall2(int nr, unsigned long arg0, unsigned long arg1)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"movq %3, %%rsi		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0), "g" (arg1)
-		: "rax", "rdi", "rsi", "memory");
-	return ret;
-}
-
-static always_inline long syscall3(int nr, unsigned long arg0, unsigned long arg1,
-				   unsigned long arg2)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"movq %3, %%rsi		\t\n"
-		"movq %4, %%rdx		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2)
-		: "rax", "rdi", "rsi", "rdx", "memory");
-	return ret;
-}
-
-static always_inline long syscall4(int nr, unsigned long arg0, unsigned long arg1,
-				   unsigned long arg2, unsigned long arg3)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"movq %3, %%rsi		\t\n"
-		"movq %4, %%rdx		\t\n"
-		"movq %5, %%r10		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
-			"g" (arg3)
-		: "rax", "rdi", "rsi", "rdx", "r10", "memory");
-	return ret;
-}
-
-static long always_inline syscall5(int nr, unsigned long arg0, unsigned long arg1,
-				   unsigned long arg2, unsigned long arg3,
-				   unsigned long arg4)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"movq %3, %%rsi		\t\n"
-		"movq %4, %%rdx		\t\n"
-		"movq %5, %%r10		\t\n"
-		"movq %6, %%r8		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
-			"g" (arg3), "g" (arg4)
-		: "rax", "rdi", "rsi", "rdx", "r10", "r8", "memory");
-	return ret;
-}
-
-static long always_inline syscall6(int nr, unsigned long arg0, unsigned long arg1,
-				   unsigned long arg2, unsigned long arg3,
-				   unsigned long arg4, unsigned long arg5)
-{
-	long ret;
-	asm volatile(
-		"movl %1, %%eax		\t\n"
-		"movq %2, %%rdi		\t\n"
-		"movq %3, %%rsi		\t\n"
-		"movq %4, %%rdx		\t\n"
-		"movq %5, %%r10		\t\n"
-		"movq %6, %%r8		\t\n"
-		"movq %7, %%r9		\t\n"
-		"syscall		\t\n"
-		"movq %%rax, %0		\t\n"
-		: "=r"(ret)
-		: "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
-			"g" (arg3), "g" (arg4), "g" (arg5)
-		: "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9", "memory");
-	return ret;
-}
-
-static always_inline unsigned long sys_pause(void)
-{
-	return syscall0(__NR_pause);
-}
-
-static always_inline unsigned long sys_shmat(int shmid, void *shmaddr, int shmflag)
-{
-	return syscall3(__NR_shmat, shmid, (unsigned long)shmaddr, shmflag);
-}
-static always_inline unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot,
-					    unsigned long flags, unsigned long fd, unsigned long offset)
-{
-	return syscall6(__NR_mmap, (unsigned long)addr,
-			len, prot, flags, fd, offset);
-}
-
-static always_inline unsigned long sys_munmap(void *addr,unsigned long len)
-{
-	return syscall2(__NR_munmap, (unsigned long)addr, len);
-}
-
-static always_inline long sys_open(const char *filename, unsigned long flags, unsigned long mode)
-{
-	return syscall3(__NR_open, (unsigned long)filename, flags, mode);
-}
-
-static always_inline long sys_sigaction(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact)
-{
-	return syscall4(__NR_rt_sigaction, signum, (unsigned long)act, (unsigned long)oldact, sizeof(rt_sigset_t));
-}
-
-static always_inline long sys_getitimer(int which, const struct itimerval *val)
-{
-	return syscall2(__NR_getitimer, (unsigned long)which, (unsigned long)val);
-}
-
-static always_inline long sys_setitimer(int which, const struct itimerval *val, struct itimerval *old)
-{
-	return syscall3(__NR_setitimer, (unsigned long)which, (unsigned long)val, (unsigned long)old);
-}
-
-static always_inline long sys_close(int fd)
-{
-	return syscall1(__NR_close, fd);
-}
-
-static always_inline long sys_write(unsigned long fd, const void *buf, unsigned long count)
-{
-	return syscall3(__NR_write, fd, (unsigned long)buf, count);
-}
-
-static always_inline long sys_mincore(unsigned long addr, unsigned long size, void *vec)
-{
-	return syscall3(__NR_mincore, addr, size, (unsigned long)vec);
-}
-
-static always_inline long sys_lseek(unsigned long fd, unsigned long offset, unsigned long origin)
-{
-	return syscall3(__NR_lseek, fd, offset, origin);
-}
-
-static always_inline long sys_mprotect(unsigned long start, unsigned long len, unsigned long prot)
-{
-	return syscall3(__NR_mprotect, start, len, prot);
-}
-
-static always_inline long sys_nanosleep(struct timespec *req, struct timespec *rem)
-{
-	return syscall2(__NR_nanosleep, (unsigned long)req, (unsigned long)rem);
-}
-
-static always_inline long sys_read(unsigned long fd, void *buf, unsigned long count)
-{
-	return syscall3(__NR_read, fd, (unsigned long)buf, count);
-}
-
-static always_inline long sys_waitpid(int pid, int *status, int options)
-{
-	return syscall4(__NR_wait4, pid, (unsigned long)status, options, 0);
-}
-
-static always_inline long sys_exit(unsigned long error_code)
-{
-	return syscall1(__NR_exit, error_code);
-}
-
-static always_inline unsigned long sys_getpid(void)
-{
-	return syscall0(__NR_getpid);
-}
-
-static always_inline unsigned long sys_gettid(void)
-{
-	return syscall0(__NR_gettid);
-}
-
-static always_inline long sys_unlink(char *pathname)
-{
-	return syscall1(__NR_unlink, (unsigned long)pathname);
-}
-
-/*
- * Note this call expects a signal frame on stack
- * (regs->sp) so be very carefull here!
- */
-static always_inline long sys_rt_sigreturn(void)
-{
-	return syscall0(__NR_rt_sigreturn);
-}
-
-static always_inline long sys_sigprocmask(int how, k_rtsigset_t *set,
-		k_rtsigset_t *old)
-{
-	return syscall4(__NR_rt_sigprocmask, how, (unsigned long)set,
-			(unsigned long)old, (unsigned long)sizeof(k_rtsigset_t));
-}
-
-static always_inline long sys_set_thread_area(user_desc_t *info)
-{
-	return syscall1(__NR_set_thread_area, (long)info);
-}
-
-static always_inline long sys_get_thread_area(user_desc_t *info)
-{
-	return syscall1(__NR_get_thread_area, (long)info);
-}
-
-static always_inline long sys_arch_prctl(int code, void *addr)
-{
-	return syscall2(__NR_arch_prctl, code, (unsigned long)addr);
-}
-
-static always_inline long sys_prctl(int code, unsigned long arg2, unsigned long arg3,
-				    unsigned long arg4, unsigned long arg5)
-{
-	return syscall5(__NR_prctl, code, arg2, arg3, arg4, arg5);
-}
-
-static always_inline long sys_brk(unsigned long arg)
-{
-	return syscall1(__NR_brk, arg);
-}
-
-static always_inline long sys_clone(unsigned long flags, void *child_stack,
-				    void *parent_tid, void *child_tid)
-{
-	return syscall4(__NR_clone, flags, (unsigned long)child_stack,
-			(unsigned long)parent_tid, (unsigned long)child_tid);
-}
-
-static always_inline long sys_futex(u32 *uaddr, int op, u32 val,
-				    struct timespec *utime,
-				    u32 *uaddr2, u32 val3)
-{
-	return syscall6(__NR_futex, (unsigned long)uaddr,
-			(unsigned long)op, (unsigned long)val,
-			(unsigned long)utime,
-			(unsigned long)uaddr2,
-			(unsigned long)val3);
-}
-
-static always_inline long sys_flock(unsigned long fd, unsigned long cmd)
-{
-	return syscall2(__NR_flock, fd, cmd);
-}
-
-static void always_inline local_sleep(long seconds)
-{
-	struct timespec req, rem;
-
-	req = (struct timespec){
-		.tv_sec		= seconds,
-		.tv_nsec	= 0,
-	};
-
-	sys_nanosleep(&req, &rem);
-}
-
-static long always_inline sys_kill(long pid, int sig)
-{
-	return syscall2(__NR_kill, pid, (long)sig);
-}
-
-static long always_inline sys_tgkill(long tgid, long pid, int sig)
-{
-	return syscall3(__NR_tgkill, tgid, pid, (long)sig);
-}
-
-static long always_inline sys_msync(void *addr, unsigned long length, int flags)
-{
-	return syscall3(__NR_msync, (long)addr, length, (long)flags);
-}
-
-static long always_inline sys_setns(int fd, int nstype)
-{
-	return syscall2(__NR_setns, (long)fd, (long)nstype);
-}
-
-static long sys_setresuid(int uid, int euid, int suid)
-{
-	return syscall3(__NR_setresuid, (long)uid, (long)euid, (long)suid);
-}
-
-static long sys_setresgid(int gid, int egid, int sgid)
-{
-	return syscall3(__NR_setresgid, (long)gid, (long)egid, (long)sgid);
-}
-
-static long sys_setfsuid(int fsuid)
-{
-	return syscall1(__NR_setfsuid, (long)fsuid);
-}
-
-static long sys_setfsgid(int fsgid)
-{
-	return syscall1(__NR_setfsgid, (long)fsgid);
-}
-
-struct cap_header {
-	u32 version;
-	int pid;
-};
-
-struct cap_data {
-	u32 eff;
-	u32 prm;
-	u32 inh;
-};
-
-static long sys_capset(struct cap_header *h, struct cap_data *d)
-{
-	return syscall2(__NR_capset, (long)h, (long)d);
-}
-
-static int sys_socket(int domain, int type, int protocol)
-{
-	return syscall3(__NR_socket, (long) domain, (long) type, (long) protocol);
-}
-
-struct sockaddr;
-static int sys_bind(int sockfd, const struct sockaddr *addr, int addrlen)
-{
-	return syscall3(__NR_bind, (long)sockfd, (long)addr, (long) addrlen);
-}
-
-struct msghdr;
-static long sys_sendmsg(int sockfd, const struct msghdr *msg, int flags)
-{
-	return syscall3(__NR_sendmsg, (long)sockfd, (long)msg, (long) flags);
-}
-
-static long sys_recvmsg(int sockfd, struct msghdr *msg, int flags)
-{
-	return syscall3(__NR_recvmsg, (long)sockfd, (long)msg, (long) flags);
-}
-
-static long always_inline sys_getsockopt(int sockfd, int level, int optname,
-					 const void *optval, socklen_t *optlen)
-{
-	return syscall5(__NR_getsockopt, (unsigned long)sockfd,
-			(unsigned long)level, (unsigned long)optname,
-			(unsigned long)optval, (unsigned long)optlen);
-}
-
-static long always_inline sys_setsockopt(int sockfd, int level, int optname,
-					 const void *optval, socklen_t optlen)
-{
-	return syscall5(__NR_setsockopt, (unsigned long)sockfd,
-			(unsigned long)level, (unsigned long)optname,
-			(unsigned long)optval, (unsigned long)optlen);
-}
-
-static void sys_set_tid_address(int *tid_addr) {
-	syscall1(__NR_set_tid_address, (long) tid_addr);
-}
-
-static long always_inline
-sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
-{
-	return syscall5(__NR_kcmp, (long)pid1, (long)pid2, (long)type, idx1, idx2);
-}
-
-#ifndef CLONE_NEWPID
-#define CLONE_NEWPID	0x20000000
-#endif
-
-#ifndef CLONE_NEWUTS
-#define CLONE_NEWUTS	0x04000000
-#endif
-
-#ifndef CLONE_NEWIPC
-#define CLONE_NEWIPC	0x08000000
-#endif
-
-#define setns	sys_setns
-
-#else /* CONFIG_X86_64 */
-# error x86-32 bit mode not yet implemented
-#endif /* CONFIG_X86_64 */
-
-#endif /* CR_SYSCALL_H_ */
diff --git a/parasite.c b/parasite.c
index 134b068..ad90452 100644
--- a/parasite.c
+++ b/parasite.c
@@ -45,9 +45,8 @@ static int brk_init(void)
 	/*
 	 *  Map 10 MB. Hope this will be enough for unix skb's...
 	 */
-       ret = sys_mmap(0, MAX_HEAP_SIZE,
-			    PROT_READ | PROT_WRITE,
-			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       ret = sys_mmap(NULL, MAX_HEAP_SIZE, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	if (ret < 0)
                return -ENOMEM;
 
@@ -168,7 +167,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
 	if (!(args->vma_entry.prot & PROT_READ)) {
 		prot_old = (unsigned long)args->vma_entry.prot;
 		prot_new = prot_old | PROT_READ;
-		ret = sys_mprotect((unsigned long)args->vma_entry.start,
+		ret = sys_mprotect((void *)args->vma_entry.start,
 				   (unsigned long)vma_entry_len(&args->vma_entry),
 				   prot_new);
 		if (ret) {
@@ -183,7 +182,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
 	 * so stick for mincore as a basis.
 	 */
 
-	ret = sys_mincore((unsigned long)args->vma_entry.start, length, map);
+	ret = sys_mincore((void *)args->vma_entry.start, length, map);
 	if (ret) {
 		sys_write_msg("sys_mincore failed\n");
 		SET_PARASITE_RET(st, ret);
@@ -218,7 +217,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
 	 * Don't left pages readable if they were not.
 	 */
 	if (prot_old != prot_new) {
-		ret = sys_mprotect((unsigned long)args->vma_entry.start,
+		ret = sys_mprotect((void *)args->vma_entry.start,
 				   (unsigned long)vma_entry_len(&args->vma_entry),
 				   prot_old);
 		if (ret) {
@@ -260,7 +259,7 @@ static int dump_sigact(parasite_status_t *st)
 		if (sig == SIGKILL || sig == SIGSTOP)
 			continue;
 
-		ret = sys_sigaction(sig, NULL, &act);
+		ret = sys_sigaction(sig, NULL, &act, sizeof(rt_sigset_t));
 		if (ret < 0) {
 			sys_write_msg("sys_sigaction failed\n");
 			SET_PARASITE_RET(st, ret);
@@ -519,7 +518,7 @@ static int init(struct parasite_init_args *args)
 	}
 
 	ksigfillset(&to_block);
-	ret = sys_sigprocmask(SIG_SETMASK, &to_block, &old_blocked);
+	ret = sys_sigprocmask(SIG_SETMASK, &to_block, &old_blocked, sizeof(k_rtsigset_t));
 	if (ret < 0)
 		reset_blocked = ret;
 	else
@@ -546,7 +545,7 @@ static int parasite_set_logfd(parasite_status_t *st)
 static int fini(void)
 {
 	if (reset_blocked == 1)
-		sys_sigprocmask(SIG_SETMASK, &old_blocked, NULL);
+		sys_sigprocmask(SIG_SETMASK, &old_blocked, NULL, sizeof(k_rtsigset_t));
 	sys_close(logfd);
 	sys_close(tsock);
 	brk_fini();
diff --git a/restorer.c b/restorer.c
index 78b0584..3e4019a 100644
--- a/restorer.c
+++ b/restorer.c
@@ -30,6 +30,7 @@
 			write_num_n(__LINE__);				\
 			write_num_n(ret);				\
 		}							\
+		barrier();						\
 		__ret;							\
 	})
 
@@ -136,7 +137,16 @@ long restore_thread(struct thread_restore_args *args)
 	unsigned long new_sp, fsgs_base;
 	int my_pid = sys_gettid();
 
+	if (!args->pid) {
+		write_string_n("No pids provided");
+		write_num_n(__LINE__);
+		write_num_n(my_pid);
+		write_num_n(args->pid);
+		goto core_restore_end;
+	}
+
 	if (my_pid != args->pid) {
+		write_string_n("args->pid pismatch");
 		write_num_n(__LINE__);
 		write_num_n(my_pid);
 		write_num_n(args->pid);
@@ -185,7 +195,7 @@ long restore_thread(struct thread_restore_args *args)
 	CPREGT1(fs);
 
 	fsgs_base = core_entry->arch.gpregs.fs_base;
-	ret = sys_arch_prctl(ARCH_SET_FS, (void *)fsgs_base);
+	ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
 	if (ret) {
 		write_num_n(__LINE__);
 		write_num_n(ret);
@@ -193,7 +203,7 @@ long restore_thread(struct thread_restore_args *args)
 	}
 
 	fsgs_base = core_entry->arch.gpregs.gs_base;
-	ret = sys_arch_prctl(ARCH_SET_GS, (void *)fsgs_base);
+	ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
 	if (ret) {
 		write_num_n(__LINE__);
 		write_num_n(ret);
@@ -228,7 +238,7 @@ long restore_thread(struct thread_restore_args *args)
 		: "rax","rsp","memory");
 core_restore_end:
 	write_num_n(__LINE__);
-	write_num_n(sys_getpid());
+	write_num_n(sys_gettid());
 	sys_exit(-1);
 	return -1;
 }
@@ -359,9 +369,9 @@ long restore_task(struct task_restore_core_args *args)
 	rt_sigaction_t act;
 
 	task_entries = args->task_entries;
-	sys_sigaction(SIGCHLD, NULL, &act);
+	sys_sigaction(SIGCHLD, NULL, &act, sizeof(rt_sigset_t));
 	act.rt_sa_handler = sigchld_handler;
-	sys_sigaction(SIGCHLD, &act, NULL);
+	sys_sigaction(SIGCHLD, &act, NULL, sizeof(rt_sigset_t));
 
 	restorer_set_logfd(args->logfd);
 
@@ -482,7 +492,7 @@ long restore_task(struct task_restore_core_args *args)
 		if (vma_entry->prot & PROT_WRITE)
 			continue;
 
-		sys_mprotect(vma_entry->start,
+		sys_mprotect((void *)vma_entry->start,
 			     vma_entry_len(vma_entry),
 			     vma_entry->prot);
 	}
@@ -497,11 +507,17 @@ long restore_task(struct task_restore_core_args *args)
 		goto core_restore_end;
 	}
 
-	sys_set_tid_address((int *) core_entry->clear_tid_address);
+	ret = sys_set_tid_address((int *)core_entry->clear_tid_address);
+	if (ret < 0) {
+		write_num_n(__LINE__);
+		write_num_n(ret);
+		goto core_restore_end;
+	}
 
 	/*
 	 * Tune up the task fields.
 	 */
+	ret = 0;
 	ret |= sys_prctl_safe(PR_SET_NAME, (long)core_entry->tc.comm, 0, 0);
 	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_START_CODE, (long)core_entry->tc.mm_start_code, 0);
 	ret |= sys_prctl_safe(PR_SET_MM, PR_SET_MM_END_CODE,	(long)core_entry->tc.mm_end_code, 0);
@@ -527,8 +543,11 @@ long restore_task(struct task_restore_core_args *args)
 	 */
 	ret = restore_self_exe_late(args);
 	sys_close(args->fd_fdinfo);
-	if (ret)
+	if (ret) {
+		write_num_n(__LINE__);
+		write_num_n(ret);
 		goto core_restore_end;
+	}
 
 	/*
 	 * We need to prepare a valid sigframe here, so
@@ -564,7 +583,7 @@ long restore_task(struct task_restore_core_args *args)
 	CPREG1(fs);
 
 	fsgs_base = core_entry->arch.gpregs.fs_base;
-	ret = sys_arch_prctl(ARCH_SET_FS, (void *)fsgs_base);
+	ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
 	if (ret) {
 		write_num_n(__LINE__);
 		write_num_n(ret);
@@ -572,7 +591,7 @@ long restore_task(struct task_restore_core_args *args)
 	}
 
 	fsgs_base = core_entry->arch.gpregs.gs_base;
-	ret = sys_arch_prctl(ARCH_SET_GS, (void *)fsgs_base);
+	ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
 	if (ret) {
 		write_num_n(__LINE__);
 		write_num_n(ret);
@@ -713,7 +732,7 @@ long restore_task(struct task_restore_core_args *args)
 
 	cr_wait_while(&args->task_entries->start, CR_STATE_RESTORE);
 
-	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL);
+	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(rt_sigset_t));
 
 	cr_wait_dec(&args->task_entries->nr_in_progress);
 
@@ -739,6 +758,8 @@ long restore_task(struct task_restore_core_args *args)
 
 	ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
 	if (ret < 0) {
+		write_num_n(__LINE__);
+		write_num_n(-ret);
 		ret = ((long)__LINE__ << 32) | -ret;
 		goto core_restore_failed;
 	}
diff --git a/syscall-common.S b/syscall-common.S
new file mode 100644
index 0000000..84bcd8b
--- /dev/null
+++ b/syscall-common.S
@@ -0,0 +1,16 @@
+#include "linkage.h"
+/* SYSCALL(name, opcode): entry point that loads the syscall number into %eax and jumps to the shared tail below. */
+#define SYSCALL(name, opcode)		\
+	ENTRY(name);			\
+	movl	$opcode, %eax;		\
+	jmp	__syscall_common;	\
+	END(name)
+
+	.text
+	.align	4
+/* Shared tail: copy %rcx into %r10 (where the kernel expects the fourth argument), issue the syscall, return to the stub's caller. */
+ENTRY(__syscall_common)
+	movq	%rcx, %r10
+	syscall
+	ret
+END(__syscall_common)
diff --git a/syscalls.pl b/syscalls.pl
new file mode 100644
index 0000000..f751869
--- /dev/null
+++ b/syscalls.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl -w
+# Generate the syscall codes header, prototypes header and asm stubs from a .def table.
+use strict;
+
+die "usage: $0 <def> <codes.h> <protos.h> <asm.S> <asm-common.S> <types.h>\n" if @ARGV < 6;
+my ($in, $codes, $protos, $asm, $asmcommon, $proto_types) = @ARGV;
+
+open(my $in_fh, '<', $in) or die "$0: cannot open: $in\n";
+open(my $codes_fh, '>', $codes) or die "$0: cannot open: $codes\n";
+open(my $protos_fh, '>', $protos) or die "$0: cannot open: $protos\n";
+open(my $asm_fh, '>', $asm) or die "$0: cannot open: $asm\n";
+
+s/include\///g for ($codes, $protos, $proto_types);	# headers are included relative to include/
+
+print $asm_fh "/* Autogenerated, don't edit */\n";
+print $asm_fh "#include \"$codes\"\n\n";
+print $asm_fh "#include \"$asmcommon\"\n";
+
+# Include-guard macros: the header names with separator characters turned into '_'.
+my ($codes_def, $protos_def) = ($codes, $protos);
+s/[\s|\-.\/]/_/g for ($codes_def, $protos_def);
+
+print $codes_fh  "/* Autogenerated, don't edit */\n#ifndef $codes_def\n#define $codes_def\n";
+print $protos_fh "/* Autogenerated, don't edit */\n#ifndef $protos_def\n#define $protos_def\n";
+print $protos_fh "#include \"$proto_types\"\n";
+print $protos_fh "#include \"$codes\"\n";
+
+while (defined(my $line = <$in_fh>)) {
+	chomp $line;
+	$line =~ s/^\s+//;
+	$line =~ s/\s*\#.*$//;
+	next if ($line eq '');
+
+	# Tab-separated fields: __NR_ name, code, entry point name, argument list.
+	my @field = split(/\t+/, $line);
+	if ($#field >= 1) {
+		print $codes_fh "#define $field[0] $field[1]\n";
+	}
+
+	if ($#field >= 2) {
+		print $protos_fh "extern long $field[2]$field[3];\n";
+		print $asm_fh "SYSCALL($field[2], $field[0])\n";
+	}
+}
+
+print $codes_fh "#endif /* $codes_def */\n";
+print $protos_fh "#endif /* $protos_def */\n";
+
+close($in_fh);
+# Buffered write errors only surface at close, so check every output (the asm file was never closed before).
+close($_) or die "$0: cannot close output file: $!\n" for ($codes_fh, $protos_fh, $asm_fh);
-- 
1.7.7.6



More information about the CRIU mailing list