[CRIU] [PATCH 08/10] syscalls: Complete redesign v4
Cyrill Gorcunov
gorcunov at openvz.org
Sat Apr 14 16:43:57 EDT 2012
In the early days we used only a few syscalls,
which together with debug compiler options always
produced relative addresses for memory variables used
in the parasite and restorer blobs. Thus it went unnoticed
that there is something wrong with the syscall declarations
we use.
Basically all our syscalls are just wrappers over inline
assembly code of the form
static long syscall2(int nr, long arg0, long arg1)
{
long ret;
asm volatile(
"movl %1, %%eax \t\n"
"movq %2, %%rdi \t\n"
"movq %3, %%rsi \t\n"
"syscall \t\n"
"movq %%rax, %0 \t\n"
: "=r"(ret)
: "g" ((int)nr), "g" (arg0), "g" (arg1)
: "rax", "rdi", "rsi", "memory");
return ret;
}
so every argument is treated as a plain long (even if the call
semantics imply that a memory address is passed rather than an
immediate integer value) and transferred via a general-purpose
register.
As mentioned, this caused no problems when debug options were
specified at compile time: the compiler does not try to optimize
addressing but generates code which always computes the addresses.
The situation changes if one builds crtools with
optimization enabled -- the compiler sees that the arguments
are plain long numbers and might pass direct addresses
of variables instead of generating relative addresses
(because the function declarations have no pointers and the 'g'
constraint is used together with 'mov', which is of course wrong).
To fix all this, syscall declarations are now generated from
the syscall.def file and function arguments are passed in
conformance with the x86-64 ABI.
This shrinks the amount of source code needed to declare syscalls
and opens the way to using optimization.
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
Makefile | 9 +-
Makefile.inc | 1 +
Makefile.pie | 18 +-
Makefile.syscall | 37 ++++
cr-restore.c | 2 +-
include/syscall-codes.h | 63 -------
include/syscall-types.h | 52 ++++++
include/syscall.def | 58 ++++++
include/syscall.h | 444 -----------------------------------------------
parasite.c | 14 +-
restorer.c | 16 +-
syscall-common.S | 16 ++
syscalls.pl | 51 ++++++
13 files changed, 245 insertions(+), 536 deletions(-)
create mode 100644 Makefile.syscall
delete mode 100644 include/syscall-codes.h
create mode 100644 include/syscall-types.h
create mode 100644 include/syscall.def
delete mode 100644 include/syscall.h
create mode 100644 syscall-common.S
create mode 100644 syscalls.pl
diff --git a/Makefile b/Makefile
index dea0745..03532e1 100644
--- a/Makefile
+++ b/Makefile
@@ -48,6 +48,7 @@ OBJS += ipc_ns.o
DEPS := $(patsubst %.o,%.d,$(OBJS))
+-include Makefile.syscall
-include Makefile.pie
all: $(PROGRAM)
@@ -64,11 +65,11 @@ all: $(PROGRAM)
$(E) " CC " $@
$(Q) $(CC) -S $(CFLAGS) -fverbose-asm $< -o $@
-$(PROGRAM): $(OBJS) | $(PIE-GEN)
+$(PROGRAM): $(OBJS) | $(SYS-OBJ) $(PIE-GEN)
$(E) " LINK " $@
- $(Q) $(CC) $(CFLAGS) $(OBJS) $(LIBS) -o $@
+ $(Q) $(CC) $(CFLAGS) $(OBJS) $(SYS-OBJ) $(LIBS) -o $@
-%.d: %.c | $(PIE-GEN)
+%.d: %.c | $(SYS-OBJ) $(PIE-GEN)
$(Q) $(CC) -M -MT $(patsubst %.d,%.o,$@) $(CFLAGS) $< -o $@
test-legacy: $(PROGRAM)
@@ -90,7 +91,7 @@ rebuild:
$(Q) $(MAKE)
.PHONY: rebuild
-clean: cleanpie
+clean: cleanpie cleansyscall
$(E) " CLEAN"
$(Q) $(RM) -f ./*.o
$(Q) $(RM) -f ./*.d
diff --git a/Makefile.inc b/Makefile.inc
index ffa02d9..c755a38 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -19,6 +19,7 @@ NM := nm
AWK := awk
SH := sh
MAKE := make
+PERL := perl
# Additional ARCH settings for x86
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
diff --git a/Makefile.pie b/Makefile.pie
index 5f0076d..8f04dc0 100644
--- a/Makefile.pie
+++ b/Makefile.pie
@@ -26,37 +26,37 @@ DEPS += $(patsubst %.o,%.d,$(ROBJS))
PIEFLAGS := -fpie
ASMFLAGS := -D__ASSEMBLY__
-$(PASM-OBJS): $(PASM-SRC)
+$(PASM-OBJS): $(PASM-SRC) $(SYS-OBJ)
$(E) " CC " $@
$(Q) $(CC) -c $(ASMFLAGS) $(CFLAGS) $(PIEFLAGS) $(patsubst %.o,%.S,$@) -o $@
-$(POBJS): $(PSRCS) $(PASM-OBJS)
+$(POBJS): $(PSRCS) $(PASM-OBJS) $(SYS-OBJ)
$(E) " CC " $@
$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $(patsubst %.o,%.c,$@) -o $@
-parasite-util-net.o: util-net.c
+parasite-util-net.o: util-net.c $(SYS-OBJ)
$(E) " CC " $@
$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $< -o $@
POBJS += parasite-util-net.o
-$(PBLOB-BIN): $(PBLOB-LDS) $(POBJS) $(PASM-OBJS)
+$(PBLOB-BIN): $(PBLOB-LDS) $(POBJS)
$(E) " GEN " $@
- $(Q) $(LD) --oformat=binary -T $(PBLOB-LDS) -o $(PBLOB-BIN) $(POBJS) $(PASM-OBJS)
- $(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) -o $(PBLOB-BIN).o $(POBJS) $(PASM-OBJS)
+ $(Q) $(LD) --oformat=binary -T $(PBLOB-LDS) -o $(PBLOB-BIN) $(POBJS) $(PASM-OBJS) $(SYS-OBJ)
+ $(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) -o $(PBLOB-BIN).o $(POBJS) $(PASM-OBJS) $(SYS-OBJ)
$(PBLOB-HDR): $(PBLOB-BIN) $(GEN-OFFSETS)
$(E) " GEN " $@
$(Q) $(SH) $(GEN-OFFSETS) $(PBLOB-NAME) > $@ || rm -f $@
-$(ROBJS): $(RSRCS)
+$(ROBJS): $(RSRCS) $(SYS-OBJ)
$(E) " CC " $@
$(Q) $(CC) -c $(CFLAGS) $(PIEFLAGS) $(patsubst %.o,%.c,$@) -o $@
$(RBLOB-BIN): $(RBLOB-LDS) $(ROBJS)
$(E) " GEN " $@
- $(Q) $(LD) --oformat=binary -T $(RBLOB-LDS) -o $(RBLOB-BIN) $(ROBJS)
- $(Q) $(LD) --oformat=elf64-x86-64 -T $(RBLOB-LDS) -o $(RBLOB-BIN).o $(ROBJS)
+ $(Q) $(LD) --oformat=binary -T $(RBLOB-LDS) -o $(RBLOB-BIN) $(ROBJS) $(SYS-OBJ)
+ $(Q) $(LD) --oformat=elf64-x86-64 -T $(RBLOB-LDS) -o $(RBLOB-BIN).o $(ROBJS) $(SYS-OBJ)
$(RBLOB-HDR): $(RBLOB-BIN) $(GEN-OFFSETS)
$(E) " GEN " $@
diff --git a/Makefile.syscall b/Makefile.syscall
new file mode 100644
index 0000000..e7e5ae2
--- /dev/null
+++ b/Makefile.syscall
@@ -0,0 +1,37 @@
+SYS-DEF := include/syscall.def
+SYS-ASM-COMMON := syscall-common.S
+SYS-TYPES := include/syscall-types.h
+
+SYS-CODES := include/syscall-codes.h
+SYS-PROTO := include/syscall.h
+
+SYS-ASM := syscall.S
+SYS-GEN := syscalls.pl
+
+SYS-OBJ := $(patsubst %.S,%.o,$(SYS-ASM))
+
+SYS-FLAGS := -pie -Wstrict-prototypes -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer
+
+$(SYS-ASM): $(SYS-GEN) $(SYS-DEF) $(SYS-ASM-COMMON) $(SYS-TYPES)
+ $(E) " GEN " $@
+ $(Q) $(PERL) \
+ $(SYS-GEN) \
+ $(SYS-DEF) \
+ $(SYS-CODES) \
+ $(SYS-PROTO) \
+ $(SYS-ASM) \
+ $(SYS-ASM-COMMON) \
+ $(SYS-TYPES)
+
+$(SYS-OBJ): $(SYS-ASM)
+ $(E) " CC " $@.prelim
+ $(Q) $(CC) -c $(CFLAGS) $(SYS-FLAGS) $< -o $@.prelim
+ $(E) " LD " $@
+ $(Q) $(LD) --oformat=elf64-x86-64 -T $(PBLOB-LDS) $@.prelim -o $@
+
+cleansyscall:
+ $(E) " CLEAN SYSCALLS"
+ $(Q) $(RM) -f ./$(SYS-ASM)
+ $(Q) $(RM) -f ./$(SYS-CODES)
+ $(Q) $(RM) -f ./$(SYS-PROTO)
+ $(Q) $(RM) -f ./*.prelim
diff --git a/cr-restore.c b/cr-restore.c
index 6f4bcad..bf0e42b 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -485,7 +485,7 @@ static int prepare_sigactions(int pid)
* A pure syscall is used, because glibc
* sigaction overwrites se_restorer.
*/
- ret = sys_sigaction(sig, &act, &oact);
+ ret = sys_sigaction(sig, &act, &oact, sizeof(rt_sigset_t));
if (ret == -1) {
pr_err("%d: Can't restore sigaction: %m\n", pid);
goto err;
diff --git a/include/syscall-codes.h b/include/syscall-codes.h
deleted file mode 100644
index b7ed848..0000000
--- a/include/syscall-codes.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#ifndef CR_SYSCALL_CODES_H_
-#define CR_SYSCALL_CODES_H_
-
-#ifdef CONFIG_X86_64
-
-#define __NR_read 0
-#define __NR_write 1
-#define __NR_open 2
-#define __NR_close 3
-#define __NR_lseek 8
-#define __NR_mmap 9
-#define __NR_mprotect 10
-#define __NR_munmap 11
-#define __NR_brk 12
-#define __NR_rt_sigaction 13
-#define __NR_rt_sigprocmask 14
-#define __NR_rt_sigreturn 15
-#define __NR_mincore 27
-#define __NR_shmat 30
-#define __NR_dup 32
-#define __NR_dup2 33
-#define __NR_pause 34
-#define __NR_nanosleep 35
-#define __NR_getitimer 36
-#define __NR_setitimer 38
-#define __NR_getpid 39
-#define __NR_socket 41
-#define __NR_sendmsg 46
-#define __NR_recvmsg 47
-#define __NR_bind 49
-#define __NR_setsockopt 54
-#define __NR_getsockopt 55
-#define __NR_clone 56
-#define __NR_exit 60
-#define __NR_wait4 61
-#define __NR_kill 62
-#define __NR_fcntl 72
-#define __NR_flock 73
-#define __NR_unlink 87
-#define __NR_setresuid 117
-#define __NR_setresgid 119
-#define __NR_setfsuid 122
-#define __NR_setfsgid 123
-#define __NR_capset 126
-#define __NR_tgkill 131
-#define __NR__sysctl 156
-#define __NR_prctl 157
-#define __NR_arch_prctl 158
-#define __NR_gettid 186
-#define __NR_futex 202
-#define __NR_set_thread_area 205
-#define __NR_get_thread_area 211
-#define __NR_set_tid_address 218
-#define __NR_restart_syscall 219
-#define __NR_msync 227
-#define __NR_setns 308
-#define __NR_kcmp 312
-
-#else /* CONFIG_X86_64 */
-# error x86-32 bit mode not yet implemented
-#endif /* CONFIG_X86_64 */
-
-#endif /* CR_SYSCALL_CODES_H_ */
diff --git a/include/syscall-types.h b/include/syscall-types.h
new file mode 100644
index 0000000..e3160a3
--- /dev/null
+++ b/include/syscall-types.h
@@ -0,0 +1,52 @@
+/*
+ * Please add here type definitions if
+ * syscall prototypes need them.
+ *
+ * Anything else should go to plain type.h
+ */
+
+#ifndef SYSCALL_TYPES_H__
+#define SYSCALL_TYPES_H__
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+
+#include "types.h"
+#include "compiler.h"
+
+#ifndef CONFIG_X86_64
+# error x86-32 bit mode not yet implemented
+#endif
+
+struct cap_header {
+ u32 version;
+ int pid;
+};
+
+struct cap_data {
+ u32 eff;
+ u32 prm;
+ u32 inh;
+};
+
+struct sockaddr;
+struct msghdr;
+struct rusage;
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef CLONE_NEWUTS
+#define CLONE_NEWUTS 0x04000000
+#endif
+
+#ifndef CLONE_NEWIPC
+#define CLONE_NEWIPC 0x08000000
+#endif
+
+#define setns sys_setns
+
+#endif /* SYSCALL_TYPES_H__ */
diff --git a/include/syscall.def b/include/syscall.def
new file mode 100644
index 0000000..50c0926
--- /dev/null
+++ b/include/syscall.def
@@ -0,0 +1,58 @@
+#
+# System calls table, please make sure the table consists only of the syscalls
+# really used somewhere in the project.
+#
+# The template is (name and arguments are optional if you need only __NR_x
+# defined, but no real entry point in the syscalls lib).
+#
+# name code name arguments
+# -----------------------------------------------------------------------
+#
+__NR_read 0 sys_read (int fd, void *buf, unsigned long count)
+__NR_write 1 sys_write (int fd, const void *buf, unsigned long count)
+__NR_open 2 sys_open (const char *filename, unsigned long flags, unsigned long mode)
+__NR_close 3 sys_close (int fd)
+__NR_lseek 8 sys_lseek (int fd, unsigned long offset, unsigned long origin)
+__NR_mmap 9 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset)
+__NR_mprotect 10 sys_mprotect (const void *addr, unsigned long len, unsigned long prot)
+__NR_munmap 11 sys_munmap (void *addr, unsigned long len)
+__NR_brk 12 sys_brk (void *addr)
+__NR_rt_sigaction 13 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize)
+__NR_rt_sigprocmask 14 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize)
+__NR_rt_sigreturn 15 sys_rt_sigreturn (void)
+__NR_mincore 27 sys_mincore (void *addr, unsigned long size, unsigned char *vec)
+__NR_shmat 30 sys_shmat (int shmid, void *shmaddr, int shmflag)
+__NR_pause 34 sys_pause (void)
+__NR_nanosleep 35 sys_nanosleep (struct timespec *req, struct timespec *rem)
+__NR_getitimer 36 sys_getitimer (int which, const struct itimerval *val)
+__NR_setitimer 38 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old)
+__NR_getpid 39 sys_getpid (void)
+__NR_socket 41 sys_socket (int domain, int type, int protocol)
+__NR_sendmsg 46 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags)
+__NR_recvmsg 47 sys_recvmsg (int sockfd, struct msghdr *msg, int flags)
+__NR_bind 49 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen)
+__NR_setsockopt 54 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen)
+__NR_getsockopt 55 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen)
+__NR_clone 56 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, void *child_tid)
+__NR_exit 60 sys_exit (unsigned long error_code)
+__NR_wait4 61 sys_waitpid (int pid, int *status, int options, struct rusage *ru)
+__NR_kill 62 sys_kill (long pid, int sig)
+__NR_fcntl 72 sys_fcntl (int fd, int type, long arg)
+__NR_flock 73 sys_flock (int fd, unsigned long cmd)
+__NR_unlink 87 sys_unlink (char *pathname)
+__NR_setresuid 117 sys_setresuid (int uid, int euid, int suid)
+__NR_setresgid 119 sys_setresgid (int gid, int egid, int sgid)
+__NR_setfsuid 122 sys_setfsuid (int fsuid)
+__NR_setfsgid 123 sys_setfsgid (int fsgid)
+__NR_capset 126 sys_capset (struct cap_header *h, struct cap_data *d)
+__NR_personality 135 sys_personality (unsigned int personality)
+__NR_prctl 157 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
+__NR_arch_prctl 158 sys_arch_prctl (int option, unsigned long addr)
+__NR_gettid 186 sys_gettid (void)
+__NR_futex 202 sys_futex (u32 *uaddr, int op, u32 val, struct timespec *utime, u32 *uaddr2, u32 val3)
+__NR_set_thread_area 205 sys_set_thread_area (user_desc_t *info)
+__NR_get_thread_area 211 sys_get_thread_area (user_desc_t *info)
+__NR_set_tid_address 218 sys_set_tid_address (int *tid_addr)
+__NR_restart_syscall 219 sys_restart_syscall (void)
+__NR_setns 308 sys_setns (int fd, int nstype)
+__NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
diff --git a/include/syscall.h b/include/syscall.h
deleted file mode 100644
index 5baaf7f..0000000
--- a/include/syscall.h
+++ /dev/null
@@ -1,444 +0,0 @@
-#ifndef CR_SYSCALL_H_
-#define CR_SYSCALL_H_
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <arpa/inet.h>
-
-#include "types.h"
-#include "compiler.h"
-#include "syscall-codes.h"
-
-#ifdef CONFIG_X86_64
-
-static always_inline long syscall0(int nr)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr)
- : "rax", "memory");
- return ret;
-}
-
-static always_inline long syscall1(int nr, unsigned long arg0)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0)
- : "rax", "rdi", "memory");
- return ret;
-}
-
-static always_inline long syscall2(int nr, unsigned long arg0, unsigned long arg1)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "movq %3, %%rsi \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0), "g" (arg1)
- : "rax", "rdi", "rsi", "memory");
- return ret;
-}
-
-static always_inline long syscall3(int nr, unsigned long arg0, unsigned long arg1,
- unsigned long arg2)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "movq %3, %%rsi \t\n"
- "movq %4, %%rdx \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2)
- : "rax", "rdi", "rsi", "rdx", "memory");
- return ret;
-}
-
-static always_inline long syscall4(int nr, unsigned long arg0, unsigned long arg1,
- unsigned long arg2, unsigned long arg3)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "movq %3, %%rsi \t\n"
- "movq %4, %%rdx \t\n"
- "movq %5, %%r10 \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
- "g" (arg3)
- : "rax", "rdi", "rsi", "rdx", "r10", "memory");
- return ret;
-}
-
-static long always_inline syscall5(int nr, unsigned long arg0, unsigned long arg1,
- unsigned long arg2, unsigned long arg3,
- unsigned long arg4)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "movq %3, %%rsi \t\n"
- "movq %4, %%rdx \t\n"
- "movq %5, %%r10 \t\n"
- "movq %6, %%r8 \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
- "g" (arg3), "g" (arg4)
- : "rax", "rdi", "rsi", "rdx", "r10", "r8", "memory");
- return ret;
-}
-
-static long always_inline syscall6(int nr, unsigned long arg0, unsigned long arg1,
- unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- long ret;
- asm volatile(
- "movl %1, %%eax \t\n"
- "movq %2, %%rdi \t\n"
- "movq %3, %%rsi \t\n"
- "movq %4, %%rdx \t\n"
- "movq %5, %%r10 \t\n"
- "movq %6, %%r8 \t\n"
- "movq %7, %%r9 \t\n"
- "syscall \t\n"
- "movq %%rax, %0 \t\n"
- : "=r"(ret)
- : "g" ((int)nr), "g" (arg0), "g" (arg1), "g" (arg2),
- "g" (arg3), "g" (arg4), "g" (arg5)
- : "rax", "rdi", "rsi", "rdx", "r10", "r8", "r9", "memory");
- return ret;
-}
-
-static always_inline unsigned long sys_pause(void)
-{
- return syscall0(__NR_pause);
-}
-
-static always_inline unsigned long sys_shmat(int shmid, void *shmaddr, int shmflag)
-{
- return syscall3(__NR_shmat, shmid, (unsigned long)shmaddr, shmflag);
-}
-static always_inline unsigned long sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- return syscall6(__NR_mmap, (unsigned long)addr,
- len, prot, flags, fd, offset);
-}
-
-static always_inline unsigned long sys_munmap(void *addr,unsigned long len)
-{
- return syscall2(__NR_munmap, (unsigned long)addr, len);
-}
-
-static always_inline long sys_open(const char *filename, unsigned long flags, unsigned long mode)
-{
- return syscall3(__NR_open, (unsigned long)filename, flags, mode);
-}
-
-static always_inline long sys_sigaction(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact)
-{
- return syscall4(__NR_rt_sigaction, signum, (unsigned long)act, (unsigned long)oldact, sizeof(rt_sigset_t));
-}
-
-static always_inline long sys_getitimer(int which, const struct itimerval *val)
-{
- return syscall2(__NR_getitimer, (unsigned long)which, (unsigned long)val);
-}
-
-static always_inline long sys_setitimer(int which, const struct itimerval *val, struct itimerval *old)
-{
- return syscall3(__NR_setitimer, (unsigned long)which, (unsigned long)val, (unsigned long)old);
-}
-
-static always_inline long sys_close(int fd)
-{
- return syscall1(__NR_close, fd);
-}
-
-static always_inline long sys_write(unsigned long fd, const void *buf, unsigned long count)
-{
- return syscall3(__NR_write, fd, (unsigned long)buf, count);
-}
-
-static always_inline long sys_mincore(unsigned long addr, unsigned long size, void *vec)
-{
- return syscall3(__NR_mincore, addr, size, (unsigned long)vec);
-}
-
-static always_inline long sys_lseek(unsigned long fd, unsigned long offset, unsigned long origin)
-{
- return syscall3(__NR_lseek, fd, offset, origin);
-}
-
-static always_inline long sys_mprotect(unsigned long start, unsigned long len, unsigned long prot)
-{
- return syscall3(__NR_mprotect, start, len, prot);
-}
-
-static always_inline long sys_nanosleep(struct timespec *req, struct timespec *rem)
-{
- return syscall2(__NR_nanosleep, (unsigned long)req, (unsigned long)rem);
-}
-
-static always_inline long sys_read(unsigned long fd, void *buf, unsigned long count)
-{
- return syscall3(__NR_read, fd, (unsigned long)buf, count);
-}
-
-static always_inline long sys_waitpid(int pid, int *status, int options)
-{
- return syscall4(__NR_wait4, pid, (unsigned long)status, options, 0);
-}
-
-static always_inline long sys_exit(unsigned long error_code)
-{
- return syscall1(__NR_exit, error_code);
-}
-
-static always_inline unsigned long sys_getpid(void)
-{
- return syscall0(__NR_getpid);
-}
-
-static always_inline unsigned long sys_gettid(void)
-{
- return syscall0(__NR_gettid);
-}
-
-static always_inline long sys_unlink(char *pathname)
-{
- return syscall1(__NR_unlink, (unsigned long)pathname);
-}
-
-/*
- * Note this call expects a signal frame on stack
- * (regs->sp) so be very carefull here!
- */
-static always_inline long sys_rt_sigreturn(void)
-{
- return syscall0(__NR_rt_sigreturn);
-}
-
-static always_inline long sys_sigprocmask(int how, k_rtsigset_t *set,
- k_rtsigset_t *old)
-{
- return syscall4(__NR_rt_sigprocmask, how, (unsigned long)set,
- (unsigned long)old, (unsigned long)sizeof(k_rtsigset_t));
-}
-
-static always_inline long sys_set_thread_area(user_desc_t *info)
-{
- return syscall1(__NR_set_thread_area, (long)info);
-}
-
-static always_inline long sys_get_thread_area(user_desc_t *info)
-{
- return syscall1(__NR_get_thread_area, (long)info);
-}
-
-static always_inline long sys_arch_prctl(int code, void *addr)
-{
- return syscall2(__NR_arch_prctl, code, (unsigned long)addr);
-}
-
-static always_inline long sys_prctl(int code, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- return syscall5(__NR_prctl, code, arg2, arg3, arg4, arg5);
-}
-
-static always_inline long sys_brk(unsigned long arg)
-{
- return syscall1(__NR_brk, arg);
-}
-
-static always_inline long sys_clone(unsigned long flags, void *child_stack,
- void *parent_tid, void *child_tid)
-{
- return syscall4(__NR_clone, flags, (unsigned long)child_stack,
- (unsigned long)parent_tid, (unsigned long)child_tid);
-}
-
-static always_inline long sys_futex(u32 *uaddr, int op, u32 val,
- struct timespec *utime,
- u32 *uaddr2, u32 val3)
-{
- return syscall6(__NR_futex, (unsigned long)uaddr,
- (unsigned long)op, (unsigned long)val,
- (unsigned long)utime,
- (unsigned long)uaddr2,
- (unsigned long)val3);
-}
-
-static always_inline long sys_flock(unsigned long fd, unsigned long cmd)
-{
- return syscall2(__NR_flock, fd, cmd);
-}
-
-static void always_inline local_sleep(long seconds)
-{
- struct timespec req, rem;
-
- req = (struct timespec){
- .tv_sec = seconds,
- .tv_nsec = 0,
- };
-
- sys_nanosleep(&req, &rem);
-}
-
-static long always_inline sys_kill(long pid, int sig)
-{
- return syscall2(__NR_kill, pid, (long)sig);
-}
-
-static long always_inline sys_tgkill(long tgid, long pid, int sig)
-{
- return syscall3(__NR_tgkill, tgid, pid, (long)sig);
-}
-
-static long always_inline sys_msync(void *addr, unsigned long length, int flags)
-{
- return syscall3(__NR_msync, (long)addr, length, (long)flags);
-}
-
-static long always_inline sys_setns(int fd, int nstype)
-{
- return syscall2(__NR_setns, (long)fd, (long)nstype);
-}
-
-static long sys_setresuid(int uid, int euid, int suid)
-{
- return syscall3(__NR_setresuid, (long)uid, (long)euid, (long)suid);
-}
-
-static long sys_setresgid(int gid, int egid, int sgid)
-{
- return syscall3(__NR_setresgid, (long)gid, (long)egid, (long)sgid);
-}
-
-static long sys_setfsuid(int fsuid)
-{
- return syscall1(__NR_setfsuid, (long)fsuid);
-}
-
-static long sys_setfsgid(int fsgid)
-{
- return syscall1(__NR_setfsgid, (long)fsgid);
-}
-
-struct cap_header {
- u32 version;
- int pid;
-};
-
-struct cap_data {
- u32 eff;
- u32 prm;
- u32 inh;
-};
-
-static long sys_capset(struct cap_header *h, struct cap_data *d)
-{
- return syscall2(__NR_capset, (long)h, (long)d);
-}
-
-static int sys_socket(int domain, int type, int protocol)
-{
- return syscall3(__NR_socket, (long) domain, (long) type, (long) protocol);
-}
-
-struct sockaddr;
-static int sys_bind(int sockfd, const struct sockaddr *addr, int addrlen)
-{
- return syscall3(__NR_bind, (long)sockfd, (long)addr, (long) addrlen);
-}
-
-struct msghdr;
-static long sys_sendmsg(int sockfd, const struct msghdr *msg, int flags)
-{
- return syscall3(__NR_sendmsg, (long)sockfd, (long)msg, (long) flags);
-}
-
-static long sys_recvmsg(int sockfd, struct msghdr *msg, int flags)
-{
- return syscall3(__NR_recvmsg, (long)sockfd, (long)msg, (long) flags);
-}
-
-static long always_inline sys_getsockopt(int sockfd, int level, int optname,
- const void *optval, socklen_t *optlen)
-{
- return syscall5(__NR_getsockopt, (unsigned long)sockfd,
- (unsigned long)level, (unsigned long)optname,
- (unsigned long)optval, (unsigned long)optlen);
-}
-
-static long always_inline sys_setsockopt(int sockfd, int level, int optname,
- const void *optval, socklen_t optlen)
-{
- return syscall5(__NR_setsockopt, (unsigned long)sockfd,
- (unsigned long)level, (unsigned long)optname,
- (unsigned long)optval, (unsigned long)optlen);
-}
-
-static void sys_set_tid_address(int *tid_addr) {
- syscall1(__NR_set_tid_address, (long) tid_addr);
-}
-
-static long always_inline
-sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
-{
- return syscall5(__NR_kcmp, (long)pid1, (long)pid2, (long)type, idx1, idx2);
-}
-
-static long always_inline sys_fcntl(int fd, int type, long arg)
-{
- return syscall3(__NR_fcntl, (long)fd, (long)type, (long)arg);
-}
-
-#ifndef F_GETFD
-#define F_GETFD 1
-#endif
-
-#ifndef CLONE_NEWPID
-#define CLONE_NEWPID 0x20000000
-#endif
-
-#ifndef CLONE_NEWUTS
-#define CLONE_NEWUTS 0x04000000
-#endif
-
-#ifndef CLONE_NEWIPC
-#define CLONE_NEWIPC 0x08000000
-#endif
-
-#define setns sys_setns
-
-#else /* CONFIG_X86_64 */
-# error x86-32 bit mode not yet implemented
-#endif /* CONFIG_X86_64 */
-
-#endif /* CR_SYSCALL_H_ */
diff --git a/parasite.c b/parasite.c
index e34299e..407376a 100644
--- a/parasite.c
+++ b/parasite.c
@@ -45,7 +45,7 @@ static int brk_init(void)
/*
* Map 10 MB. Hope this will be enough for unix skb's...
*/
- ret = sys_mmap(0, MAX_HEAP_SIZE,
+ ret = sys_mmap(NULL, MAX_HEAP_SIZE,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ret < 0)
@@ -165,7 +165,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
if (!(args->vma_entry.prot & PROT_READ)) {
prot_old = (unsigned long)args->vma_entry.prot;
prot_new = prot_old | PROT_READ;
- ret = sys_mprotect((unsigned long)args->vma_entry.start,
+ ret = sys_mprotect((void *)args->vma_entry.start,
(unsigned long)vma_entry_len(&args->vma_entry),
prot_new);
if (ret) {
@@ -180,7 +180,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
* so stick for mincore as a basis.
*/
- ret = sys_mincore((unsigned long)args->vma_entry.start, length, map);
+ ret = sys_mincore((void *)args->vma_entry.start, length, map);
if (ret) {
sys_write_msg("sys_mincore failed\n");
SET_PARASITE_RET(st, ret);
@@ -215,7 +215,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
* Don't left pages readable if they were not.
*/
if (prot_old != prot_new) {
- ret = sys_mprotect((unsigned long)args->vma_entry.start,
+ ret = sys_mprotect((void *)args->vma_entry.start,
(unsigned long)vma_entry_len(&args->vma_entry),
prot_old);
if (ret) {
@@ -255,7 +255,7 @@ static int dump_sigact(parasite_status_t *st)
if (sig == SIGKILL || sig == SIGSTOP)
continue;
- ret = sys_sigaction(sig, NULL, &act);
+ ret = sys_sigaction(sig, NULL, &act, sizeof(rt_sigset_t));
if (ret < 0) {
sys_write_msg("sys_sigaction failed\n");
SET_PARASITE_RET(st, ret);
@@ -409,7 +409,7 @@ static int init(struct parasite_init_args *args)
}
ksigfillset(&to_block);
- ret = sys_sigprocmask(SIG_SETMASK, &to_block, &old_blocked);
+ ret = sys_sigprocmask(SIG_SETMASK, &to_block, &old_blocked, sizeof(k_rtsigset_t));
if (ret < 0)
reset_blocked = ret;
else
@@ -436,7 +436,7 @@ static int parasite_set_logfd(parasite_status_t *st)
static int fini(void)
{
if (reset_blocked == 1)
- sys_sigprocmask(SIG_SETMASK, &old_blocked, NULL);
+ sys_sigprocmask(SIG_SETMASK, &old_blocked, NULL, sizeof(k_rtsigset_t));
sys_close(logfd);
sys_close(tsock);
brk_fini();
diff --git a/restorer.c b/restorer.c
index 4cf688b..7af708d 100644
--- a/restorer.c
+++ b/restorer.c
@@ -184,7 +184,7 @@ long __export_restore_thread(struct thread_restore_args *args)
CPREGT1(fs);
fsgs_base = core_entry->arch.gpregs.fs_base;
- ret = sys_arch_prctl(ARCH_SET_FS, (void *)fsgs_base);
+ ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
write_num_n(ret);
@@ -192,7 +192,7 @@ long __export_restore_thread(struct thread_restore_args *args)
}
fsgs_base = core_entry->arch.gpregs.gs_base;
- ret = sys_arch_prctl(ARCH_SET_GS, (void *)fsgs_base);
+ ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
write_num_n(ret);
@@ -349,9 +349,9 @@ long __export_restore_task(struct task_restore_core_args *args)
rt_sigaction_t act;
task_entries = args->task_entries;
- sys_sigaction(SIGCHLD, NULL, &act);
+ sys_sigaction(SIGCHLD, NULL, &act, sizeof(rt_sigset_t));
act.rt_sa_handler = sigchld_handler;
- sys_sigaction(SIGCHLD, &act, NULL);
+ sys_sigaction(SIGCHLD, &act, NULL, sizeof(rt_sigset_t));
restorer_set_logfd(args->logfd);
@@ -460,7 +460,7 @@ long __export_restore_task(struct task_restore_core_args *args)
if (vma_entry->prot & PROT_WRITE)
continue;
- sys_mprotect(vma_entry->start,
+ sys_mprotect((void *)vma_entry->start,
vma_entry_len(vma_entry),
vma_entry->prot);
}
@@ -538,7 +538,7 @@ long __export_restore_task(struct task_restore_core_args *args)
CPREG1(fs);
fsgs_base = core_entry->arch.gpregs.fs_base;
- ret = sys_arch_prctl(ARCH_SET_FS, (void *)fsgs_base);
+ ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
write_num_n(ret);
@@ -546,7 +546,7 @@ long __export_restore_task(struct task_restore_core_args *args)
}
fsgs_base = core_entry->arch.gpregs.gs_base;
- ret = sys_arch_prctl(ARCH_SET_GS, (void *)fsgs_base);
+ ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
if (ret) {
write_num_n(__LINE__);
write_num_n(ret);
@@ -687,7 +687,7 @@ long __export_restore_task(struct task_restore_core_args *args)
futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE);
- sys_sigaction(SIGCHLD, &args->sigchld_act, NULL);
+ sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(rt_sigset_t));
futex_dec_and_wake(&args->task_entries->nr_in_progress);
diff --git a/syscall-common.S b/syscall-common.S
new file mode 100644
index 0000000..84bcd8b
--- /dev/null
+++ b/syscall-common.S
@@ -0,0 +1,16 @@
+#include "linkage.h"
+
+#define SYSCALL(name, opcode) \
+ ENTRY(name); \
+ movl $opcode, %eax; \
+ jmp __syscall_common; \
+ END(name)
+
+ .text
+ .align 4
+
+ENTRY(__syscall_common)
+ movq %rcx, %r10
+ syscall
+ ret
+END(__syscall_common)
diff --git a/syscalls.pl b/syscalls.pl
new file mode 100644
index 0000000..f751869
--- /dev/null
+++ b/syscalls.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl -w
+
+my($in, $codes, $protos, $asm, $asmcommon, $proto_types) = @ARGV;
+
+open(IN, "< $in") or die "$0: cannot open: $in\n";
+open(CODES, "> $codes") or die "$0: cannot open: $codes\n";
+open(PROTOS, "> $protos") or die "$0: cannot open: $protos\n";
+open(ASM, "> $asm") or die "$0: cannot open: $asm\n";
+
+$codes =~ s/include\///g;
+$protos =~ s/include\///g;
+$proto_types =~ s/include\///g;
+
+print ASM "/* Autogenerated, don't edit */\n";
+print ASM "#include \"$codes\"\n\n";
+print ASM "#include \"$asmcommon\"\n";
+
+my($codes_def, $protos_def) = ($codes, $protos);
+
+$codes_def =~ s/[\s|\t|\-|\.|\/]/_/g;
+$protos_def =~ s/[\s|\t|\-|\.|\/]/_/g;
+
+print CODES "/* Autogenerated, don't edit */\n#ifndef $codes_def\n#define $codes_def\n";
+print PROTOS "/* Autogenerated, don't edit */\n#ifndef $protos_def\n#define $protos_def\n";
+print PROTOS "#include \"$proto_types\"\n";
+print PROTOS "#include \"$codes\"\n";
+
+while (defined($line = <IN>)) {
+ chomp $line;
+ $line =~ s/^\s+//;
+ $line =~ s/\s*\#.*$//;
+ next if ($line eq '');
+
+ my(@field) = split(/\t+/, $line);
+
+ if ($#field >= 1) {
+ print CODES "#define $field[0] $field[1]\n";
+ }
+
+ if ($#field >= 2) {
+ print PROTOS "extern long $field[2]$field[3];\n";
+ print ASM "SYSCALL($field[2], $field[0])\n";
+ }
+}
+
+print CODES "#endif /* $codes_def */\n";
+print PROTOS "#endif /* $protos_def */\n";
+
+close(IN);
+close(CODES);
+close(PROTOS);
--
1.7.7.6
More information about the CRIU
mailing list