[CRIU] [PATCH 3/4] arch/x86: allow to execute syscalls via process_vm_exec

Andrei Vagin avagin at gmail.com
Wed Apr 14 08:52:16 MSK 2021


process_vm_exec allows executing code in the address space of another
process. It changes the current address space to the target address
space and resumes the current process with registers from the sigcontext
that is passed in the arguments.

This change adds the PROCESS_VM_EXEC_SYSCALL flag. If it is set,
process_vm_exec will execute a system call with arguments from sigcontext.

process_vm_exec returns 0 if the system call has been executed and an
error code in all other cases.

The return code of the system call can be found in the corresponding
register in sigcontext.

Signed-off-by: Andrei Vagin <avagin at gmail.com>
---
 arch/x86/entry/common.c              |  5 ++++-
 arch/x86/kernel/process_vm_exec.c    | 29 +++++++++++++++++++++++++++-
 include/linux/entry-common.h         |  2 ++
 include/linux/process_vm_exec.h      |  2 ++
 include/uapi/linux/process_vm_exec.h |  8 ++++++++
 kernel/entry/common.c                |  2 +-
 6 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 include/uapi/linux/process_vm_exec.h

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 42eac459b25b..8de02ca19aca 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -40,7 +40,10 @@
 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
 #ifdef CONFIG_PROCESS_VM_EXEC
-	if (current->exec_mm && current->exec_mm->ctx) {
+	struct exec_mm *exec_mm = current->exec_mm;
+
+	if (exec_mm && exec_mm->ctx &&
+	    !(exec_mm->flags & PROCESS_VM_EXEC_SYSCALL)) {
 		kernel_siginfo_t info = {
 			.si_signo = SIGSYS,
 			.si_call_addr = (void __user *)KSTK_EIP(current),
diff --git a/arch/x86/kernel/process_vm_exec.c b/arch/x86/kernel/process_vm_exec.c
index 28b32330f744..9124b23f1e9b 100644
--- a/arch/x86/kernel/process_vm_exec.c
+++ b/arch/x86/kernel/process_vm_exec.c
@@ -11,6 +11,7 @@
 #include <linux/sched/mm.h>
 #include <linux/syscalls.h>
 #include <linux/vmacache.h>
+#include <linux/entry-common.h>
 #include <linux/process_vm_exec.h>
 
 static void swap_mm(struct mm_struct *prev_mm, struct mm_struct *target_mm)
@@ -73,7 +74,7 @@ SYSCALL_DEFINE6(process_vm_exec, pid_t, pid, struct sigcontext __user *, uctx,
 
 	sigset_t mask;
 
-	if (flags)
+	if (flags & ~PROCESS_VM_EXEC_SYSCALL)
 		return -EINVAL;
 
 	if (sizemask != sizeof(sigset_t))
@@ -97,6 +98,9 @@ SYSCALL_DEFINE6(process_vm_exec, pid_t, pid, struct sigcontext __user *, uctx,
 	}
 
 	current_pt_regs()->ax = 0;
+	if (flags & PROCESS_VM_EXEC_SYSCALL)
+		syscall_exit_to_user_mode_prepare(current_pt_regs());
+
 	ret = swap_vm_exec_context(uctx);
 	if (ret < 0)
 		goto err_mm_put;
@@ -117,6 +121,29 @@ SYSCALL_DEFINE6(process_vm_exec, pid_t, pid, struct sigcontext __user *, uctx,
 	mmgrab(prev_mm);
 	swap_mm(prev_mm, mm);
 
+	if (flags & PROCESS_VM_EXEC_SYSCALL) {
+		struct pt_regs *regs = current_pt_regs();
+		kernel_siginfo_t info;
+		int sysno;
+
+		regs->orig_ax = regs->ax;
+		regs->ax = -ENOSYS;
+		sysno = syscall_get_nr(current, regs);
+
+		do_syscall_64(sysno, regs);
+
+		restore_vm_exec_context(regs);
+		info.si_signo = SIGSYS;
+		info.si_call_addr = (void __user *)KSTK_EIP(current);
+		info.si_arch = syscall_get_arch(current);
+		info.si_syscall = sysno;
+		ret = copy_siginfo_to_user(current->exec_mm->siginfo, &info);
+		current_pt_regs()->orig_ax = __NR_process_vm_exec;
+		current_pt_regs()->ax = -ENOSYS;
+		syscall_enter_from_user_mode_work(current_pt_regs(), current_pt_regs()->orig_ax);
+		return ret;
+	}
+
 	ret = current_pt_regs()->ax;
 
 	return ret;
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 474f29638d2c..d0ebbe9ca9e4 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -285,6 +285,8 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
 }
 #endif
 
+void syscall_exit_to_user_mode_prepare(struct pt_regs *regs);
+
 /**
  * syscall_exit_to_user_mode - Handle work before returning to user mode
  * @regs:	Pointer to currents pt_regs
diff --git a/include/linux/process_vm_exec.h b/include/linux/process_vm_exec.h
index a02535fbd5c8..2e04b4875a92 100644
--- a/include/linux/process_vm_exec.h
+++ b/include/linux/process_vm_exec.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_PROCESS_VM_EXEC_H
 #define _LINUX_PROCESS_VM_EXEC_H
 
+#include <uapi/linux/process_vm_exec.h>
+
 struct exec_mm {
 	struct sigcontext *ctx;
 	struct mm_struct *mm;
diff --git a/include/uapi/linux/process_vm_exec.h b/include/uapi/linux/process_vm_exec.h
new file mode 100644
index 000000000000..35465b5d3ebf
--- /dev/null
+++ b/include/uapi/linux/process_vm_exec.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_PROCESS_VM_EXEC_H
+#define _UAPI_LINUX_PROCESS_VM_EXEC_H
+
+#define PROCESS_VM_EXEC_SYSCALL 0x1UL
+
+#endif
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index e9e2df3f3f9e..c325a2e5ecf4 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -235,7 +235,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long ti_work)
  * Syscall specific exit to user mode preparation. Runs with interrupts
  * enabled.
  */
-static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
+void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
 {
 	u32 cached_flags = READ_ONCE(current_thread_info()->flags);
 	unsigned long nr = syscall_get_nr(current, regs);
-- 
2.29.2



More information about the CRIU mailing list