[CRIU] [PATCH 5/8] parasite: Load code blobs via memfd

Cyrill Gorcunov gorcunov at openvz.org
Mon Oct 5 12:54:34 PDT 2015


From: Pavel Emelyanov <xemul at parallels.com>

Currently the parasite is loaded using the map_files dir,
which is guarded with CAP_SYS_ADMIN by default (which
is dropped in 4.2 series). So let's make a deal -- try
to use the memfd interface first (which has been introduced
in the 4.0 kernel series) and, if that fails, switch to the
old map_files interface.

Over time all users will switch to newer kernels, so
memfd is going to be the primary interface.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 parasite-syscall.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/parasite-syscall.c b/parasite-syscall.c
index 7ab8db1d32d2..a0e329bbfa48 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -39,6 +39,9 @@
 #include "asm/restorer.h"
 #include "pie/pie-relocs.h"
 
+#define MEMFD_FNAME	"CRIUMFD"	/* name string handed to memfd_create() */
+#define MEMFD_FNAME_SZ	sizeof(MEMFD_FNAME)	/* its size, trailing NUL included */
+
 static int can_run_syscall(unsigned long ip, unsigned long start,
 			   unsigned long end, unsigned long pad)
 {
@@ -1103,7 +1106,8 @@ struct parasite_ctl *parasite_prep_ctl(pid_t pid, struct vm_area_list *vma_area_
 		return ctl;
 
 	/* Search a place for injecting syscall */
-	vma_area = get_vma_by_ip(&vma_area_list->h, REG_IP(ctl->orig.regs), 0);
+	vma_area = get_vma_by_ip(&vma_area_list->h, REG_IP(ctl->orig.regs),
+				 MEMFD_FNAME_SZ);
 	if (!vma_area) {
 		pr_err("No suitable VMA found to run parasite "
 		       "bootstrap code (pid: %d)\n", pid);
@@ -1120,7 +1124,7 @@ err:
 	return NULL;
 }
 
-int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
+static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size)
 {
 	int fd;
 
@@ -1152,6 +1156,89 @@ int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
 	return 0;
 }
 
+/*
+ * Back the parasite area of @ctl with a memfd created inside the victim and
+ * map @size bytes on both sides. Returns 0, 1 on -ENOSYS or < 0 on error.
+ */
+static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size)
+{
+	void *where = (void *)ctl->syscall_ip + BUILTIN_SYSCALL_SIZE;
+	u8 orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME;
+	pid_t pid = ctl->pid.real;
+	unsigned long sret;
+	int ret, fd, lfd;
+
+	BUILD_BUG_ON(sizeof(orig_code) < sizeof(long));
+	/* Swap the memfd name into the victim at 'where' */
+	if (ptrace_swap_area(pid, where, (void *)orig_code, sizeof(orig_code))) {
+		pr_err("Can't inject memfd args (pid: %d)\n", pid);
+		return -1;
+	}
+
+	ret = syscall_seized(ctl, __NR_memfd_create, &sret,
+			     (unsigned long)where, 0, 0, 0, 0, 0);
+	/* Put the victim's bytes back no matter how the syscall went */
+	if (ptrace_poke_area(pid, orig_code, where, sizeof(orig_code))) {
+		pr_err("Can't restore memfd args (pid: %d)\n", pid);
+		if (ret >= 0 && (long)sret >= 0) /* don't leak the new memfd */
+			syscall_seized(ctl, __NR_close, &sret, sret, 0, 0, 0, 0, 0);
+		return -1;
+	}
+	if (ret < 0)
+		return ret;
+
+	fd = (int)(long)sret;
+	if (fd == -ENOSYS)
+		return 1;
+	if (fd < 0)
+		return fd;
+
+	ctl->map_length = round_up(size, page_size());
+	lfd = open_proc_rw(ctl->pid.real, "fd/%d", fd);
+	if (lfd < 0)
+		goto err_cure;
+	if (ftruncate(lfd, ctl->map_length) < 0) {
+		pr_perror("Fail to truncate memfd for parasite");
+		goto err_curef; /* lfd is open here and must be closed too */
+	}
+	ctl->remote_map = mmap_seized(ctl, NULL, size,
+				      PROT_READ | PROT_WRITE | PROT_EXEC,
+				      MAP_FILE | MAP_SHARED, fd, 0);
+	if (!ctl->remote_map) {
+		pr_err("Can't rmap memfd for parasite blob\n");
+		goto err_curef;
+	}
+	ctl->local_map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			      MAP_SHARED | MAP_FILE, lfd, 0);
+	if (ctl->local_map == MAP_FAILED) {
+		ctl->local_map = NULL;
+		pr_perror("Can't lmap memfd for parasite blob");
+		goto err_curef;
+	}
+	/* Both mappings are in place; the descriptors are not used further */
+	syscall_seized(ctl, __NR_close, &sret, fd, 0, 0, 0, 0, 0);
+	close(lfd);
+	pr_info("Set up parasite blob using memfd\n");
+	return 0;
+err_curef:
+	close(lfd);
+err_cure:
+	syscall_seized(ctl, __NR_close, &sret, fd, 0, 0, 0, 0, 0);
+	return -1;
+}
+
+/* Try the memfd way first; if it reports 1, fall back to the legacy mmap one. */
+int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
+{
+	int res = parasite_memfd_exchange(ctl, size);
+
+	if (res != 1)
+		return res;
+
+	pr_info("MemFD parasite doesn't work, goto legacy mmap\n");
+	return parasite_mmap_exchange(ctl, size);
+}
+
 static unsigned long parasite_args_size = PARASITE_ARG_SIZE_MIN;
 void parasite_ensure_args_size(unsigned long sz)
 {
-- 
2.4.3



More information about the CRIU mailing list