[CRIU] [PATCH 5/8] parasite: Load code blobs via memfd
Cyrill Gorcunov
gorcunov at openvz.org
Mon Oct 5 12:54:34 PDT 2015
From: Pavel Emelyanov <xemul at parallels.com>
Currently the parasite is loaded via the map_files directory,
which is guarded by CAP_SYS_ADMIN by default (a restriction
that is dropped in the 4.2 series). So let's make a deal: try
the memfd interface first (introduced in the 4.0 kernel
series), and if that fails, fall back to the old map_files
interface.

Over time all users will move to newer kernels, and memfd will
become the primary interface.
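
For illustration only (not part of this patch): a minimal
user-space sketch of the same ENOSYS-based probe. Unlike the
patch, which runs memfd_create() inside the victim task via
syscall_seized(), this runs on the caller's side and uses the
libc syscall() wrapper, so the error is reported via errno
rather than as a raw negative return value. The helper name
create_blob_fd() is hypothetical.

#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#include <stdio.h>

/* Try memfd_create() first; return -ENOSYS so the caller can
 * fall back to the legacy map_files path on pre-4.0 kernels. */
static int create_blob_fd(const char *name)
{
	int fd = syscall(__NR_memfd_create, name, 0);

	if (fd < 0 && errno == ENOSYS) {
		fprintf(stderr, "memfd_create unsupported, using legacy path\n");
		return -ENOSYS;
	}

	return fd;
}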
Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
parasite-syscall.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 89 insertions(+), 2 deletions(-)
diff --git a/parasite-syscall.c b/parasite-syscall.c
index 7ab8db1d32d2..a0e329bbfa48 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -39,6 +39,9 @@
#include "asm/restorer.h"
#include "pie/pie-relocs.h"
+#define MEMFD_FNAME "CRIUMFD"
+#define MEMFD_FNAME_SZ sizeof(MEMFD_FNAME)
+
static int can_run_syscall(unsigned long ip, unsigned long start,
unsigned long end, unsigned long pad)
{
@@ -1103,7 +1106,8 @@ struct parasite_ctl *parasite_prep_ctl(pid_t pid, struct vm_area_list *vma_area_
return ctl;
/* Search a place for injecting syscall */
- vma_area = get_vma_by_ip(&vma_area_list->h, REG_IP(ctl->orig.regs), 0);
+ vma_area = get_vma_by_ip(&vma_area_list->h, REG_IP(ctl->orig.regs),
+ MEMFD_FNAME_SZ);
if (!vma_area) {
pr_err("No suitable VMA found to run parasite "
"bootstrap code (pid: %d)\n", pid);
@@ -1120,7 +1124,7 @@ err:
return NULL;
}
-int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
+static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size)
{
int fd;
@@ -1152,6 +1156,89 @@ int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
return 0;
}
+static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size)
+{
+ void *where = (void *)ctl->syscall_ip + BUILTIN_SYSCALL_SIZE;
+ u8 orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME;
+ pid_t pid = ctl->pid.real;
+ unsigned long sret;
+ int ret, fd, lfd;
+
+ BUILD_BUG_ON(sizeof(orig_code) < sizeof(long));
+
+ if (ptrace_swap_area(pid, where, (void *)orig_code, sizeof(orig_code))) {
+ pr_err("Can't inject memfd args (pid: %d)\n", pid);
+ return -1;
+ }
+
+ ret = syscall_seized(ctl, __NR_memfd_create, &sret,
+ (unsigned long)where, 0, 0, 0, 0, 0);
+
+ if (ptrace_poke_area(pid, orig_code, where, sizeof(orig_code))) {
+ pr_err("Can't restore memfd args (pid: %d)\n", pid);
+ return -1;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ fd = (int)(long)sret;
+ if (fd == -ENOSYS)
+ return 1;
+ if (fd < 0)
+ return fd;
+
+ ctl->map_length = round_up(size, page_size());
+ lfd = open_proc_rw(ctl->pid.real, "fd/%d", fd);
+ if (lfd < 0)
+ goto err_cure;
+
+ if (ftruncate(lfd, ctl->map_length) < 0) {
+ pr_perror("Fail to truncate memfd for parasite");
+ goto err_cure;
+ }
+
+ ctl->remote_map = mmap_seized(ctl, NULL, size,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FILE | MAP_SHARED, fd, 0);
+ if (!ctl->remote_map) {
+ pr_err("Can't rmap memfd for parasite blob\n");
+ goto err_curef;
+ }
+
+ ctl->local_map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FILE, lfd, 0);
+ if (ctl->local_map == MAP_FAILED) {
+ ctl->local_map = NULL;
+ pr_perror("Can't lmap memfd for parasite blob");
+ goto err_curef;
+ }
+
+ syscall_seized(ctl, __NR_close, &sret, fd, 0, 0, 0, 0, 0);
+ close(lfd);
+
+ pr_info("Set up parasite blob using memfd\n");
+ return 0;
+
+err_curef:
+ close(lfd);
+err_cure:
+ syscall_seized(ctl, __NR_close, &sret, fd, 0, 0, 0, 0, 0);
+ return -1;
+}
+
+int parasite_map_exchange(struct parasite_ctl *ctl, unsigned long size)
+{
+ int ret;
+
+ ret = parasite_memfd_exchange(ctl, size);
+ if (ret == 1) {
+ pr_info("MemFD parasite doesn't work, goto legacy mmap\n");
+ ret = parasite_mmap_exchange(ctl, size);
+ }
+ return ret;
+}
+
static unsigned long parasite_args_size = PARASITE_ARG_SIZE_MIN;
void parasite_ensure_args_size(unsigned long sz)
{
--
2.4.3