[CRIU] [PATCH 2/3] shmem: rework getting file descriptors for shared memory regions

Andrey Vagin avagin at openvz.org
Fri Oct 10 09:39:22 PDT 2014


/proc/PID/map_files are protected by the global CAP_SYS_ADMIN, so we
need to avoid using them to support user namespaces.

We are going to use memfd_create() to get the first file descriptor, and
then all other processes will be able to open it via /proc/PID/fd/X.

This patch reworks slave processes to not use map_files.

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 include/shmem.h |  2 ++
 pie/restorer.c  | 10 ----------
 shmem.c         | 32 ++++++++++++++++++++++++--------
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/include/shmem.h b/include/shmem.h
index 2526e3e..4b482e8 100644
--- a/include/shmem.h
+++ b/include/shmem.h
@@ -19,6 +19,8 @@ struct shmem_info {
 	int		pid;
 	int		fd;
 	futex_t		lock;
+	int		count;		/* the number of regions */
+	int		self_count;	/* the number of regions, which belongs to "pid" */
 };
 
 struct _VmaEntry;
diff --git a/pie/restorer.c b/pie/restorer.c
index 6c9d0a3..7196a3e 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -881,16 +881,6 @@ long __export_restore_task(struct task_restore_args *args)
 		if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
 			continue;
 
-		if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
-			struct shmem_info *entry;
-
-			entry = find_shmem(args->shmems, args->nr_shmems,
-						  vma_entry->shmid);
-			if (entry && entry->pid == my_pid &&
-			    entry->start == vma_entry->start)
-				futex_set_and_wake(&entry->lock, 1);
-		}
-
 		if (vma_entry->prot & PROT_WRITE)
 			continue;
 
diff --git a/shmem.c b/shmem.c
index 526d9a9..bfe2743 100644
--- a/shmem.c
+++ b/shmem.c
@@ -55,6 +55,7 @@ int collect_shmem(int pid, VmaEntry *vi)
 
 		if (si->size < size)
 			si->size = size;
+		si->count++;
 
 		/*
 		 * Only the shared mapping with a lowest
@@ -62,12 +63,17 @@ int collect_shmem(int pid, VmaEntry *vi)
 		 * will wait until the kernel propagate this mapping
 		 * into /proc
 		 */
-		if (!pid_rst_prio(pid, si->pid))
+		if (!pid_rst_prio(pid, si->pid)) {
+			if (si->pid == pid)
+				si->self_count++;
+
 			return 0;
+		}
 
 		si->pid	 = pid;
 		si->start = vi->start;
 		si->end	 = vi->end;
+		si->self_count = 1;
 
 		return 0;
 	}
@@ -85,6 +91,8 @@ int collect_shmem(int pid, VmaEntry *vi)
 	si->pid	  = pid;
 	si->size  = size;
 	si->fd    = -1;
+	si->count = 1;
+	si->self_count = 1;
 
 	nr_shmems++;
 	futex_init(&si->lock);
@@ -97,17 +105,18 @@ static int shmem_wait_and_open(int pid, struct shmem_info *si)
 	char path[128];
 	int ret;
 
-	snprintf(path, sizeof(path), "/proc/%d/map_files/%lx-%lx",
-		si->pid, si->start, si->end);
+	pr_info("Waiting for the %lx shmem to appear\n", si->shmid);
+	futex_wait_while(&si->lock, 0);
 
-	pr_info("Waiting for [%s] to appear\n", path);
-	futex_wait_until(&si->lock, 1);
+	snprintf(path, sizeof(path), "/proc/%d/fd/%d",
+		si->pid, si->fd);
 
 	pr_info("Opening shmem [%s] \n", path);
-	ret = open_proc_rw(si->pid, "map_files/%lx-%lx", si->start, si->end);
+	ret = open_proc_rw(si->pid, "fd/%d", si->fd);
 	if (ret < 0)
 		pr_perror("     %d: Can't stat shmem at %s",
 				si->pid, path);
+	futex_inc_and_wake(&si->lock);
 	return ret;
 }
 
@@ -207,10 +216,17 @@ int get_shmem_fd(int pid, VmaEntry *vi)
 			(unsigned long) addr,
 			(unsigned long) addr + si->size);
 	munmap(addr, si->size);
-	if (f < 0)
-		return -1;
 
 	si->fd = f;
+
+	/* Signal the slaves that they can open the fd for this shmem */
+	futex_inc_and_wake(&si->lock);
+	/*
+	 * All other regions in this process will duplicate
+	 * the file descriptor, so we don't wait for them.
+	 */
+	futex_wait_until(&si->lock, si->count - si->self_count + 1);
+
 	return f;
 }
 
-- 
1.9.3



More information about the CRIU mailing list