[CRIU] [PATCH 02/10] fdstore: add a storage for file descriptors (v2)

Andrei Vagin avagin at openvz.org
Sun Feb 12 21:49:12 PST 2017


From: Andrei Vagin <avagin at virtuozzo.com>

We need a storage for file descriptors which is shared between processes
and doesn't itself use a lot of file descriptors. We are going to use it
on restore, and if it used many file descriptors, we would have to find
descriptors which aren't used by any of the restored processes, so as not
to conflict with their descriptors.

There are two solutions. The first one is a service (process) which
handles the commands push_fd(id, fd) and pop_fd(id, fd).

Another solution is to save descriptors in a unix socket.  It requires
only one extra descriptor which we can register as a service fd. Each
unix socket has a buffer and can fit a number of file descriptors. We
can use SO_PEEK_OFF and MSG_PEEK to get file descriptors from a socket
as many times as we need.

This patch implements the second solution.

v2: call recvmsg with MSG_PEEK
Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
---
 criu/Makefile.crtools    |   1 +
 criu/cr-restore.c        |   6 +++
 criu/fdstore.c           | 118 +++++++++++++++++++++++++++++++++++++++++++++++
 criu/include/fdstore.h   |  17 +++++++
 criu/include/servicefd.h |   1 +
 5 files changed, 143 insertions(+)
 create mode 100644 criu/fdstore.c
 create mode 100644 criu/include/fdstore.h

diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index e095d01..afb22c5 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -81,6 +81,7 @@ obj-y			+= uts_ns.o
 obj-y			+= path.o
 obj-y			+= autofs.o
 obj-y			+= uffd.o
+obj-y			+= fdstore.o
 
 ifeq ($(VDSO),y)
 obj-y			+= pie-util-vdso.o
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 7ccb136..07b7ae0 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -78,6 +78,7 @@
 #include "fault-injection.h"
 #include "sk-queue.h"
 #include "sigframe.h"
+#include "fdstore.h"
 
 #include "parasite-syscall.h"
 #include "files-reg.h"
@@ -1396,6 +1397,10 @@ static int restore_task_with_children(void *_arg)
 
 	/* Restore root task */
 	if (current->parent == NULL) {
+
+		if (fdstore_init())
+			goto err;
+
 		if (join_namespaces()) {
 			pr_perror("Join namespaces failed");
 			goto err;
@@ -3194,6 +3199,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	close_proc();
 	close_service_fd(ROOT_FD_OFF);
 	close_service_fd(USERNSD_SK);
+	close_service_fd(FDSTORE_SK_OFF);
 
 	__gcov_flush();
 
diff --git a/criu/fdstore.c b/criu/fdstore.c
new file mode 100644
index 0000000..d9bed4d
--- /dev/null
+++ b/criu/fdstore.c
@@ -0,0 +1,118 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "common/scm.h"
+#include "common/lock.h"
+#include "servicefd.h"
+#include "fdstore.h"
+#include "xmalloc.h"
+#include "rst-malloc.h"
+#include "log.h"
+
+static struct fdstore_desc {
+	int next_id;
+	mutex_t lock; /* to protect a peek offset */
+} *desc;
+
+int fdstore_init(void)
+{
+	struct sockaddr_un addr;
+	unsigned int addrlen;
+	struct stat st;
+	int sk, ret;
+
+	desc = shmalloc(sizeof(*desc));
+	if (!desc)
+		return -1;
+
+	desc->next_id = 0;
+	mutex_init(&desc->lock);
+
+	sk = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+	if (sk < 0) {
+		pr_perror("Unable to create a socket");
+		return -1;
+	}
+
+	if (fstat(sk, &st)) {
+		pr_perror("Unable to stat a file descriptor");
+		close(sk);
+		return -1;
+	}
+
+	addr.sun_family = AF_UNIX;
+	addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%"PRIx64, st.st_ino);
+	addrlen += sizeof(addr.sun_family);
+
+	addr.sun_path[0] = 0;
+
+	/*
+	 * This socket is connected to itself, so all messages are queued to
+	 * its receive queue. Here we are going to use this socket to store
+	 * file descriptors. For that we need to send a file descriptor in
+	 * a queue and remember its sequence number. Then we can set SO_PEEK_OFF
+	 * to get a file descriptor without dequeuing it.
+	 */
+	if (bind(sk, (struct sockaddr *) &addr, addrlen)) {
+		pr_perror("Unable to bind a socket");
+		close(sk);
+		return -1;
+	}
+	if (connect(sk, (struct sockaddr *) &addr, addrlen)) {
+		pr_perror("Unable to connect a socket");
+		close(sk);
+		return -1;
+	}
+
+	ret = install_service_fd(FDSTORE_SK_OFF, sk);
+	close(sk);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+int fdstore_add(int fd)
+{
+	int sk = get_service_fd(FDSTORE_SK_OFF);
+	int id;
+
+	mutex_lock(&desc->lock);
+
+	if (send_fd(sk, NULL, 0, fd)) {
+		mutex_unlock(&desc->lock);
+		return -1;
+	}
+
+	id = desc->next_id++;
+
+	mutex_unlock(&desc->lock);
+
+	return id;
+}
+
+int fdstore_get(int id)
+{
+	int sk = get_service_fd(FDSTORE_SK_OFF);
+	int fd;
+
+	mutex_lock(&desc->lock);
+	if (setsockopt(sk, SOL_SOCKET, SO_PEEK_OFF, &id, sizeof(id))) {
+		mutex_unlock(&desc->lock);
+		pr_perror("Unable to a peek offset");
+		return -1;
+	}
+
+	if (__recv_fds(sk, &fd, 1, NULL, 0, MSG_PEEK) < 0) {
+		mutex_unlock(&desc->lock);
+		pr_perror("Unable to get a file descriptor with the %d id", id);
+		return -1;
+	}
+	mutex_unlock(&desc->lock);
+
+	return fd;
+}
diff --git a/criu/include/fdstore.h b/criu/include/fdstore.h
new file mode 100644
index 0000000..bdfb5fe
--- /dev/null
+++ b/criu/include/fdstore.h
@@ -0,0 +1,17 @@
+#ifndef __CRIU_FDSTORE_H__
+#define __CRIU_FDSTORE_H__
+
+/*
+ * fdstore is a storage for file descriptors which is shared
+ * between processes.
+ */
+
+int fdstore_init(void);
+
+/* Add a file descriptor to the storage and return its id */
+int fdstore_add(int fd);
+
+/* Get a file descriptor from a storage by id */
+int fdstore_get(int id);
+
+#endif
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index 5152fb6..c070480 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -21,6 +21,7 @@ enum sfd_type {
 	NS_FD_OFF,	/* Node's net namespace fd */
 	TRANSPORT_FD_OFF, /* to transfer file descriptors */
 	LAZY_PAGES_SK_OFF, /* socket for communication with lazy-pages daemon */
+	FDSTORE_SK_OFF, /* socket to store file descriptors */
 
 	SERVICE_FD_MAX
 };
-- 
2.7.4



More information about the CRIU mailing list