[CRIU] [PATCH 2/4] sfd: Rework install, clone helpers to use fcntl

Cyrill Gorcunov gorcunov at openvz.org
Tue May 30 11:13:31 PDT 2017


dup3 closes opened target descriptor silently which already can
be borrowed by some other code (for example with containers with
huge memory usage, say 256G, the page transfer pipe may already
open the fd we reserved for service engine), so we should use
fcntl here instead and exit with error is such situation
happened.

Moreover, for calling convenience if there is previous
sfd already assigned and marked in sfd map -- just close
it before setting up new instance.

Another problem is clone_service_fd, we may have a situation
(say fdt_shared testcase) where service_fd_id downgrates so
we're trying to reinstall already opened desciptors. For
a while we simply close the desctination because it is
early-restore-forking stage where we hardly ever get
collistion with already opened files, but still it's
not 100% protected and we need to improve sfd engine
more making tracking per-id based together with proper
locking scheme.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/servicefd.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 79 insertions(+), 12 deletions(-)

diff --git a/criu/servicefd.c b/criu/servicefd.c
index 185021d2ef25..245891b79cbc 100644
--- a/criu/servicefd.c
+++ b/criu/servicefd.c
@@ -8,18 +8,46 @@
 
 #include "servicefd.h"
 #include "bitops.h"
-#include "log.h"
+#include "criu-log.h"
 
 #include "common/bug.h"
 
 #undef	LOG_PREFIX
 #define LOG_PREFIX "sfd: "
 
+// #define SFD_DEBUG
+
+#ifdef SFD_DEBUG
+# define pr_sfd_debug(fmt, ...)	pr_debug(fmt, ##__VA_ARGS__)
+#else
+# define pr_sfd_debug(fmt, ...)
+#endif
+
 static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX);
 
 static int service_fd_rlim_cur;
 static int service_fd_id;
 
+#define __gen_type_str(x)	[x] = __stringify_1(x)
+static char * const type_str[SERVICE_FD_MAX] = {
+	__gen_type_str(LOG_FD_OFF),
+	__gen_type_str(IMG_FD_OFF),
+	__gen_type_str(PROC_FD_OFF),
+	__gen_type_str(PROC_PID_FD_OFF),
+	__gen_type_str(CTL_TTY_OFF),
+	__gen_type_str(SELF_STDIN_OFF),
+	__gen_type_str(CR_PROC_FD_OFF),
+	__gen_type_str(ROOT_FD_OFF),
+	__gen_type_str(CGROUP_YARD),
+	__gen_type_str(USERNSD_SK),
+	__gen_type_str(NS_FD_OFF),
+	__gen_type_str(TRANSPORT_FD_OFF),
+	__gen_type_str(RPC_SK_OFF),
+	__gen_type_str(FDSTORE_SK_OFF),
+	__gen_type_str(LAZY_PAGES_SK_OFF),
+};
+#undef __gen_type_str
+
 int init_service_fd(void)
 {
 	struct rlimit64 rlimit;
@@ -62,11 +90,30 @@ int reserve_service_fd(enum sfd_type type)
 int install_service_fd(enum sfd_type type, int fd)
 {
 	int sfd = __get_service_fd(type, service_fd_id);
+	int ret;
 
 	BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);
+	BUG_ON(type >= ARRAY_SIZE(type_str));
+
+	pr_sfd_debug("install %d/%d (%s)\n", service_fd_id, sfd, type_str[type]);
+	if (test_bit(type, sfd_map)) {
+		pr_sfd_debug("\tclose previous %d/%d (%s)\n",
+			     service_fd_id, sfd, type_str[type]);
+		close_service_fd(type);
+	}
 
-	if (dup3(fd, sfd, O_CLOEXEC) != sfd) {
-		pr_perror("Dup %d -> %d failed", fd, sfd);
+	ret = fcntl(fd, F_DUPFD_CLOEXEC, sfd);
+	if (ret != sfd) {
+		if (ret < 0) {
+			pr_perror("%d/%d -> %d/%d (%s) transition failed",
+				  service_fd_id, fd, sfd,
+				  service_fd_id, type_str[type]);
+		} else {
+			pr_err("%d/%d (%s) is busy\n",
+			       service_fd_id, sfd,
+			       type_str[type]);
+			close(ret);
+		}
 		return -1;
 	}
 
@@ -92,34 +139,54 @@ int close_service_fd(enum sfd_type type)
 
 	close(fd);
 	clear_bit(type, sfd_map);
+	pr_sfd_debug("close %d/%d (%s)\n", service_fd_id, fd, type_str[type]);
 	return 0;
 }
 
 int clone_service_fd(int id)
 {
-	int ret = -1, i;
+	int ret, i;
 
 	if (service_fd_id == id)
 		return 0;
 
+	pr_sfd_debug("clone %d/%d\n", service_fd_id, id);
 	for (i = SERVICE_FD_MIN + 1; i < SERVICE_FD_MAX; i++) {
 		int old = get_service_fd(i);
 		int new = __get_service_fd(i, id);
 
 		if (old < 0)
 			continue;
-		ret = dup2(old, new);
-		if (ret == -1) {
-			if (errno == EBADF)
-				continue;
-			pr_perror("Unable to clone %d->%d", old, new);
+
+		/*
+		 * Cloning of service fds happen at early stage
+		 * where no other files needed are opened, so
+		 * for simplicity just close the destination.
+		 *
+		 * FIXME: Still better to invent more deep tracking
+		 * of service files opened, say every rsti(item)
+		 * structure should have own sfd_map and everything
+		 * should be under appropriate shared locking.
+		 */
+		close(new);
+		ret = fcntl(old, F_DUPFD, new);
+		if (ret < 0 || ret != new) {
+			if (ret < 0) {
+				pr_perror("clone %d/%d -> %d/%d (%s) transition failed",
+					  service_fd_id, old, id, new, type_str[i]);
+			} else {
+				pr_err("clone %d/%d -> %d/%d (%s) busy\n",
+				       service_fd_id, old, id, new, type_str[i]);
+				close(ret);
+			}
+			return -1;
 		}
+		pr_sfd_debug("clone %d/%d -> %d/%d (%s)\n",
+			     service_fd_id, old, id, new, type_str[i]);
 	}
 
 	service_fd_id = id;
-	ret = 0;
-
-	return ret;
+	return 0;
 }
 
 bool is_any_service_fd(int fd)
-- 
2.7.4



More information about the CRIU mailing list