[CRIU] [PATCH 3/3] inet: Rework inet sk dumping on new fdinfo scheme

Pavel Emelyanov xemul at parallels.com
Tue Mar 27 04:46:08 EDT 2012


Now every inetsk fd dump results in a new entry in the fdinfo.img file. The sockets themselves
are dumped into the global inetsk.img image file. On restore, the generic fdinfo redistribution
algorithm is used, and inet sockets are opened only when required.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 cr-show.c         |    1 +
 files.c           |   14 ++++-
 include/crtools.h |    4 +-
 include/image.h   |    3 +-
 include/sockets.h |    2 +
 sockets.c         |  134 +++++++++++++++++++++++++++++------------------------
 6 files changed, 91 insertions(+), 67 deletions(-)

diff --git a/cr-show.c b/cr-show.c
index 84d9659..6293be3 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -62,6 +62,7 @@ static char *fdtype2s(u8 type)
 		[FDINFO_MAP] = "map",
 		[FDINFO_CWD] = "cwd",
 		[FDINFO_EXE] = "exe",
+		[FDINFO_INETSK] = "isk",
 	};
 
 	if (type > FDINFO_UND && type < FD_INFO_MAX)
diff --git a/files.c b/files.c
index e7b813e..0fe0e03 100644
--- a/files.c
+++ b/files.c
@@ -19,6 +19,7 @@
 #include "util.h"
 #include "util-net.h"
 #include "lock.h"
+#include "sockets.h"
 
 static struct fdinfo_desc *fdinfo_descs;
 static int nr_fdinfo_descs;
@@ -313,7 +314,15 @@ static int open_fd(int pid, struct fdinfo_entry *fe,
 	if ((fi->pid != pid) || (fe->addr != fi->addr))
 		return 0;
 
-	tmp = open_fe_fd(fe);
+	switch (fe->type) {
+	case FDINFO_REG:
+		tmp = open_fe_fd(fe);
+		break;
+	case FDINFO_INETSK:
+		tmp = open_inet_sk(fe);
+		break;
+	}
+
 	if (tmp < 0)
 		return -1;
 
@@ -429,8 +438,7 @@ static int open_fdinfo(int pid, struct fdinfo_entry *fe, int *fdinfo_fd, int sta
 	pr_info("\t%d: Got fd for %lx users %d\n", pid,
 			fe->addr, futex_get(&fi->users));
 
-	BUG_ON(fe->type != FDINFO_REG);
-
+	BUG_ON(fd_is_special(fe));
 
 	switch (state) {
 	case FD_STATE_PREP:
diff --git a/include/crtools.h b/include/crtools.h
index 6cc2146..5dc924a 100644
--- a/include/crtools.h
+++ b/include/crtools.h
@@ -27,7 +27,6 @@ enum {
 	CR_FD_PIPES,
 	CR_FD_SIGACT,
 	CR_FD_UNIXSK,
-	CR_FD_INETSK,
 	CR_FD_ITIMERS,
 	CR_FD_CREDS,
 	_CR_FD_TASK_TO,
@@ -50,6 +49,7 @@ enum {
 	_CR_FD_GLOB_FROM,
 	CR_FD_SK_QUEUES,
 	CR_FD_REG_FILES,
+	CR_FD_INETSK,
 	_CR_FD_GLOB_TO,
 
 	CR_FD_MAX
@@ -102,7 +102,7 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
 #define FMT_FNAME_PSTREE	"pstree.img"
 #define FMT_FNAME_SIGACTS	"sigacts-%d.img"
 #define FMT_FNAME_UNIXSK	"unixsk-%d.img"
-#define FMT_FNAME_INETSK	"inetsk-%d.img"
+#define FMT_FNAME_INETSK	"inetsk.img"
 #define FMT_FNAME_ITIMERS	"itimers-%d.img"
 #define FMT_FNAME_CREDS		"creds-%d.img"
 #define FMT_FNAME_UTSNS		"utsns-%d.img"
diff --git a/include/image.h b/include/image.h
index 3316078..a5a7066 100644
--- a/include/image.h
+++ b/include/image.h
@@ -34,7 +34,7 @@ enum fd_types {
 	FDINFO_UND,
 	FDINFO_REG,
 	FDINFO_MAP,
-
+	FDINFO_INETSK,
 	FDINFO_CWD,
 	FDINFO_EXE,
 
@@ -93,7 +93,6 @@ struct unix_sk_entry {
 } __packed;
 
 struct inet_sk_entry {
-	u32	fd;
 	u32	id;
 	u8	family;
 	u8	type;
diff --git a/include/sockets.h b/include/sockets.h
index 204dbcb..b84d63d 100644
--- a/include/sockets.h
+++ b/include/sockets.h
@@ -27,6 +27,8 @@ extern int try_dump_socket(pid_t pid, int fd, const struct cr_fdset *cr_fdset,
 
 extern int collect_sockets(void);
 extern int prepare_sockets(int pid);
+struct fdinfo_entry;
+extern int open_inet_sk(struct fdinfo_entry *fe);
 struct cr_options;
 extern void show_unixsk(int fd, struct cr_options *);
 extern void show_inetsk(int fd, struct cr_options *);
diff --git a/sockets.c b/sockets.c
index 4856f9e..b4f8363 100644
--- a/sockets.c
+++ b/sockets.c
@@ -41,6 +41,7 @@ struct socket_desc {
 	unsigned int		family;
 	unsigned int		ino;
 	struct socket_desc	*next;
+	int			already_dumped;
 };
 
 struct unix_sk_desc {
@@ -177,9 +178,9 @@ static void show_one_inet_img(const char *act, const struct inet_sk_entry *e)
 		pr_perror("Failed to translate address");
 	}
 
-	pr_debug("\t%s: fd %d family %d type %d proto %d port %d "
+	pr_debug("\t%s: family %d type %d proto %d port %d "
 		"state %d src_addr %s\n",
-		act, e->fd, e->family, e->type, e->proto, e->src_port,
+		act, e->family, e->type, e->proto, e->src_port,
 		e->state, src_addr);
 }
 
@@ -237,19 +238,29 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
 	return 1;
 }
 
-static int dump_one_inet(const struct socket_desc *_sk, int fd,
+static int dump_one_inet(struct socket_desc *_sk, int fd,
 			 const struct cr_fdset *cr_fdset,
 			 struct sk_queue *queue)
 {
-	const struct inet_sk_desc *sk = (struct inet_sk_desc *)_sk;
+	struct inet_sk_desc *sk = (struct inet_sk_desc *)_sk;
 	struct inet_sk_entry ie;
+	struct fdinfo_entry fe;
 
 	if (!can_dump_inet_sk(sk))
 		goto err;
 
+	fe.addr = fd;
+	fe.type = FDINFO_INETSK;
+	fe.id = sk->sd.ino;
+
+	if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &fe))
+		goto err;
+
+	if (sk->sd.already_dumped)
+		return 0;
+
 	memset(&ie, 0, sizeof(ie));
 
-	ie.fd		= fd;
 	ie.id		= sk->sd.ino;
 	ie.family	= sk->sd.family;
 	ie.type		= sk->type;
@@ -261,12 +272,13 @@ static int dump_one_inet(const struct socket_desc *_sk, int fd,
 	memcpy(ie.src_addr, sk->src_addr, sizeof(u32) * 4);
 	memcpy(ie.dst_addr, sk->dst_addr, sizeof(u32) * 4);
 
-	if (write_img(fdset_fd(cr_fdset, CR_FD_INETSK), &ie))
+	if (write_img(fdset_fd(glob_fdset, CR_FD_INETSK), &ie))
 		goto err;
 
 	pr_info("Dumping inet socket at %d\n", fd);
 	show_one_inet("Dumping", sk);
 	show_one_inet_img("Dumped", &ie);
+	sk->sd.already_dumped = 1;
 	return 0;
 
 err:
@@ -372,7 +384,7 @@ err:
 int try_dump_socket(pid_t pid, int fd, const struct cr_fdset *cr_fdset,
 		    struct sk_queue *queue)
 {
-	const struct socket_desc *sk;
+	struct socket_desc *sk;
 	struct statfs fst;
 	struct stat st;
 	char path[64];
@@ -1176,24 +1188,56 @@ err:
 	return ret;
 }
 
-static int open_inet_sk(const struct inet_sk_entry *ie, int *img_fd)
+/*
+ * Scan the CR_FD_INETSK image for the entry whose id equals @id and
+ * leave it in @ie. Returns 0 when found, or a negative value when the
+ * image can't be opened, a read fails, or the image ends with no match.
+ */
+static int read_inetsk_image(u32 id, struct inet_sk_entry *ie)
+{
+	int ifd, ret;
+
+	ifd = open_image_ro(CR_FD_INETSK);
+	if (ifd < 0)
+		return -1;
+
+	/* read_img_eof() result: < 0 is an error, 0 is end of image */
+	do {
+		ret = read_img_eof(ifd, ie);
+		if (ret == 0) {
+			pr_err("Can't find inet sk %u\n", id);
+			ret = -1;
+		}
+	} while (ret > 0 && ie->id != id);
+
+	/* Single exit, so the image fd is closed on failure paths too */
+	close(ifd);
+	return ret < 0 ? ret : 0;
+}
+
+int open_inet_sk(struct fdinfo_entry *fe)
 {
 	int sk;
 	struct sockaddr_in addr;
+	struct inet_sk_entry ie;
+
+	if (read_inetsk_image(fe->id, &ie))
+		return -1;
 
-	show_one_inet_img("Restore", ie);
+	show_one_inet_img("Restore", &ie);
 
-	if (ie->family != AF_INET) {
-		pr_err("Unsupported socket family: %d\n", ie->family);
+	if (ie.family != AF_INET) {
+		pr_err("Unsupported socket family: %d\n", ie.family);
 		return -1;
 	}
 
-	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
-		pr_err("Unsupported socket type: %d\n", ie->type);
+	if ((ie.type != SOCK_STREAM) && (ie.type != SOCK_DGRAM)) {
+		pr_err("Unsupported socket type: %d\n", ie.type);
 		return -1;
 	}
 
-	sk = socket(ie->family, ie->type, ie->proto);
+	sk = socket(ie.family, ie.type, ie.proto);
 	if (sk < 0) {
 		pr_perror("Can't create unix socket");
 		return -1;
@@ -1204,37 +1248,37 @@ static int open_inet_sk(const struct inet_sk_entry *ie, int *img_fd)
 	 * bind() and listen(), and that's all.
 	 */
 	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = ie->family;
-	addr.sin_port = htons(ie->src_port);
-	memcpy(&addr.sin_addr.s_addr, ie->src_addr, sizeof(unsigned int) * 4);
+	addr.sin_family = ie.family;
+	addr.sin_port = htons(ie.src_port);
+	memcpy(&addr.sin_addr.s_addr, ie.src_addr, sizeof(unsigned int) * 4);
 
 	if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
 		pr_perror("Can't bind to a socket");
 		goto err;
 	}
 
-	if (ie->state == TCP_LISTEN) {
-		if (ie->proto != IPPROTO_TCP) {
-			pr_err("Wrong socket in listen state %d\n", ie->proto);
+	if (ie.state == TCP_LISTEN) {
+		if (ie.proto != IPPROTO_TCP) {
+			pr_err("Wrong socket in listen state %d\n", ie.proto);
 			goto err;
 		}
 
-		if (listen(sk, ie->backlog) == -1) {
+		if (listen(sk, ie.backlog) == -1) {
 			pr_perror("Can't listen on a socket");
 			goto err;
 		}
 	}
 
-	if (ie->state == TCP_ESTABLISHED) {
-		if (ie->proto != IPPROTO_UDP) {
+	if (ie.state == TCP_ESTABLISHED) {
+		if (ie.proto != IPPROTO_UDP) {
 			pr_err("Connected TCP socket in image\n");
 			goto err;
 		}
 
 		memset(&addr, 0, sizeof(addr));
-		addr.sin_family = ie->family;
-		addr.sin_port = htons(ie->dst_port);
-		memcpy(&addr.sin_addr.s_addr, ie->dst_addr, sizeof(ie->dst_addr));
+		addr.sin_family = ie.family;
+		addr.sin_port = htons(ie.dst_port);
+		memcpy(&addr.sin_addr.s_addr, ie.dst_addr, sizeof(ie.dst_addr));
 
 		if (connect(sk, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
 			pr_perror("Can't connect UDP socket back");
@@ -1242,49 +1286,19 @@ static int open_inet_sk(const struct inet_sk_entry *ie, int *img_fd)
 		}
 	}
 
-	if (move_img_fd(img_fd, ie->fd))
-		return -1;
-
-	return reopen_fd_as(ie->fd, sk);
+	return sk;
 
 err:
 	close(sk);
 	return -1;
 }
 
-static int prepare_inet_sockets(int pid)
-{
-	int isk_fd, ret = -1;
-
-	isk_fd = open_image_ro(CR_FD_INETSK, pid);
-	if (isk_fd < 0)
-		return -1;
-
-	while (1) {
-		struct inet_sk_entry ie;
-
-		ret = read_img_eof(isk_fd, &ie);
-		if (ret <= 0)
-			break;
-
-		ret = open_inet_sk(&ie, &isk_fd);
-		if (ret)
-			break;
-	}
-err:
-	close(isk_fd);
-	return ret;
-}
-
 int prepare_sockets(int pid)
 {
 	int err;
 
 	pr_info("%d: Opening sockets\n", pid);
-	err = prepare_unix_sockets(pid);
-	if (err)
-		return err;
-	return prepare_inet_sockets(pid);
+	return prepare_unix_sockets(pid);
 }
 
 void show_inetsk(int fd, struct cr_options *o)
@@ -1314,8 +1328,8 @@ void show_inetsk(int fd, struct cr_options *o)
 			}
 		}
 
-		pr_msg("fd %d family %d type %d proto %d state %d %s:%d <-> %s:%d\n",
-			ie.fd, ie.family, ie.type, ie.proto, ie.state, 
+		pr_msg("id %x family %d type %d proto %d state %d %s:%d <-> %s:%d\n",
+			ie.id, ie.family, ie.type, ie.proto, ie.state, 
 			src_addr, ie.src_port, dst_addr, ie.dst_port);
 	}
 
-- 
1.7.6.5


More information about the CRIU mailing list