[CRIU] [PATCH v2 2/9] parasite: support sockets queues

Kinsbursky Stanislav skinsbursky at openvz.org
Wed Feb 29 08:06:40 EST 2012


This patch adds sockets queue dump functionality. Key ideas:
1) sockets info is passed as plain array in parasite args.
2) new socket option SO_PEEK_OFF with MSG_PEEK is used to read the
queue's packets.
3) Buffer for packet will be allocated for each socket separately and with
size of socket sending buffer. For stream sockets it means that its queue
will be dumped in chunks of this size.
Note: loop around sys_recvmsg() is required for DGRAM sockets - sys_recvmsg()
with MSG_PEEK will return only one packet.

Based on xemul@ patches.

Signed-off-by: Stanislav Kinsbursky <skinsbursky at openvz.org>

---
 include/image.h         |    6 ++
 include/parasite.h      |    9 +++
 include/sockets.h       |    6 ++
 include/syscall-codes.h |    2 +
 include/syscall.h       |   17 ++++++
 parasite.c              |  141 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 181 insertions(+), 0 deletions(-)

diff --git a/include/image.h b/include/image.h
index 09d7bc4..f37f678 100644
--- a/include/image.h
+++ b/include/image.h
@@ -101,6 +101,12 @@ struct inet_sk_entry {
 	u32	src_addr[4];
 } __packed;
 
+struct sk_packet_entry {
+	u32	id_for;		/* id of the socket this packet was queued on */
+	u32	length;		/* number of payload bytes in data[] */
+	u8	data[0];	/* packet payload, length bytes */
+} __packed;
+
 struct vma_entry {
 	u64	start;
 	u64	end;
diff --git a/include/parasite.h b/include/parasite.h
index c8c9973..2a67647 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -7,6 +7,7 @@
 
 #include "compiler.h"
 #include "image.h"
+#include "sockets.h"
 
 #define __parasite_head		__used __section(.parasite.head.text)
 
@@ -26,6 +27,7 @@
 #define PARASITE_ERR_MPROTECT	-1031
 #define PARASITE_ERR_SIGACTION  -1032
 #define PARASITE_ERR_GETITIMER  -1033
+#define PARASITE_ERR_IOCTL	-1034
 
 enum {
 	PARASITE_CMD_PINGME,
@@ -41,6 +43,7 @@ enum {
 	PARASITE_CMD_DUMP_ITIMERS,
 	PARASITE_CMD_DUMP_MISC,
 	PARASITE_CMD_DUMP_TID_ADDR,
+	PARASITE_CMD_DUMP_SK_QUEUES,
 
 	PARASITE_CMD_MAX,
 };
@@ -95,6 +98,12 @@ struct parasite_dump_tid_addr {
 	unsigned int *tid_addr;
 };
 
+struct parasite_dump_sk_queues {
+	parasite_status_t	status;		/* filled in by dump_skqueues() */
+	unsigned		nr_items;	/* number of entries in items[] */
+	struct sk_queue_item	items[0];	/* sockets whose queues are dumped */
+};
+
 /*
  * Some useful offsets
  */
diff --git a/include/sockets.h b/include/sockets.h
index 3140f6b..68597a9 100644
--- a/include/sockets.h
+++ b/include/sockets.h
@@ -5,6 +5,12 @@
 #include <unistd.h>
 #include <stdbool.h>
 
+struct sk_queue_item {
+	int		fd;	/* socket fd the parasite reads the queue from */
+	int		type;	/* presumably the socket type (SOCK_*) -- TODO confirm */
+	unsigned int	sk_id;	/* id stored in sk_packet_entry.id_for */
+};
+
 struct cr_fdset;
 extern int try_dump_socket(pid_t pid, int fd, const struct cr_fdset *cr_fdset);
 
diff --git a/include/syscall-codes.h b/include/syscall-codes.h
index 7951771..86adf75 100644
--- a/include/syscall-codes.h
+++ b/include/syscall-codes.h
@@ -28,6 +28,8 @@
 #define __NR_sendmsg		46
 #define __NR_recvmsg		47
 #define __NR_bind		49
+#define __NR_setsockopt		54
+#define __NR_getsockopt		55
 #define __NR_clone		56
 #define __NR_exit		60
 #define __NR_wait4		61
diff --git a/include/syscall.h b/include/syscall.h
index a2b1a9e..6f284aa 100644
--- a/include/syscall.h
+++ b/include/syscall.h
@@ -3,6 +3,7 @@
 
 #include <sys/types.h>
 #include <sys/time.h>
+#include <arpa/inet.h>
 
 #include "types.h"
 #include "compiler.h"
@@ -387,6 +388,22 @@ static long sys_recvmsg(int sockfd, struct msghdr *msg, int flags)
 	return syscall3(__NR_recvmsg, (long)sockfd, (long)msg, (long) flags);
 }
 
+/* getsockopt(2) wrapper; optval is not const because the kernel fills it. */
+static long always_inline sys_getsockopt(int sockfd, int level, int optname,
+					 void *optval, socklen_t *optlen)
+{
+	return syscall5(__NR_getsockopt, (long)sockfd, (long)level, (long)optname,
+			(long)optval, (long)optlen);
+}
+
+/* setsockopt(2) wrapper: passes optlen bytes at optval for level/optname. */
+static long always_inline sys_setsockopt(int sockfd, int level, int optname,
+					 const void *optval, socklen_t optlen)
+{
+	return syscall5(__NR_setsockopt, (long)sockfd, (long)level, (long)optname,
+			(long)optval, (long)optlen);
+}
+
 static void sys_set_tid_address(int *tid_addr) {
 	syscall1(__NR_set_tid_address, (long) tid_addr);
 }
diff --git a/parasite.c b/parasite.c
index 9a9114f..e012426 100644
--- a/parasite.c
+++ b/parasite.c
@@ -11,6 +11,10 @@
 
 #ifdef CONFIG_X86_64
 
+#ifndef SO_PEEK_OFF
+#define SO_PEEK_OFF            42	/* fallback when system headers lack it */
+#endif
+
 static void *brk_start, *brk_end, *brk_tail;
 
 static struct page_entry page;
@@ -43,6 +47,11 @@ static void brk_fini(void)
 	sys_munmap(brk_start, brk_end - brk_start);
 }
 
+struct mem_array {
+	unsigned long size;	/* presumably the byte size of data[] -- TODO confirm */
+	char data[0];		/* NOTE(review): struct is not referenced in this patch */
+};
+
 static void *brk_alloc(unsigned long bytes)
 {
 	void *addr = NULL;
@@ -381,6 +390,136 @@ static int dump_tid_addr(struct parasite_dump_tid_addr *args)
 	return 0;
 }
 
+/*
+ * Dump the packets queued on item->fd to img_fd as sk_packet_entry records.
+ * Data is read with MSG_PEEK + SO_PEEK_OFF; the original offset is restored.
+ * Returns 0 or a PARASITE_ERR_* code; on failure *err holds the error detail.
+ */
+static int dump_socket_queue(int img_fd, struct sk_queue_item *item, int *err)
+{
+	struct sk_packet_entry *pe;
+	unsigned long size;
+	socklen_t tmp;
+	int ret, orig_peek_off, sndbuf;
+	int sock_fd = item->fd;
+
+	/* Save original peek offset; optlen is in/out, so it must be set. */
+	tmp = sizeof(orig_peek_off);
+	ret = sys_getsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, &tmp);
+	if (ret < 0) {
+		sys_write_msg("getsockopt failed\n");
+		*err = ret;
+		return PARASITE_ERR_FAIL;
+	}
+	/*
+	 * Discover max DGRAM size. The value gets its own variable: reusing
+	 * ret would let the syscall's return code overwrite it.
+	 */
+	tmp = sizeof(sndbuf);
+	ret = sys_getsockopt(sock_fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, &tmp);
+	if (ret < 0) {
+		sys_write_msg("getsockopt failed\n");
+		*err = ret;
+		return PARASITE_ERR_FAIL;
+	}
+	/* Note: 32 bytes will be used by kernel for protocol header. */
+	size = sndbuf - 32;
+	/*
+	 * Try to alloc buffer for max supported DGRAM + our header.
+	 * Note: STREAM queue will be written by chunks of this size.
+	 */
+	pe = brk_alloc(size + sizeof(struct sk_packet_entry));
+	if (!pe) {
+		sys_write_msg("not enough mem for skb\n");
+		*err = -ENOMEM;
+		return PARASITE_ERR_MMAP;
+	}
+	/*
+	 * Enable peek offset incrementation.
+	 */
+	ret = 0;
+	*err = sys_setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &ret, sizeof(int));
+	if (*err < 0) {
+		sys_write_msg("setsockopt fail\n");
+		ret = PARASITE_ERR_FAIL;
+		goto err_brk;
+	}
+
+	pe->id_for = item->sk_id;
+
+	while (1) {
+		struct iovec iov = {
+			.iov_base = pe->data,
+			.iov_len = size,
+		};
+		struct msghdr msg = {
+			.msg_name = NULL,
+			.msg_namelen = 0,
+			.msg_iov = &iov,
+			.msg_iovlen = 1,
+			.msg_control = NULL,
+			.msg_controllen = 0,
+			.msg_flags = 0,
+		};
+
+		*err = sys_recvmsg(sock_fd, &msg, MSG_DONTWAIT | MSG_PEEK);
+		if (*err == 0 || *err == -EAGAIN)
+			break; /* peer shut down or queue drained: we're done */
+		if (*err < 0) {
+			sys_write_msg("sys_recvmsg fail: error\n");
+			ret = PARASITE_ERR_FAIL;
+			goto err_set_sock;
+		}
+		if (msg.msg_flags & MSG_TRUNC) {
+			/* DGRAM truncated. Should not happen, but check. */
+			sys_write_msg("sys_recvmsg failed: truncated\n");
+			ret = PARASITE_ERR_FAIL;
+			*err = -E2BIG;
+			goto err_set_sock;
+		}
+		pe->length = *err;
+		*err = sys_write(img_fd, pe, sizeof(*pe) + pe->length);
+		if (*err != sizeof(*pe) + pe->length) {
+			sys_write_msg("sys_write failed\n");
+			ret = PARASITE_ERR_WRITE;
+			goto err_set_sock;
+		}
+	}
+	ret = 0;
+err_set_sock:
+	/* Restore original peek offset. */
+	sys_setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, sizeof(int));
+err_brk:
+	brk_free(size + sizeof(struct sk_packet_entry));
+	return ret;
+}
+
+/* Receive the image fd via recv_fd(tsock), then dump each socket in args. */
+static int dump_skqueues(struct parasite_dump_sk_queues *args)
+{
+	parasite_status_t *st = &args->status;
+	int image, n, res = -1;
+
+	image = recv_fd(tsock);
+	if (image < 0)
+		return image;
+
+	for (n = 0; n < args->nr_items; n++) {
+		int sys_err;
+
+		res = dump_socket_queue(image, &args->items[n], &sys_err);
+		if (res < 0) {
+			SET_PARASITE_STATUS(st, res, sys_err);
+			goto out_close;
+		}
+	}
+	res = 0;
+	SET_PARASITE_STATUS(st, 0, 0);
+out_close:
+	sys_close(image);
+	return res;
+}
+
 static int init(struct parasite_init_args *args)
 {
 	int ret;
@@ -455,6 +594,8 @@ static int __used parasite_service(unsigned long cmd, void *args)
 		return dump_misc((struct parasite_dump_misc *)args);
 	case PARASITE_CMD_DUMP_TID_ADDR:
 		return dump_tid_addr((struct parasite_dump_tid_addr *)args);
+	case PARASITE_CMD_DUMP_SK_QUEUES:
+		return dump_skqueues((struct parasite_dump_sk_queues *)args);
 	default:
 		sys_write_msg("Unknown command to parasite\n");
 		break;



More information about the CRIU mailing list