[CRIU] [PATCH 2/8] parasite: support sockets queues
Kinsbursky Stanislav
skinsbursky at openvz.org
Tue Feb 28 05:31:19 EST 2012
This patch adds socket queue dump functionality. Key ideas:
1) sockets info is passed as plain array in parasite args.
2) the new socket option SO_PEEK_OFF, together with MSG_PEEK, is used to read
the queue's packets without removing them.
3) A packet buffer is allocated for each socket separately, sized to the
socket's receive buffer. For stream sockets this means that the queue
will be dumped in chunks of this size.
Note: the loop around sys_recvmsg() is required for DGRAM sockets - sys_recvmsg()
with MSG_PEEK returns only one packet per call.
Based on xemul@ patches.
Signed-off-by: Stanislav Kinsbursky <skinsbursky at openvz.org>
---
include/image.h | 6 ++
include/parasite.h | 9 +++
include/sockets.h | 6 ++
include/syscall-codes.h | 2 +
include/syscall.h | 16 ++++++
parasite.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 166 insertions(+), 0 deletions(-)
diff --git a/include/image.h b/include/image.h
index c9b7209..e98e682 100644
--- a/include/image.h
+++ b/include/image.h
@@ -103,6 +103,12 @@ struct inet_sk_entry {
u32 src_addr[4];
} __packed;
+struct sk_packet_entry { /* header written to the image in front of each dumped chunk of socket queue data */
+ u32 id_for; /* id of the socket the data belongs to (copied from sk_queue_item.sk_id) */
+ u32 length; /* number of payload bytes that follow in data[] (the sys_recvmsg() result) */
+ u8 data[0]; /* payload; the recvmsg iovec is pointed at this area */
+} __packed;
+
struct vma_entry {
u64 start;
u64 end;
diff --git a/include/parasite.h b/include/parasite.h
index f331d85..30e1d46 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -8,6 +8,7 @@
#include "compiler.h"
#include "syscall.h"
#include "image.h"
+#include "sockets.h"
#define __parasite_head __used __section(.parasite.head.text)
@@ -27,6 +28,7 @@
#define PARASITE_ERR_MPROTECT -1031
#define PARASITE_ERR_SIGACTION -1032
#define PARASITE_ERR_GETITIMER -1033
+#define PARASITE_ERR_IOCTL -1034
enum {
PARASITE_CMD_PINGME,
@@ -42,6 +44,7 @@ enum {
PARASITE_CMD_DUMP_ITIMERS,
PARASITE_CMD_DUMP_MISC,
PARASITE_CMD_DUMP_TID_ADDR,
+ PARASITE_CMD_DUMP_SK_QUEUES,
PARASITE_CMD_MAX,
};
@@ -96,6 +99,12 @@ struct parasite_dump_tid_addr {
unsigned int *tid_addr;
};
+struct parasite_dump_sk_queues { /* argument block for PARASITE_CMD_DUMP_SK_QUEUES */
+ parasite_status_t status; /* result of the command, filled in via SET_PARASITE_STATUS() */
+ unsigned nr_items; /* number of valid entries in items[] */
+ struct sk_queue_item items[0]; /* one entry per socket whose queue is to be dumped */
+};
+
/*
* Some useful offsets
*/
diff --git a/include/sockets.h b/include/sockets.h
index 3140f6b..68597a9 100644
--- a/include/sockets.h
+++ b/include/sockets.h
@@ -5,6 +5,12 @@
#include <unistd.h>
#include <stdbool.h>
+struct sk_queue_item { /* describes one socket whose queue the parasite dumps */
+ int fd; /* socket descriptor the parasite passes to getsockopt/setsockopt/recvmsg */
+ int type; /* presumably the socket type (stream vs. datagram); not read by the parasite code in this patch - TODO confirm */
+ unsigned int sk_id; /* id stored in sk_packet_entry.id_for for every dumped chunk */
+};
+
struct cr_fdset;
extern int try_dump_socket(pid_t pid, int fd, const struct cr_fdset *cr_fdset);
diff --git a/include/syscall-codes.h b/include/syscall-codes.h
index 2a470c6..44f3668 100644
--- a/include/syscall-codes.h
+++ b/include/syscall-codes.h
@@ -28,6 +28,8 @@
#define __NR_sendmsg 46
#define __NR_recvmsg 47
#define __NR_bind 49
+#define __NR_setsockopt 54
+#define __NR_getsockopt 55
#define __NR_clone 56
#define __NR_exit 60
#define __NR_wait4 61
diff --git a/include/syscall.h b/include/syscall.h
index f759587..aa4acd5 100644
--- a/include/syscall.h
+++ b/include/syscall.h
@@ -387,6 +387,22 @@ static long sys_recvmsg(int sockfd, struct msghdr *msg, int flags)
return syscall3(__NR_recvmsg, (long)sockfd, (long)msg, (long) flags);
}
+/*
+ * Raw getsockopt(2) wrapper.
+ *
+ * The kernel stores the option value through @optval and updates the length
+ * behind @optlen, so @optval must be a writable buffer (hence no const) and
+ * *@optlen must be initialized to the size of that buffer before the call.
+ *
+ * Returns whatever syscall5() returns for __NR_getsockopt.
+ */
+static long always_inline sys_getsockopt(int sockfd, int level, int optname,
+					 void *optval, socklen_t *optlen)
+{
+	return syscall5(__NR_getsockopt, (unsigned long)sockfd,
+			(unsigned long)level, (unsigned long)optname,
+			(unsigned long)optval, (unsigned long)optlen);
+}
+
+/*
+ * Raw setsockopt(2) wrapper: forwards all five arguments to syscall5()
+ * unchanged and hands back its result.
+ */
+static long always_inline sys_setsockopt(int sockfd, int level, int optname,
+					 const void *optval, socklen_t optlen)
+{
+	long rc;
+
+	rc = syscall5(__NR_setsockopt,
+		      (unsigned long)sockfd,
+		      (unsigned long)level,
+		      (unsigned long)optname,
+		      (unsigned long)optval,
+		      (unsigned long)optlen);
+	return rc;
+}
+
static void sys_set_tid_address(int *tid_addr) {
syscall1(__NR_set_tid_address, (long) tid_addr);
}
diff --git a/parasite.c b/parasite.c
index 97428ce..55222ff 100644
--- a/parasite.c
+++ b/parasite.c
@@ -8,6 +8,10 @@
#ifdef CONFIG_X86_64
+#ifndef SO_PEEK_OFF /* fallback for system headers that do not define SO_PEEK_OFF */
+#define SO_PEEK_OFF 42 /* same number as in Linux asm-generic/socket.h */
+#endif
+
static void *brk_start, *brk_end, *brk_tail;
static struct page_entry page;
@@ -40,6 +44,11 @@ static void brk_fini(void)
sys_munmap(brk_start, brk_end - brk_start);
}
+struct mem_array { /* NOTE(review): not referenced by any code visible in this patch - confirm it is needed */
+ unsigned long size; /* presumably the byte length of data[] - TODO confirm */
+ char data[0]; /* variable-length payload following the header */
+};
+
static void *brk_alloc(unsigned long bytes)
{
void *addr = NULL;
@@ -378,6 +387,122 @@ static int dump_tid_addr(struct parasite_dump_tid_addr *args)
return 0;
}
+/*
+ * Dump the receive queue of one socket into the image file.
+ *
+ * The queue is read with MSG_PEEK while SO_PEEK_OFF is enabled, so the data
+ * is not removed from the socket. Every chunk returned by sys_recvmsg() is
+ * written to @img_fd as a struct sk_packet_entry header followed by the
+ * payload bytes.
+ *
+ * @img_fd: descriptor the packet entries are written to
+ * @item:   socket to dump (item->fd) and the id stored in every entry
+ * @err:    receives the raw result / -errno of the step that failed
+ *
+ * Returns 0 on success or a PARASITE_ERR_* code on failure.
+ */
+static int dump_socket_queue(int img_fd, struct sk_queue_item *item, int *err)
+{
+	struct sk_packet_entry *pe;
+	unsigned long size;
+	socklen_t optlen = sizeof(int);
+	int rcvbuf = 0, peek_off = 0, minus_one = -1;
+	int ret;
+	int sock_fd = item->fd;
+
+	/*
+	 * Discover max DGRAM size. The option value gets its own variable:
+	 * storing it in ret would lose it as soon as the syscall result is
+	 * assigned. optlen must describe the buffer size on entry.
+	 */
+	ret = sys_getsockopt(sock_fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &optlen);
+	if (ret < 0) {
+		sys_write_msg("getsockopt failed\n");
+		*err = ret;
+		return PARASITE_ERR_FAIL;
+	}
+	/*
+	 * Note: 32 bytes will be used by kernel for protocol header.
+	 */
+	size = rcvbuf - 32;
+	/*
+	 * Try to alloc buffer for max supported DGRAM + our header.
+	 * Note: STREAM queue will be written by chunks of this size.
+	 */
+	pe = brk_alloc(size + sizeof(*pe));
+	if (!pe) {
+		sys_write_msg("not enough mem for skb\n");
+		*err = -ENOMEM;
+		return PARASITE_ERR_MMAP;
+	}
+	/*
+	 * Enable peek offset incrementation, starting at offset 0.
+	 */
+	*err = sys_setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF,
+			      &peek_off, sizeof(peek_off));
+	if (*err < 0) {
+		sys_write_msg("setsockopt fail\n");
+		ret = PARASITE_ERR_FAIL;
+		goto err_brk;
+	}
+
+	pe->id_for = item->sk_id;
+
+	while (1) {
+		struct iovec iov = {
+			.iov_base	= pe->data,
+			.iov_len	= size,
+		};
+		struct msghdr msg = {
+			.msg_name	= NULL,
+			.msg_namelen	= 0,
+			.msg_iov	= &iov,
+			.msg_iovlen	= 1,
+			.msg_control	= NULL,
+			.msg_controllen	= 0,
+			.msg_flags	= 0,
+		};
+		long len;
+
+		len = sys_recvmsg(sock_fd, &msg, MSG_DONTWAIT | MSG_PEEK);
+		if (len < 0) {
+			*err = len;
+			if (len == -EAGAIN)
+				break; /* we're done */
+			sys_write_msg("sys_recvmsg fail: error\n");
+			ret = PARASITE_ERR_FAIL;
+			goto err_set_sock;
+		}
+		/*
+		 * NOTE(review): a result of 0 is dumped as an empty entry and
+		 * the loop continues; confirm this cannot repeat forever
+		 * (e.g. on a stream socket whose peer has shut down).
+		 */
+		if (msg.msg_flags & MSG_TRUNC) {
+			/*
+			 * DGRAM truncated. This should not happen. But we have
+			 * to check...
+			 */
+			sys_write_msg("sys_recvmsg failed: truncated\n");
+			*err = -E2BIG;
+			ret = PARASITE_ERR_FAIL;
+			goto err_set_sock;
+		}
+
+		/* Header size is that of the struct, not of the pointer. */
+		pe->length = len;
+		len += sizeof(*pe);
+
+		*err = sys_write(img_fd, pe, len);
+		if (*err != len) {
+			sys_write_msg("sys_write failed\n");
+			ret = PARASITE_ERR_FAIL;
+			goto err_set_sock;
+		}
+	}
+	ret = 0;
+err_set_sock:
+	/* A negative offset switches peek-offset mode back off. */
+	sys_setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF,
+		       &minus_one, sizeof(minus_one));
+err_brk:
+	brk_free(size + sizeof(*pe));
+	return ret;
+}
+
+/*
+ * PARASITE_CMD_DUMP_SK_QUEUES handler.
+ *
+ * Receives the image descriptor over tsock, then dumps the queue of every
+ * socket listed in args->items[] into it. The first failure stops the walk
+ * and is recorded in args->status; on full success the status is set to 0/0.
+ * The image descriptor is closed on both paths.
+ *
+ * Returns 0 on success, the negative recv_fd() result if no descriptor could
+ * be received, or the failing dump_socket_queue() code.
+ */
+static int dump_skqueues(struct parasite_dump_sk_queues *args)
+{
+	parasite_status_t *status = &args->status;
+	int img_fd, idx, ret = 0;
+
+	img_fd = recv_fd(tsock);
+	if (img_fd < 0)
+		return img_fd;
+
+	for (idx = 0; idx < args->nr_items; idx++) {
+		int sys_err;
+
+		ret = dump_socket_queue(img_fd, &args->items[idx], &sys_err);
+		if (ret < 0) {
+			SET_PARASITE_STATUS(status, ret, sys_err);
+			break;
+		}
+	}
+
+	if (ret >= 0) {
+		ret = 0;
+		SET_PARASITE_STATUS(status, 0, 0);
+	}
+
+	sys_close(img_fd);
+	return ret;
+}
+
static int init(struct parasite_init_args *args)
{
int ret;
@@ -452,6 +577,8 @@ static int __used parasite_service(unsigned long cmd, void *args)
return dump_misc((struct parasite_dump_misc *)args);
case PARASITE_CMD_DUMP_TID_ADDR:
return dump_tid_addr((struct parasite_dump_tid_addr *)args);
+ case PARASITE_CMD_DUMP_SK_QUEUES:
+ return dump_skqueues((struct parasite_dump_sk_queues *)args);
default:
sys_write_msg("Unknown command to parasite\n");
break;
More information about the CRIU
mailing list