[CRIU] [PATCH 09/21] soccr: add support for half-closed sockets
Andrei Vagin
avagin at openvz.org
Mon Nov 28 11:27:44 PST 2016
From: Andrei Vagin <avagin at virtuozzo.com>
A socket is in one of half-closed states, if it sent a fin packet
or it received a fin packet.
CRIU plays with fin packets to restore half-closed states too.
When we need to sent a fin packet from a socket, we can call
shutdown(SHUT_WR). When a fin packet has to be restore in
a received queue, criu generate a fin packet and send it via
a raw ip socket.
A raw packet is sent with the SOCCR_MARK mark to be able
to not block it.
Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
---
criu/Makefile.packages | 2 +-
soccr/soccr.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++-
soccr/soccr.h | 11 ++-
3 files changed, 185 insertions(+), 5 deletions(-)
diff --git a/criu/Makefile.packages b/criu/Makefile.packages
index 53fbdae..886394f 100644
--- a/criu/Makefile.packages
+++ b/criu/Makefile.packages
@@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES += libcap-dev
REQ-DEB-PKG-TEST-NAMES += libaio-dev
-export LIBS += -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
+export LIBS += -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
check-packages-failed:
$(warning Can not find some of the required libraries)
diff --git a/soccr/soccr.c b/soccr/soccr.c
index 31cb870..211b9e6 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -4,6 +4,9 @@
#include <sys/ioctl.h>
#include <errno.h>
#include <linux/sockios.h>
+#include <libnet.h>
+#include <assert.h>
+
#include "soccr.h"
#ifndef SIOCOUTQNSD
@@ -11,6 +14,20 @@
#define SIOCOUTQNSD 0x894B
#endif
+enum {
+ TCPF_ESTABLISHED = (1 << 1),
+ TCPF_SYN_SENT = (1 << 2),
+ TCPF_SYN_RECV = (1 << 3),
+ TCPF_FIN_WAIT1 = (1 << 4),
+ TCPF_FIN_WAIT2 = (1 << 5),
+ TCPF_TIME_WAIT = (1 << 6),
+ TCPF_CLOSE = (1 << 7),
+ TCPF_CLOSE_WAIT = (1 << 8),
+ TCPF_LAST_ACK = (1 << 9),
+ TCPF_LISTEN = (1 << 10),
+ TCPF_CLOSING = (1 << 11),
+};
+
static void (*log)(unsigned int loglevel, const char *format, ...)
__attribute__ ((__format__ (__printf__, 2, 3)));
static unsigned int log_level = 0;
@@ -90,6 +107,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
switch (ti->tcpi_state) {
case TCP_ESTABLISHED:
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
+ case TCP_LAST_ACK:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSING:
case TCP_CLOSE:
break;
default:
@@ -97,7 +119,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
return -1;
}
- data->state = TCP_ESTABLISHED;
+ data->state = ti->tcpi_state;
if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
logerr("Unable to get size of snd queue");
@@ -113,6 +135,17 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
data->unsq_len = size;
+ /* Don't account the fin packet. It doesn't countain real data. */
+ if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK |
+ TCPF_CLOSING | TCPF_CLOSE)) {
+ if (data->outq_len)
+ data->outq_len--;
+ else
+ data->flags |= SOCCR_FLAGS_ACKED_FIN;
+ data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
+ }
+
+
if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
logerr("Unable to get size of recv queue");
return -1;
@@ -330,6 +363,7 @@ static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
struct libsoccr_sk_data *data, unsigned data_size)
{
+ int mstate = 1 << data->state;
struct tcp_repair_opt opts[4];
int addr_size;
int onr = 0;
@@ -337,9 +371,17 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
if (!data || data_size < SOCR_DATA_MIN_SIZE)
return -1;
- if (data->state != TCP_ESTABLISHED)
+ if (data->state == TCP_LISTEN)
return -1;
+ if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE | TCPF_CLOSING))
+ data->inq_seq--;
+
+ /* outq_seq is adjusted due to not accointing the fin packet */
+ if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
+ TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
+ data->outq_seq--;
+
if (set_queue_seq(sk, TCP_RECV_QUEUE,
data->inq_seq - data->inq_len))
return -2;
@@ -403,9 +445,106 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
return 0;
}
+static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size, uint8_t flags)
+{
+ int ret, exit_code = -1;
+ char errbuf[LIBNET_ERRBUF_SIZE];
+ int mark = SOCCR_MARK;;
+ int libnet_type;
+ libnet_t *l;
+
+ if (data->dst_addr.sa.sa_family == AF_INET6)
+ libnet_type = LIBNET_RAW6;
+ else
+ libnet_type = LIBNET_RAW4;
+
+ l = libnet_init(
+ libnet_type, /* injection type */
+ NULL, /* network interface */
+ errbuf); /* errbuf */
+ if (l == NULL)
+ return -1;
+
+ if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
+ goto err;
+
+ ret = libnet_build_tcp(
+ ntohs(data->dst_addr.sin.sin_port), /* source port */
+ ntohs(data->src_addr.sin.sin_port), /* destination port */
+ data->inq_seq, /* sequence number */
+ data->outq_seq - data->outq_len, /* acknowledgement num */
+ flags, /* control flags */
+ data->rcv_wnd, /* window size */
+ 0, /* checksum */
+ 10, /* urgent pointer */
+ LIBNET_TCP_H + 20, /* TCP packet size */
+ NULL, /* payload */
+ 0, /* payload size */
+ l, /* libnet handle */
+ 0); /* libnet id */
+ if (ret == -1) {
+ loge("Can't build TCP header: %s\n", libnet_geterror(l));
+ goto err;
+ }
+
+ if (data->dst_addr.sa.sa_family == AF_INET6) {
+ struct libnet_in6_addr src, dst;
+
+ memcpy(&dst, &data->dst_addr.sin6.sin6_addr, sizeof(dst));
+ memcpy(&src, &data->src_addr.sin6.sin6_addr, sizeof(src));
+
+ ret = libnet_build_ipv6(
+ 0, 0,
+ LIBNET_TCP_H, /* length */
+ IPPROTO_TCP, /* protocol */
+ 64, /* hop limit */
+ dst, /* source IP */
+ src, /* destination IP */
+ NULL, /* payload */
+ 0, /* payload size */
+ l, /* libnet handle */
+ 0); /* libnet id */
+ } else if (data->dst_addr.sa.sa_family == AF_INET)
+ ret = libnet_build_ipv4(
+ LIBNET_IPV4_H + LIBNET_TCP_H + 20, /* length */
+ 0, /* TOS */
+ 242, /* IP ID */
+ 0, /* IP Frag */
+ 64, /* TTL */
+ IPPROTO_TCP, /* protocol */
+ 0, /* checksum */
+ data->dst_addr.sin.sin_addr.s_addr, /* source IP */
+ data->src_addr.sin.sin_addr.s_addr, /* destination IP */
+ NULL, /* payload */
+ 0, /* payload size */
+ l, /* libnet handle */
+ 0); /* libnet id */
+ else {
+ loge("Unknown socket family");
+ goto err;
+ }
+ if (ret == -1) {
+ loge("Can't build IP header: %s\n", libnet_geterror(l));
+ goto err;
+ }
+
+ ret = libnet_write(l);
+ if (ret == -1) {
+ loge("Unable to send a fin packet: %s", libnet_geterror(l));
+ goto err;
+ }
+
+ exit_code = 0;
+err:
+ libnet_destroy(l);
+ return exit_code;
+}
+
int libsoccr_set_sk_data(struct libsoccr_sk *sk,
struct libsoccr_sk_data *data, unsigned data_size)
{
+ int mstate = 1 << data->state;
+
if (data->flags & SOCCR_FLAGS_WINDOW) {
struct tcp_repair_window wopt = {
.snd_wl1 = data->snd_wl1,
@@ -414,13 +553,45 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
.rcv_wnd = data->rcv_wnd,
.rcv_wup = data->rcv_wup,
};
-
+
+ if (mstate & (TCPF_CLOSE_WAIT |
+ TCPF_LAST_ACK | TCPF_CLOSE | TCPF_CLOSING)) {
+ wopt.rcv_wup--;
+ wopt.rcv_wnd++;
+ }
+
if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
logerr("Unable to set window parameters");
return -1;
}
}
+ /*
+ * To restore a half closed sockets, fin packets has to be restored in
+ * recv and send queues. Here shutdown() is used to restore a fin
+ * packet in the send queue and a fake fin packet is send to restore it
+ * in the recv queue.
+ */
+ if (data->state == TCP_CLOSING)
+ shutdown(sk->fd, SHUT_WR);
+
+ /* Send a fin packet to the socket to restore it in a receive queue. */
+ if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE | TCPF_CLOSING)) {
+ if (send_fin(sk->fd, data, data_size, TH_ACK | TH_FIN) < 0)
+ return -1;
+ data->inq_seq++;
+ }
+
+ if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
+ TCPF_FIN_WAIT2 | TCPF_CLOSE))
+ shutdown(sk->fd, SHUT_WR);
+
+ if ((mstate & (TCPF_CLOSE)) && (data->flags & SOCCR_FLAGS_ACKED_FIN)) {
+ data->outq_seq++;
+ if (send_fin(sk->fd, data, data_size, TH_ACK) < 0)
+ return -1;
+ }
+
return 0;
}
diff --git a/soccr/soccr.h b/soccr/soccr.h
index 8bd0377..ca0d0b3 100644
--- a/soccr/soccr.h
+++ b/soccr/soccr.h
@@ -6,6 +6,9 @@
#include "config.h"
+/* All packets with this mark have not to be blocked. */
+#define SOCCR_MARK 0xC114
+
#ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
struct tcp_repair_window {
uint32_t snd_wl1;
@@ -82,7 +85,8 @@ struct libsoccr_sk_data {
__u32 timestamp;
__u32 flags; /* SOCCR_FLAGS_... below */
- __u32 snd_wl1;
+
+ __u32 snd_wl1; /* SOCCR_FLAGS_WINDOW */
__u32 snd_wnd;
__u32 max_window;
__u32 rcv_wnd;
@@ -109,6 +113,11 @@ struct libsoccr_sk_data {
#define SOCCR_FLAGS_WINDOW 0x1
/*
+ * The outgoing fin was acked.
+ */
+#define SOCCR_FLAGS_ACKED_FIN 0x2
+
+/*
* These two calls pause and resume the socket for and after C/R
* The first one returns an opaque handle that is to be used by all
* the subsequent calls.
--
2.7.4
More information about the CRIU
mailing list