[CRIU] [PATCH 03/11] soccr: add support for half-closed sockets
Andrei Vagin
avagin at virtuozzo.com
Mon Nov 21 17:45:20 PST 2016
On Mon, Nov 21, 2016 at 10:36:29PM +0300, Pavel Emelyanov wrote:
> On 11/11/2016 10:10 AM, Andrei Vagin wrote:
> > From: Andrei Vagin <avagin at virtuozzo.com>
> >
> > A socket is in one of the half-closed states if it has sent a fin packet
> > or has received a fin packet.
> >
> > CRIU plays with fin packets to restore half-closed states too.
> >
> > When we need to send a fin packet from a socket, we can call
> > shutdown(SHUT_WR). When a fin packet has to be restored in
> > a receive queue, criu generates a fin packet and sends it via
> > a raw ip socket.
> >
> > The raw packet is sent with the SOCCR_MARK mark so that
> > it can be exempted from blocking.
> >
> > Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
> > ---
> > criu/Makefile.packages | 2 +-
> > soccr/soccr.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++-
> > soccr/soccr.h | 18 +++++-
> > 3 files changed, 180 insertions(+), 5 deletions(-)
> >
> > diff --git a/criu/Makefile.packages b/criu/Makefile.packages
> > index 53fbdae..886394f 100644
> > --- a/criu/Makefile.packages
> > +++ b/criu/Makefile.packages
> > @@ -19,7 +19,7 @@ REQ-DEB-PKG-NAMES += libcap-dev
> >
> > REQ-DEB-PKG-TEST-NAMES += libaio-dev
> >
> > -export LIBS += -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/
> > +export LIBS += -lrt -lpthread -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet
> >
> > check-packages-failed:
> > $(warning Can not find some of the required libraries)
> > diff --git a/soccr/soccr.c b/soccr/soccr.c
> > index 7c6bfb1..af39eda 100644
> > --- a/soccr/soccr.c
> > +++ b/soccr/soccr.c
> > @@ -4,6 +4,9 @@
> > #include <sys/ioctl.h>
> > #include <errno.h>
> > #include <linux/sockios.h>
> > +#include <libnet.h>
> > +#include <assert.h>
> > +
> > #include "soccr.h"
> >
> > #ifndef SIOCOUTQNSD
> > @@ -11,6 +14,20 @@
> > #define SIOCOUTQNSD 0x894B
> > #endif
> >
> > +enum {
> > + TCPF_ESTABLISHED = (1 << 1),
> > + TCPF_SYN_SENT = (1 << 2),
> > + TCPF_SYN_RECV = (1 << 3),
> > + TCPF_FIN_WAIT1 = (1 << 4),
> > + TCPF_FIN_WAIT2 = (1 << 5),
> > + TCPF_TIME_WAIT = (1 << 6),
> > + TCPF_CLOSE = (1 << 7),
> > + TCPF_CLOSE_WAIT = (1 << 8),
> > + TCPF_LAST_ACK = (1 << 9),
> > + TCPF_LISTEN = (1 << 10),
> > + TCPF_CLOSING = (1 << 11),
> > +};
> > +
> > static void (*log)(unsigned int loglevel, const char *format, ...)
> > __attribute__ ((__format__ (__printf__, 2, 3)));
> > static unsigned int log_level = 0;
> > @@ -89,6 +106,11 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> >
> > switch (ti->tcpi_state) {
> > case TCP_ESTABLISHED:
> > + case TCP_FIN_WAIT1:
> > + case TCP_FIN_WAIT2:
> > + case TCP_LAST_ACK:
> > + case TCP_CLOSE_WAIT:
> > + case TCP_CLOSING:
> > case TCP_CLOSE:
> > break;
> > default:
> > @@ -96,7 +118,7 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> > return -1;
> > }
> >
> > - data->state = TCP_ESTABLISHED;
> > + data->state = ti->tcpi_state;
> >
> > if (ioctl(sk->fd, SIOCOUTQ, &size) == -1) {
> > loge("Unable to get size of snd queue");
> > @@ -112,6 +134,14 @@ static int refresh_sk(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, str
> >
> > data->unsq_len = size;
> >
> > + /* Don't account the fin packet. It doesn't contain real data. */
> > + if ((1 << data->state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK | TCPF_CLOSING)) {
> > + assert(data->outq_len > 0);
>
> assert?! We have BUG_ON-s in criu for this, don't we?
soccr is a separate library.
>
> > + data->outq_len--;
> > + data->unsq_len = data->unsq_len ? data->unsq_len - 1 : 0;
> > + }
> > +
> > +
> > if (ioctl(sk->fd, SIOCINQ, &size) == -1) {
> > loge("Unable to get size of recv queue");
> > return -1;
> > @@ -325,12 +355,22 @@ static int set_queue_seq(struct libsoccr_sk *sk, int queue, __u32 seq)
> > int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk,
> > struct libsoccr_sk_data *data, unsigned data_size)
> > {
> > + int mstate = 1 << data->state;
> > +
> > if (!data || data_size < SOCR_DATA_MIN_SIZE)
> > return -1;
> >
> > - if (data->state != TCP_ESTABLISHED)
> > + if (data->state == TCP_LISTEN)
> > return -1;
>
> This doesn't look correct, we don't support more states here.
I don't understand what you want to say here. This set adds support for
other states.
>
> > + if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE))
> > + data->inq_seq--;
> > +
> > + /* outq_seq is adjusted due to not accounting the fin packet */
> > + if (mstate & (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 |
> > + TCPF_LAST_ACK | TCPF_CLOSING | TCPF_CLOSE))
> > + data->outq_seq--;
> > +
> > if (set_queue_seq(sk, TCP_RECV_QUEUE,
> > data->inq_seq - data->inq_len))
> > return -2;
> > @@ -400,6 +440,98 @@ int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk,
> > return 0;
> > }
> >
> > +static int send_fin(int sk, struct libsoccr_sk_data *data, unsigned data_size)
> > +{
> > + int ret, exit_code = -1;
> > + char errbuf[LIBNET_ERRBUF_SIZE];
> > + int mark = SOCCR_MARK;;
> > + int libnet_type;
> > + libnet_t *l;
> > +
> > + libnet_type = data->family == AF_INET6 ? LIBNET_RAW6 : LIBNET_RAW4;
> > +
> > + l = libnet_init(
> > + libnet_type, /* injection type */
> > + NULL, /* network interface */
> > + errbuf); /* errbuf */
> > + if (l == NULL)
> > + return -1;
> > +
> > + if (setsockopt(l->fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)))
> > + goto err;
> > +
> > + ret = libnet_build_tcp(
> > + data->dst_port, /* source port */
> > + data->src_port, /* destination port */
> > + data->inq_seq, /* sequence number */
> > + data->outq_seq - data->outq_len, /* acknowledgement num */
> > + TH_FIN | TH_ACK, /* control flags */
> > + data->rcv_wnd, /* window size */
> > + 0, /* checksum */
> > + 10, /* urgent pointer */
> > + LIBNET_TCP_H + 20, /* TCP packet size */
> > + NULL, /* payload */
> > + 0, /* payload size */
> > + l, /* libnet handle */
> > + 0); /* libnet id */
> > + if (ret == -1) {
> > + loge("Can't build TCP header: %s\n", libnet_geterror(l));
> > + goto err;
> > + }
> > +
> > + if (data->family == AF_INET6) {
> > + struct libnet_in6_addr src, dst;
> > +
> > + memcpy(&dst, data->dst_addr, sizeof(dst));
> > + memcpy(&src, data->src_addr, sizeof(src));
> > +
> > + ret = libnet_build_ipv6(
> > + 0, 0,
> > + LIBNET_TCP_H, /* length */
> > + IPPROTO_TCP, /* protocol */
> > + 64, /* hop limit */
> > + dst, /* source IP */
> > + src, /* destination IP */
> > + NULL, /* payload */
> > + 0, /* payload size */
> > + l, /* libnet handle */
> > + 0); /* libnet id */
> > + } else if (data->family == AF_INET)
> > + ret = libnet_build_ipv4(
> > + LIBNET_IPV4_H + LIBNET_TCP_H + 20, /* length */
> > + 0, /* TOS */
> > + 242, /* IP ID */
> > + 0, /* IP Frag */
> > + 64, /* TTL */
> > + IPPROTO_TCP, /* protocol */
> > + 0, /* checksum */
> > + data->dst_addr[0], /* source IP */
> > + data->src_addr[0], /* destination IP */
> > + NULL, /* payload */
> > + 0, /* payload size */
> > + l, /* libnet handle */
> > + 0); /* libnet id */
> > + else {
> > + loge("Unknown socket family");
> > + goto err;
> > + }
> > + if (ret == -1) {
> > + loge("Can't build IP header: %s\n", libnet_geterror(l));
> > + goto err;
> > + }
> > +
> > + ret = libnet_write(l);
> > + if (ret == -1) {
> > + loge("Unable to send a fin packet: %s", libnet_geterror(l));
> > + goto err;
> > + }
> > +
> > + exit_code = 0;
> > +err:
> > + libnet_destroy(l);
> > + return exit_code;
> > +}
> > +
> > int libsoccr_set_sk_data(struct libsoccr_sk *sk,
> > struct libsoccr_sk_data *data, unsigned data_size)
> > {
> > @@ -411,13 +543,40 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
> > .rcv_wnd = data->rcv_wnd,
> > .rcv_wup = data->rcv_wup,
> > };
> > -
> > +
> > + if ((1 << data->state) & ((1 << TCP_CLOSE_WAIT) |
> > + (1 << TCP_LAST_ACK) |
> > + (1 << TCP_CLOSE))) {
> > + wopt.rcv_wup--;
> > + wopt.rcv_wnd++;
> > + }
> > +
> > if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_WINDOW, &wopt, sizeof(wopt))) {
> > loge("Unable to set window parameters");
> > return -1;
> > }
> > }
> >
> > + if (data->flags & SOCCR_FLAGS_ADDR) {
> > + int mstate = 1 << data->state;
> > +
> > + if (data->state == TCP_CLOSING) {
> > + shutdown(sk->fd, SHUT_WR);
> > + }
> > + if (mstate & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSE)) {
> > + if (send_fin(sk->fd, data, data_size) < 0)
> > + return -1;
> > + }
> > +
> > + if (mstate & (TCPF_LAST_ACK | TCPF_FIN_WAIT1 |
> > + TCPF_FIN_WAIT2 | TCPF_CLOSE)) {
> > + shutdown(sk->fd, SHUT_WR);
> > + }
> > + } else if (data->state != TCP_ESTABLISHED) {
> > + loge("Unable to restore a socket state: %d", data->state);
> > + return -1;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/soccr/soccr.h b/soccr/soccr.h
> > index 4e272d5..a3a950b 100644
> > --- a/soccr/soccr.h
> > +++ b/soccr/soccr.h
> > @@ -5,6 +5,9 @@
> >
> > #include "config.h"
> >
> > +/* All packets with this mark have not to be blocked. */
> > +#define SOCCR_MARK 0xC114
> > +
> > #ifndef CONFIG_HAS_TCP_REPAIR_WINDOW
> > struct tcp_repair_window {
> > uint32_t snd_wl1;
> > @@ -75,11 +78,18 @@ struct libsoccr_sk_data {
> > __u32 timestamp;
> >
> > __u32 flags; /* SOCCR_FLAGS_... below */
> > - __u32 snd_wl1;
> > +
> > + __u32 snd_wl1; /* SOCCR_FLAGS_WINDOW */
> > __u32 snd_wnd;
> > __u32 max_window;
> > __u32 rcv_wnd;
> > __u32 rcv_wup;
> > +
> > + __u32 family; /* SOCCR_FLAGS_ADDR */
> > + __u32 src_port;
> > + __u32 dst_port;
> > + __u32 src_addr[4];
> > + __u32 dst_addr[4];
>
> I cannot find where libsoccr initializes these values.
They are initialized by criu.
>
> > };
> >
> > /*
> > @@ -99,6 +109,12 @@ struct libsoccr_sk_data {
> > #define SOCCR_FLAGS_WINDOW 0x1
> >
> > /*
> > + * Source and destination addresses, which are required to restore
> > + * a socket state.
> > + */
> > +#define SOCCR_FLAGS_ADDR 0x2
> > +
> > +/*
> > * These two calls pause and resume the socket for and after C/R
> > * The first one returns an opaque handle that is to be used by all
> > * the subsequent calls.
> >
>
More information about the CRIU
mailing list