[CRIU] [PATCH 2/3] net: set the IP_CT_TCP_FLAG_BE_LIBERAL flag for conntracks

Andrey Vagin avagin at openvz.org
Thu Feb 4 11:36:15 PST 2016


From: Andrew Vagin <avagin at virtuozzo.com>

Currently the kernel doesn't report sequence numbers for conntracks
and tries to restore them from the first packets.
When we are restoring a TCP connection, we send a window probe and
set seq - 1 in it to get an ACK immediately.

        /* Use a previous sequence.  This should cause the other
         * end to send an ack.  Don't queue or clone SKB, just
         * send it.
         */
        tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);

But conntrack doesn't like this, because we then get an ACK which is
greater than seq. To conntrack it looks as if we are trying to ACK data
which we haven't received yet.

[  735.528073] td_maxwin == 0
               seq=1081132048 ack=2965916432+(0) sack=2965916432+(0) win=342 end=1081132048
               tcp_in_window: sender end=0 maxend=0 maxwin=0 scale=0 receiver end=0 maxend=0 maxwin=0 scale=0
[  735.533409] log_invalid:
               seq=2965916431 ack=1081132049+(0) sack=1081132049+(0) win=342 end=2965916431
               tcp_in_window: sender end=2965916431 maxend=2965916773 maxwin=342 scale=0 receiver end=1081132048 maxend=1081132390 maxwin=342 scale=0
[  735.537651] nf_ct_tcp: ACK is over the upper bound (ACKed data not seen yet)

The kernel sets IP_CT_TCP_FLAG_BE_LIBERAL for new conntracks,
if we are in the middle of a connection.

Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 Makefile |  6 +++---
 net.c    | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 8a0a230..284f67f 100644
--- a/Makefile
+++ b/Makefile
@@ -141,12 +141,12 @@ ifneq ($(PIEGEN),no)
 endif
 endif
 
-cflags-y		+= -iquote include -iquote pie -iquote .
+cflags-y		+= -iquote include -iquote pie -iquote . -I/usr/include/libnl3
 cflags-y		+= -iquote $(ARCH_DIR) -iquote $(ARCH_DIR)/include
 cflags-y		+= -fno-strict-aliasing
 export cflags-y
 
-LIBS		:= -lrt -lpthread -lprotobuf-c -ldl
+LIBS		:= -lrt -lpthread -lprotobuf-c -ldl -lnl-3
 
 DEFINES		+= -D_FILE_OFFSET_BITS=64
 DEFINES		+= -D_GNU_SOURCE
@@ -161,7 +161,7 @@ ifeq ($(DEBUG),1)
 	DEFINES += -DCR_DEBUG
 	CFLAGS	+= -O0 -ggdb3
 else
-	CFLAGS	+= -O2
+	CFLAGS	+= -O2 -g
 endif
 
 ifeq ($(GMON),1)
diff --git a/net.c b/net.c
index c611f61..6d7e541 100644
--- a/net.c
+++ b/net.c
@@ -12,6 +12,7 @@
 #include <sys/mount.h>
 #include <net/if.h>
 #include <linux/sockios.h>
+#include <libnl3/netlink/msg.h>
 
 #include "imgset.h"
 #include "syscall-types.h"
@@ -360,6 +361,54 @@ static int dump_one_nf(struct nlmsghdr *hdr, void *arg)
 	return 0;
 }
 
+static int ct_restore_callback(struct nlmsghdr *nlh)
+{
+	struct nfgenmsg *msg;
+	struct nlattr *tb[CTA_MAX+1], *tbp[CTA_PROTOINFO_MAX + 1], *tb_tcp[CTA_PROTOINFO_TCP_MAX+1];
+	int err;
+
+	msg = NLMSG_DATA(nlh);
+
+	if (msg->nfgen_family != AF_INET && msg->nfgen_family != AF_INET6)
+		return 0;
+
+	err = nlmsg_parse(nlh, sizeof(struct nfgenmsg), tb, CTA_MAX, NULL);
+	if (err < 0)
+		return -1;
+
+	if (!tb[CTA_PROTOINFO])
+		return 0;
+
+	err = nla_parse_nested(tbp, CTA_PROTOINFO_MAX, tb[CTA_PROTOINFO], NULL);
+	if (err < 0)
+		return -1;
+
+	if (!tbp[CTA_PROTOINFO_TCP])
+		return 0;
+
+	err = nla_parse_nested(tb_tcp, CTA_PROTOINFO_TCP_MAX, tbp[CTA_PROTOINFO_TCP], NULL);
+	if (err < 0)
+		return -1;
+
+	if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
+		struct nf_ct_tcp_flags *flags;
+
+		flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
+		flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+		flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
+	if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
+		struct nf_ct_tcp_flags *flags;
+
+		flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
+		flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+		flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
+	return 0;
+}
+
 static int restore_nf_ct(int pid, int type)
 {
 	struct nlmsghdr *nlh = NULL;
@@ -405,6 +454,10 @@ static int restore_nf_ct(int pid, int type)
 			goto out;
 		}
 
+		if (type == CR_FD_NETNF_CT)
+			if (ct_restore_callback(nlh))
+				goto out;
+
 		nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK|NLM_F_CREATE;
 		ret = do_rtnl_req(sk, nlh, nlh->nlmsg_len, NULL, NULL, NULL);
 		if (ret)
-- 
2.4.3



More information about the CRIU mailing list