[CRIU] [PATCH 2/3] net: set the IP_CT_TCP_FLAG_BE_LIBERAL flag for conntracks
Andrey Vagin
avagin at openvz.org
Thu Feb 4 11:36:15 PST 2016
From: Andrew Vagin <avagin at virtuozzo.com>
Currently the kernel doesn't report sequence numbers for conntracks
and tries to restore them from the first packets.
When we are restoring a TCP connection, we send a window probe and
set seq - 1 in it to get an ACK immediately.
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
* send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
But conntrack doesn't like this, because we then get an ACK which is greater than seq.
It looks like we are trying to ack data which we haven't received yet.
[ 735.528073] td_maxwin == 0
seq=1081132048 ack=2965916432+(0) sack=2965916432+(0) win=342 end=1081132048
tcp_in_window: sender end=0 maxend=0 maxwin=0 scale=0 receiver end=0 maxend=0 maxwin=0 scale=0
[ 735.533409] log_invalid:
seq=2965916431 ack=1081132049+(0) sack=1081132049+(0) win=342 end=2965916431
tcp_in_window: sender end=2965916431 maxend=2965916773 maxwin=342 scale=0 receiver end=1081132048 maxend=1081132390 maxwin=342 scale=0
[ 735.537651] nf_ct_tcp: ACK is over the upper bound (ACKed data not seen yet)
The kernel sets IP_CT_TCP_FLAG_BE_LIBERAL for new conntracks
if we are in the middle of a connection, so set the same flag
for TCP conntracks when restoring them.
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
Makefile | 6 +++---
net.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/Makefile b/Makefile
index 8a0a230..284f67f 100644
--- a/Makefile
+++ b/Makefile
@@ -141,12 +141,12 @@ ifneq ($(PIEGEN),no)
endif
endif
-cflags-y += -iquote include -iquote pie -iquote .
+cflags-y += -iquote include -iquote pie -iquote . -I/usr/include/libnl3
cflags-y += -iquote $(ARCH_DIR) -iquote $(ARCH_DIR)/include
cflags-y += -fno-strict-aliasing
export cflags-y
-LIBS := -lrt -lpthread -lprotobuf-c -ldl
+LIBS := -lrt -lpthread -lprotobuf-c -ldl -lnl-3
DEFINES += -D_FILE_OFFSET_BITS=64
DEFINES += -D_GNU_SOURCE
@@ -161,7 +161,7 @@ ifeq ($(DEBUG),1)
DEFINES += -DCR_DEBUG
CFLAGS += -O0 -ggdb3
else
- CFLAGS += -O2
+ CFLAGS += -O2 -g
endif
ifeq ($(GMON),1)
diff --git a/net.c b/net.c
index c611f61..6d7e541 100644
--- a/net.c
+++ b/net.c
@@ -12,6 +12,7 @@
#include <sys/mount.h>
#include <net/if.h>
#include <linux/sockios.h>
+#include <libnl3/netlink/msg.h>
#include "imgset.h"
#include "syscall-types.h"
@@ -360,6 +361,54 @@ static int dump_one_nf(struct nlmsghdr *hdr, void *arg)
return 0;
}
+static int ct_restore_callback(struct nlmsghdr *nlh)
+{
+ struct nfgenmsg *msg;
+ struct nlattr *tb[CTA_MAX+1], *tbp[CTA_PROTOINFO_MAX + 1], *tb_tcp[CTA_PROTOINFO_TCP_MAX+1];
+ int err;
+ /* Set IP_CT_TCP_FLAG_BE_LIBERAL in the TCP flags attributes carried by nlh; returns 0, or -1 if attribute parsing fails. */
+ msg = NLMSG_DATA(nlh);
+ /* Only AF_INET and AF_INET6 messages are handled; any other family is left untouched. */
+ if (msg->nfgen_family != AF_INET && msg->nfgen_family != AF_INET6)
+ return 0;
+ /* The top-level attributes follow the struct nfgenmsg header. */
+ err = nlmsg_parse(nlh, sizeof(struct nfgenmsg), tb, CTA_MAX, NULL);
+ if (err < 0)
+ return -1;
+ /* No CTA_PROTOINFO attribute: nothing to modify, not an error. */
+ if (!tb[CTA_PROTOINFO])
+ return 0;
+ /* Descend into the nested protocol info attributes. */
+ err = nla_parse_nested(tbp, CTA_PROTOINFO_MAX, tb[CTA_PROTOINFO], NULL);
+ if (err < 0)
+ return -1;
+ /* No CTA_PROTOINFO_TCP attribute: nothing to modify, not an error. */
+ if (!tbp[CTA_PROTOINFO_TCP])
+ return 0;
+ /* Descend into the nested TCP-specific attributes. */
+ err = nla_parse_nested(tb_tcp, CTA_PROTOINFO_TCP_MAX, tbp[CTA_PROTOINFO_TCP], NULL);
+ if (err < 0)
+ return -1;
+ /* The flags are written through the nla_data() pointer; no attribute is re-added afterwards. */
+ if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
+ struct nf_ct_tcp_flags *flags;
+ /* Set the bit in both flags and mask. */
+ flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
+ flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+ /* Same update for the CTA_PROTOINFO_TCP_FLAGS_REPLY attribute. */
+ if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
+ struct nf_ct_tcp_flags *flags;
+ /* Set the bit in both flags and mask. */
+ flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
+ flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+ /* Also reached when neither flags attribute is present. */
+ return 0;
+}
+
static int restore_nf_ct(int pid, int type)
{
struct nlmsghdr *nlh = NULL;
@@ -405,6 +454,10 @@ static int restore_nf_ct(int pid, int type)
goto out;
}
+ if (type == CR_FD_NETNF_CT)
+ if (ct_restore_callback(nlh))
+ goto out;
+
nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK|NLM_F_CREATE;
ret = do_rtnl_req(sk, nlh, nlh->nlmsg_len, NULL, NULL, NULL);
if (ret)
--
2.4.3
More information about the CRIU
mailing list