[CRIU] [PATCH 2/2] tcp: restore the boundary between sent and unsent data

Andrey Vagin avagin at openvz.org
Wed Nov 13 13:01:57 PST 2013


All data in a write buffer can be divided into two parts: sent but not
yet acknowledged data, and unsent data.

Currently the boundary between sent and unsent data is not dumped, and
all the data is restored as if it had already been sent.
This method can provoke long delays in a TCP connection, because the
kernel can wait before retransmitting data.
https://bugzilla.openvz.org/show_bug.cgi?id=2808

The TCP stack must know which data have been sent, because
acknowledgment can be received for them. These data must be restored in
repair mode.

The second part of the data has never been sent out, so it can be
restored without any tricks. This data can be sent into the socket as
usual.

To restore the unsent data, repair mode is disabled on the socket,
and it is enabled again after the data has been restored. It will be
disabled after the network is unlocked. In this case a window probe is
sent, which is required for waking up the connection.

This patch fixes long delays in tcp connections after dumping and
restoring.

https://bugzilla.openvz.org/show_bug.cgi?id=2808
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 sk-tcp.c | 50 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/sk-tcp.c b/sk-tcp.c
index 0db83a3..baf3221 100644
--- a/sk-tcp.c
+++ b/sk-tcp.c
@@ -452,19 +452,12 @@ static int restore_tcp_seqs(int sk, TcpStreamEntry *tse)
 	return 0;
 }
 
-static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
+static int __send_tcp_queue(int sk, int queue, u32 len, int imgfd)
 {
 	int ret, err = -1;
 	int off, max;
 	char *buf;
 
-	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
-
-	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
-		pr_perror("Can't set repair queue");
-		return -1;
-	}
-
 	buf = xmalloc(len);
 	if (!buf)
 		return -1;
@@ -494,16 +487,49 @@ err:
 	return err;
 }
 
+static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
+{
+	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
+		pr_perror("Can't set repair queue");
+		return -1;
+	}
+
+	return __send_tcp_queue(sk, queue, len, imgfd);
+}
+
 static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd)
 {
+	u32 len;
+
 	if (restore_prepare_socket(sk))
 		return -1;
 
-	if (tse->inq_len &&
-			send_tcp_queue(sk, TCP_RECV_QUEUE, tse->inq_len, fd))
+	len = tse->inq_len;
+	if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, fd))
+		return -1;
+
+	/*
+	 * All data in a write buffer can be divided into two parts:
+	 * sent but not yet acknowledged data, and unsent data.
+	 * The TCP stack must know which data have been sent, because
+	 * acknowledgment can be received for them. These data must be
+	 * restored in repair mode.
+	 */
+	len = tse->outq_len - tse->unsq_len;
+	if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
+		return -1;
+
+	/*
+	 * The second part of the data has never been sent out, so
+	 * it can be restored without any tricks.
+	 */
+	len = tse->unsq_len;
+	tcp_repair_off(sk);
+	if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
 		return -1;
-	if (tse->outq_len &&
-			send_tcp_queue(sk, TCP_SEND_QUEUE, tse->outq_len, fd))
+	if (tcp_repair_on(sk))
 		return -1;
 
 	return 0;
-- 
1.8.3.1



More information about the CRIU mailing list