[CRIU] [PATCH] page-server: Fine grained corking control

Pavel Emelyanov xemul at parallels.com
Mon Nov 9 03:24:01 PST 2015


When live-migrating a container with a large number of processes
inside, a dump done via the page server may take up to 10 times
longer than a local dump.

The delay is always introduced in open_page_server_xfer()
when criu negotiates the has_parent bit for the 2nd task. This
likely happens because of Nagle's algorithm -- after the write()
of the OPEN2 command, the kernel delays sending that command
while it waits for more data.

Fix this by turning the NODELAY option on for memory transfer
sockets on both sides, but CORK the socket before (and unCORK it
after) the actual memory transfer to let the kernel merge pagemaps
with pages when possible.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

---
 include/util.h |  3 +++
 page-xfer.c    | 22 ++++++++++++++++++++--
 util.c         | 15 +++++++++++++++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/util.h b/include/util.h
index e815117..15513ae 100644
--- a/include/util.h
+++ b/include/util.h
@@ -263,5 +263,8 @@ int fd_has_data(int lfd);
 
 int make_yard(char *path);
 
+void tcp_nodelay(int sk, bool on);
+void tcp_cork(int sk, bool on);
+
 const char *ns_to_string(unsigned int ns);
 #endif /* __CR_UTIL_H__ */
diff --git a/page-xfer.c b/page-xfer.c
index e6e64d7..8a6b47c 100644
--- a/page-xfer.c
+++ b/page-xfer.c
@@ -13,7 +13,7 @@
 #include "image.h"
 #include "page-xfer.h"
 #include "page-pipe.h"
-
+#include "util.h"
 #include "protobuf.h"
 #include "protobuf/pagemap.pb-c.h"
 
@@ -183,6 +183,8 @@ static int page_server_serve(int sk)
 	int ret = -1;
 	bool flushed = false;
 
+	tcp_nodelay(sk, true);
+
 	if (pipe(cxfer.p)) {
 		pr_perror("Can't make pipe for xfer");
 		close(sk);
@@ -400,7 +402,7 @@ int connect_to_page_server(void)
 	if (opts.ps_socket != -1) {
 		page_server_sk = opts.ps_socket;
 		pr_info("Re-using ps socket %d\n", page_server_sk);
-		return 0;
+		goto out;
 	}
 
 	pr_info("Connecting to server %s:%u\n",
@@ -420,6 +422,8 @@ int connect_to_page_server(void)
 		return -1;
 	}
 
+out:
+	tcp_nodelay(page_server_sk, true);
 	return 0;
 }
 
@@ -512,6 +516,12 @@ static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov)
 
 static void close_server_xfer(struct page_xfer *xfer)
 {
+	/*
+	 * UnCORK the socket -- the next steps would be open_page_server_xfer
+	 * exchange which should again happen in NODELAY mode.
+	 */
+
+	tcp_cork(xfer->sk, false);
 	xfer->sk = -1;
 }
 
@@ -546,6 +556,14 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
 	if (has_parent)
 		xfer->parent = (void *) 1; /* This is required for generate_iovs() */
 
+	/*
+	 * CORK the socket now, since we plan to send a bunch
+	 * of pagemap + pages pairs and each piece of them worth
+	 * being merged with the others.
+	 */
+
+	tcp_cork(xfer->sk, true);
+
 	return 0;
 }
 
diff --git a/util.c b/util.c
index 337b0ed..0ea6352 100644
--- a/util.c
+++ b/util.c
@@ -28,6 +28,9 @@
 #include <sys/wait.h>
 #include <sys/resource.h>
 #include <sys/wait.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
 
 #include "compiler.h"
 #include "asm/types.h"
@@ -880,3 +883,15 @@ const char *ns_to_string(unsigned int ns)
 		return NULL;
 	}
 }
+
+void tcp_cork(int sk, bool on) /* set (on) or clear (!on) the TCP_CORK option on socket sk */
+{
+	int val = on ? 1 : 0; /* option value handed to setsockopt() as an int */
+	setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val)); /* NOTE(review): return value is not checked */
+}
+
+void tcp_nodelay(int sk, bool on) /* set (on) or clear (!on) the TCP_NODELAY option on socket sk */
+{
+	int val = on ? 1 : 0; /* option value handed to setsockopt() as an int */
+	setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val)); /* NOTE(review): return value is not checked */
+}
-- 
1.9.3



More information about the CRIU mailing list