[CRIU] [PATCH] inet: tcp -- Find size of max write memory allowed to restore TCP data

Cyrill Gorcunov gorcunov at openvz.org
Mon Oct 7 04:49:49 PDT 2013


The maximal size which may be used in the kernel for sending TCP data
on restore varies depending on how much memory is installed on the
system; moreover, the memory allocated for the "read queue" is bigger
than that used for the "write queue". Thus when we have checkpointed a
big slab of data we need to figure out which size is allowed for sending
data on restore.

For this we read /proc/sys/net/ipv4/tcp_wmem on restore and calculate
the size needed, then we simply chop the data into segments and send
them in a loop.

Typical output on restore is something like

 | (00.012001)  17471: TCP write queue memory limit is 2097152

https://bugzilla.openvz.org/show_bug.cgi?id=2751

Reported-by: Andrey Vagin <avagin at openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 cr-restore.c      |  3 +++
 include/sk-inet.h |  2 ++
 sk-tcp.c          | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index ddee815..587d261 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1245,6 +1245,9 @@ static int restore_task_with_children(void *_arg)
 		if (mount_proc())
 			exit(1);
 
+		if (tcp_read_sysctl_limits())
+			exit(1);
+
 		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
 			exit(1);
 
diff --git a/include/sk-inet.h b/include/sk-inet.h
index a3dff73..030c15a 100644
--- a/include/sk-inet.h
+++ b/include/sk-inet.h
@@ -79,4 +79,6 @@ int restore_one_tcp(int sk, struct inet_sk_info *si);
 int check_tcp(void);
 extern int rst_tcp_socks_add(int fd, bool reuseaddr);
 
+extern int tcp_read_sysctl_limits(void);
+
 #endif /* __CR_SK_INET_H__ */
diff --git a/sk-tcp.c b/sk-tcp.c
index 0d4fcfc..8a6c179 100644
--- a/sk-tcp.c
+++ b/sk-tcp.c
@@ -57,6 +57,51 @@ enum {
 static LIST_HEAD(cpt_tcp_repair_sockets);
 static LIST_HEAD(rst_tcp_repair_sockets);
 
+/*
+ * Strictly speaking, on a machine with a huge amount of
+ * memory we're allowed to send up to 4M of tcp data at
+ * once. The precise limit is figured out a bit later,
+ * when restore starts. Meanwhile set it to 2M, which
+ * is safe enough to proceed without errors.
+ */
+static int max_wshare = 2U << 20;
+
+int tcp_read_sysctl_limits(void)
+{
+	const char path[] = "/proc/sys/net/ipv4/tcp_wmem";
+	int fd, n, limit = max_wshare;
+	int vect[3] = { };
+	char buf[512] = { };	/* zeroed so the short read below stays NUL-terminated */
+
+	/*
+	 * Clamp max_wshare to the third field of tcp_wmem. Always
+	 * returns 0: if the file can't be opened, read or parsed (or
+	 * the value is not positive, which would stall the chunked
+	 * send loop) the current value of max_wshare is kept.
+	 */
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		pr_warn("Can't open %s: %m\n", path);
+		return 0;
+	}
+
+	if (read(fd, buf, sizeof(buf) - 1) > 0) {
+		n = sscanf(buf, "%d\t%d\t%d", &vect[0], &vect[1], &vect[2]);
+		if (n == 3 && vect[2] > 0)
+			limit = min(max_wshare, vect[2]);
+	} else
+		pr_warn("Can't read %s: %m\n", path);
+	close(fd);
+
+	if (limit < 128)
+		pr_warn("The memory limit for TCP write queue "
+			"(%s) is suspiciously small %d\n", path, limit);
+	max_wshare = limit;
+
+	pr_debug("TCP write queue memory limit is %d\n", max_wshare);
+	return 0;
+}
+
 static int tcp_repair_on(int fd)
 {
 	int ret, aux = 1;
@@ -445,6 +490,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
 {
 	int ret, err = -1;
 	char *buf;
+	int off;
 
 	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
 
@@ -460,11 +506,18 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
 	if (read_img_buf(imgfd, buf, len) < 0)
 		goto err;
 
-	ret = send(sk, buf, len, 0);
-	if (ret != len) {
-		pr_perror("Can't restore %d queue data (%d), want %d",
-				queue, ret, len);
-		goto err;
+	off = 0;
+	while (len) {
+		int chunk = len > max_wshare ? max_wshare : len;
+
+		ret = send(sk, buf + off, chunk, 0);
+		if (ret != chunk) {
+			pr_perror("Can't restore %d queue data (%d), want (%d:%d)",
+				  queue, ret, chunk, len);
+			goto err;
+		}
+		off += chunk;
+		len -= chunk;
 	}
 
 	err = 0;
-- 
1.8.3.1



More information about the CRIU mailing list