[CRIU] [PATCH] inet: tcp -- Find size of max write memory allowed to restore TCP data
Pavel Emelyanov
xemul at parallels.com
Mon Oct 7 06:11:50 PDT 2013
On 10/07/2013 03:49 PM, Cyrill Gorcunov wrote:
> The maximal size which may be used in the kernel for sending TCP data
> on restore varies depending on how much memory is installed on the
> system; moreover, the memory allocated for the "read queue" is bigger than
> that used for the "write queue". Thus when we checkpoint a big slab of data
> we need to figure out which size is allowed for sending data on restore.
>
> For this we read /proc/sys/net/ipv4/tcp_wmem on restore and calculate
> the size needed, then we simply chop the data into segments and send them
> in a loop.
>
> Typical output on restore is something like
>
> | (00.012001) 17471: TCP write queue memory limit is 2097152
>
> https://bugzilla.openvz.org/show_bug.cgi?id=2751
>
> Reported-by: Andrey Vagin <avagin at openvz.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> cr-restore.c | 3 +++
> include/sk-inet.h | 2 ++
> sk-tcp.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 3 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index ddee815..587d261 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1245,6 +1245,9 @@ static int restore_task_with_children(void *_arg)
> if (mount_proc())
> exit(1);
>
> + if (tcp_read_sysctl_limits())
> + exit(1);
> +
> if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
> exit(1);
>
> diff --git a/include/sk-inet.h b/include/sk-inet.h
> index a3dff73..030c15a 100644
> --- a/include/sk-inet.h
> +++ b/include/sk-inet.h
> @@ -79,4 +79,6 @@ int restore_one_tcp(int sk, struct inet_sk_info *si);
> int check_tcp(void);
> extern int rst_tcp_socks_add(int fd, bool reuseaddr);
>
> +extern int tcp_read_sysctl_limits(void);
> +
> #endif /* __CR_SK_INET_H__ */
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 0d4fcfc..8a6c179 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -57,6 +57,51 @@ enum {
> static LIST_HEAD(cpt_tcp_repair_sockets);
> static LIST_HEAD(rst_tcp_repair_sockets);
>
> +/*
> + * Strictly speaking, if there is a machine with huge amount
> + * of memory, we're allowed to send up to 4M of tcp data at
> + * once. But we will figure out precise size of a limit a bit
> + * later when restore starts. Meanwhile set it up to 2M, which
> + * is safe enough to proceed without errors.
> + */
> +static int max_wshare = 2U << 20;
> +
> +int tcp_read_sysctl_limits(void)
> +{
> + const char path[] = "/proc/sys/net/ipv4/tcp_wmem";
> + int fd, n, limit = max_wshare;
> + int vect[3] = { };
> + char buf[512];
> +
> + /*
> + * Let's figure out exactly what amount of memory is
> + * available for sending data on restore. If for some
> + * reason we fail -- just setup 1M which is small enough
> + * to proceed without errors.
> + */
> + fd = open("/proc/sys/net/ipv4/tcp_wmem", O_RDONLY);
There's a sysctl engine for reading sysctls.
Other than this -- the read queue restore would require another sysctl read, wouldn't it?
> + if (fd < 0) {
> + pr_warn("Can't open %s: %m\n", path);
> + return 0;
> + }
> +
> + if (read(fd, buf, sizeof(buf) > 0)) {
> + n = sscanf(buf, "%d\t%d\t%d", &vect[0], &vect[1], &vect[2]);
> + if (n == 3)
> + limit = min(max_wshare, vect[2]);
> + } else
> + pr_warn("Can't read %s: %m\n", path);
> + close(fd);
> +
> + if (limit < 128)
> + pr_warn("The memory limit for TCP write queue "
> + "(%s) is suspiciously small %d\n", path, limit);
> + max_wshare = limit;
> +
> + pr_debug("TCP write queue memory limit is %d\n", max_wshare);
> + return 0;
> +}
> +
> static int tcp_repair_on(int fd)
> {
> int ret, aux = 1;
> @@ -445,6 +490,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> {
> int ret, err = -1;
> char *buf;
> + int off;
>
> pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
>
> @@ -460,11 +506,18 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> if (read_img_buf(imgfd, buf, len) < 0)
> goto err;
>
> - ret = send(sk, buf, len, 0);
> - if (ret != len) {
> - pr_perror("Can't restore %d queue data (%d), want %d",
> - queue, ret, len);
> - goto err;
> + off = 0;
> + while (len) {
> + int chunk = len > max_wshare ? max_wshare : len;
> +
> + ret = send(sk, buf + off, chunk, 0);
> + if (ret != chunk) {
> + pr_perror("Can't restore %d queue data (%d), want (%d:%d)",
> + queue, ret, chunk, len);
> + goto err;
> + }
> + off += chunk;
> + len -= chunk;
> }
>
> err = 0;
>
More information about the CRIU
mailing list