[CRIU] [PATCH] inet: tcp -- Find size of max write memory allowed to restore TCP data

Pavel Emelyanov xemul at parallels.com
Mon Oct 7 06:11:50 PDT 2013


On 10/07/2013 03:49 PM, Cyrill Gorcunov wrote:
> The maximal size which may be used in the kernel for sending TCP data
> on restore varies depending on how much memory is installed on the
> system; moreover, the memory allocated for the "read queue" is bigger
> than that used for the "write queue". Thus when we have checkpointed a big
> slab of data we need to figure out which size is allowed for sending data on restore.
> 
> For this we read /proc/sys/net/ipv4/tcp_wmem on restore and calculate
> the size needed, then we simply chop the data into segments and send them
> in a loop.
> 
> Typical output on restore is something like
> 
>  | (00.012001)  17471: TCP write queue memory limit is 2097152
> 
> https://bugzilla.openvz.org/show_bug.cgi?id=2751
> 
> Reported-by: Andrey Vagin <avagin at openvz.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  cr-restore.c      |  3 +++
>  include/sk-inet.h |  2 ++
>  sk-tcp.c          | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 63 insertions(+), 5 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index ddee815..587d261 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1245,6 +1245,9 @@ static int restore_task_with_children(void *_arg)
>  		if (mount_proc())
>  			exit(1);
>  
> +		if (tcp_read_sysctl_limits())
> +			exit(1);
> +
>  		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
>  			exit(1);
>  
> diff --git a/include/sk-inet.h b/include/sk-inet.h
> index a3dff73..030c15a 100644
> --- a/include/sk-inet.h
> +++ b/include/sk-inet.h
> @@ -79,4 +79,6 @@ int restore_one_tcp(int sk, struct inet_sk_info *si);
>  int check_tcp(void);
>  extern int rst_tcp_socks_add(int fd, bool reuseaddr);
>  
> +extern int tcp_read_sysctl_limits(void);
> +
>  #endif /* __CR_SK_INET_H__ */
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 0d4fcfc..8a6c179 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -57,6 +57,51 @@ enum {
>  static LIST_HEAD(cpt_tcp_repair_sockets);
>  static LIST_HEAD(rst_tcp_repair_sockets);
>  
> +/*
> + * Strictly speaking, if there is a machine with huge amount
> + * of memory, we're allowed to send up to 4M of tcp data at
> + * once. But we will figure out precise size of a limit a bit
> + * later when restore starts. Meanwhile set it up to 2M, which
> + * is safe enough to proceed without errors.
> + */
> +static int max_wshare = 2U << 20;
> +
> +int tcp_read_sysctl_limits(void)
> +{
> +	const char path[] = "/proc/sys/net/ipv4/tcp_wmem";
> +	int fd, n, limit = max_wshare;
> +	int vect[3] = { };
> +	char buf[512];
> +
> +	/*
> +	 * Let's figure out exactly what amount of memory is
> +	 * available for sending data on restore. If for some
> +	 * reason we fail -- just keep the 2M default, which is small
> +	 * enough to proceed without errors.
> +	 */
> +	fd = open("/proc/sys/net/ipv4/tcp_wmem", O_RDONLY);

There's a sysctl engine for reading sysctls.
Other than this -- the read queue restore would require another sysctl read, wouldn't it?

> +	if (fd < 0) {
> +		pr_warn("Can't open %s: %m\n", path);
> +		return 0;
> +	}
> +
> +	if (read(fd, buf, sizeof(buf) > 0)) {
> +		n = sscanf(buf, "%d\t%d\t%d", &vect[0], &vect[1], &vect[2]);
> +		if (n == 3)
> +			limit = min(max_wshare, vect[2]);
> +	} else
> +		pr_warn("Can't read %s: %m\n", path);
> +	close(fd);
> +
> +	if (limit < 128)
> +		pr_warn("The memory limit for TCP write queue "
> +			"(%s) is suspiciously small %d\n", path, limit);
> +	max_wshare = limit;
> +
> +	pr_debug("TCP write queue memory limit is %d\n", max_wshare);
> +	return 0;
> +}
> +
>  static int tcp_repair_on(int fd)
>  {
>  	int ret, aux = 1;
> @@ -445,6 +490,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
>  {
>  	int ret, err = -1;
>  	char *buf;
> +	int off;
>  
>  	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
>  
> @@ -460,11 +506,18 @@ static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
>  	if (read_img_buf(imgfd, buf, len) < 0)
>  		goto err;
>  
> -	ret = send(sk, buf, len, 0);
> -	if (ret != len) {
> -		pr_perror("Can't restore %d queue data (%d), want %d",
> -				queue, ret, len);
> -		goto err;
> +	off = 0;
> +	while (len) {
> +		int chunk = len > max_wshare ? max_wshare : len;
> +
> +		ret = send(sk, buf + off, chunk, 0);
> +		if (ret != chunk) {
> +			pr_perror("Can't restore %d queue data (%d), want (%d:%d)",
> +				  queue, ret, chunk, len);
> +			goto err;
> +		}
> +		off += chunk;
> +		len -= chunk;
>  	}
>  
>  	err = 0;
> 




More information about the CRIU mailing list