[CRIU] [PATCH] page-server: Fine grained corking control

Andrew Vagin avagin at odin.com
Mon Nov 9 05:07:38 PST 2015


On Mon, Nov 09, 2015 at 02:24:01PM +0300, Pavel Emelyanov wrote:
> When live migrating a container with a large number of processes
> inside, the time to do a page-server-ed dump may be up to 10 times
> longer than for a local dump.
> 
> The delay is always introduced in open_page_server_xfer()
> when criu negotiates the has_parent bit on the 2nd task. This
> likely happens because of Nagle's algorithm taking place -- after
> the write() of the OPEN2 command has happened, the kernel delays
> sending this command while waiting for more data.
> 
> Fix this by turning the NODELAY option on for memory transfer sockets
> on both sides, but CORK the socket before (and unCORK it after)
> the actual memory transfer to let the kernel merge pagemaps with
> pages when possible.

Acked-by: Andrew Vagin <avagin at odin.com>

I think we need to handle errors of setsockopt()
> 
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
> 
> ---
>  include/util.h |  3 +++
>  page-xfer.c    | 22 ++++++++++++++++++++--
>  util.c         | 15 +++++++++++++++
>  3 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/include/util.h b/include/util.h
> index e815117..15513ae 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -263,5 +263,8 @@ int fd_has_data(int lfd);
>  
>  int make_yard(char *path);
>  
> +void tcp_nodelay(int sk, bool on);
> +void tcp_cork(int sk, bool on);
> +
>  const char *ns_to_string(unsigned int ns);
>  #endif /* __CR_UTIL_H__ */
> diff --git a/page-xfer.c b/page-xfer.c
> index e6e64d7..8a6b47c 100644
> --- a/page-xfer.c
> +++ b/page-xfer.c
> @@ -13,7 +13,7 @@
>  #include "image.h"
>  #include "page-xfer.h"
>  #include "page-pipe.h"
> -
> +#include "util.h"
>  #include "protobuf.h"
>  #include "protobuf/pagemap.pb-c.h"
>  
> @@ -183,6 +183,8 @@ static int page_server_serve(int sk)
>  	int ret = -1;
>  	bool flushed = false;
>  
> +	tcp_nodelay(sk, true);
> +
>  	if (pipe(cxfer.p)) {
>  		pr_perror("Can't make pipe for xfer");
>  		close(sk);
> @@ -400,7 +402,7 @@ int connect_to_page_server(void)
>  	if (opts.ps_socket != -1) {
>  		page_server_sk = opts.ps_socket;
>  		pr_info("Re-using ps socket %d\n", page_server_sk);
> -		return 0;
> +		goto out;
>  	}
>  
>  	pr_info("Connecting to server %s:%u\n",
> @@ -420,6 +422,8 @@ int connect_to_page_server(void)
>  		return -1;
>  	}
>  
> +out:
> +	tcp_nodelay(page_server_sk, true);
>  	return 0;
>  }
>  
> @@ -512,6 +516,12 @@ static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov)
>  
>  static void close_server_xfer(struct page_xfer *xfer)
>  {
> +	/*
> +	 * UnCORK the socket -- the next steps would be open_page_server_xfer
> +	 * exchange which should again happen in NODELAY mode.
> +	 */
> +
> +	tcp_cork(xfer->sk, false);
>  	xfer->sk = -1;
>  }
>  
> @@ -546,6 +556,14 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
>  	if (has_parent)
>  		xfer->parent = (void *) 1; /* This is required for generate_iovs() */
>  
> +	/*
> +	 * CORK the socket now, since we plan to send a bunch
> +	 * of pagemap + pages pairs and each piece of them worth
> +	 * being merged with the others.
> +	 */
> +
> +	tcp_cork(xfer->sk, true);
> +
>  	return 0;
>  }
>  
> diff --git a/util.c b/util.c
> index 337b0ed..0ea6352 100644
> --- a/util.c
> +++ b/util.c
> @@ -28,6 +28,9 @@
>  #include <sys/wait.h>
>  #include <sys/resource.h>
>  #include <sys/wait.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <netinet/tcp.h>
>  
>  #include "compiler.h"
>  #include "asm/types.h"
> @@ -880,3 +883,15 @@ const char *ns_to_string(unsigned int ns)
>  		return NULL;
>  	}
>  }
> +
> +void tcp_cork(int sk, bool on)
> +{
> +	int val = on ? 1 : 0;
> +	setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val));
> +}
> +
> +void tcp_nodelay(int sk, bool on)
> +{
> +	int val = on ? 1 : 0;
> +	setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val));
> +}
> -- 
> 1.9.3
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list