[CRIU] [PATCH] page-server: Fine grained corking control
Andrew Vagin
avagin at odin.com
Mon Nov 9 05:07:38 PST 2015
On Mon, Nov 09, 2015 at 02:24:01PM +0300, Pavel Emelyanov wrote:
> When live migrating a container with a large number of processes
> inside, the time to do a page-server-ed dump may be up to 10 times
> longer than for a local dump.
>
> The delay is always introduced in the open_page_server_xfer()
> when criu negotiates the has_parent bit on the 2nd task. This
> likely happens because of the Nagle algorithm taking place -- after
> the write() of the OPEN2 command, the kernel delays sending this
> command while waiting for more data.
>
> Fix this by turning the NODELAY option on for memory transfer sockets
> on both sides, but CORK the socket before (and unCORK it after)
> the actual memory transfer to let the kernel merge pagemaps with
> pages when possible.
Acked-by: Andrew Vagin <avagin at odin.com>
I think we need to handle errors from setsockopt().
>
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
>
> ---
> include/util.h | 3 +++
> page-xfer.c | 22 ++++++++++++++++++++--
> util.c | 15 +++++++++++++++
> 3 files changed, 38 insertions(+), 2 deletions(-)
>
> diff --git a/include/util.h b/include/util.h
> index e815117..15513ae 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -263,5 +263,8 @@ int fd_has_data(int lfd);
>
> int make_yard(char *path);
>
> +void tcp_nodelay(int sk, bool on);
> +void tcp_cork(int sk, bool on);
> +
> const char *ns_to_string(unsigned int ns);
> #endif /* __CR_UTIL_H__ */
> diff --git a/page-xfer.c b/page-xfer.c
> index e6e64d7..8a6b47c 100644
> --- a/page-xfer.c
> +++ b/page-xfer.c
> @@ -13,7 +13,7 @@
> #include "image.h"
> #include "page-xfer.h"
> #include "page-pipe.h"
> -
> +#include "util.h"
> #include "protobuf.h"
> #include "protobuf/pagemap.pb-c.h"
>
> @@ -183,6 +183,8 @@ static int page_server_serve(int sk)
> int ret = -1;
> bool flushed = false;
>
> + tcp_nodelay(sk, true);
> +
> if (pipe(cxfer.p)) {
> pr_perror("Can't make pipe for xfer");
> close(sk);
> @@ -400,7 +402,7 @@ int connect_to_page_server(void)
> if (opts.ps_socket != -1) {
> page_server_sk = opts.ps_socket;
> pr_info("Re-using ps socket %d\n", page_server_sk);
> - return 0;
> + goto out;
> }
>
> pr_info("Connecting to server %s:%u\n",
> @@ -420,6 +422,8 @@ int connect_to_page_server(void)
> return -1;
> }
>
> +out:
> + tcp_nodelay(page_server_sk, true);
> return 0;
> }
>
> @@ -512,6 +516,12 @@ static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov)
>
> static void close_server_xfer(struct page_xfer *xfer)
> {
> + /*
> + * UnCORK the socket -- the next steps would be open_page_server_xfer
> + * exchange which should again happen in NODELAY mode.
> + */
> +
> + tcp_cork(xfer->sk, false);
> xfer->sk = -1;
> }
>
> @@ -546,6 +556,14 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
> if (has_parent)
> xfer->parent = (void *) 1; /* This is required for generate_iovs() */
>
> + /*
> + * CORK the socket now, since we plan to send a bunch
> + * of pagemap + pages pairs and each piece of them worth
> + * being merged with the others.
> + */
> +
> + tcp_cork(xfer->sk, true);
> +
> return 0;
> }
>
> diff --git a/util.c b/util.c
> index 337b0ed..0ea6352 100644
> --- a/util.c
> +++ b/util.c
> @@ -28,6 +28,9 @@
> #include <sys/wait.h>
> #include <sys/resource.h>
> #include <sys/wait.h>
> +#include <sys/socket.h>
> +#include <netinet/in.h>
> +#include <netinet/tcp.h>
>
> #include "compiler.h"
> #include "asm/types.h"
> @@ -880,3 +883,15 @@ const char *ns_to_string(unsigned int ns)
> return NULL;
> }
> }
> +
> +void tcp_cork(int sk, bool on)
> +{
> + int val = on ? 1 : 0;
> + setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val));
> +}
> +
> +void tcp_nodelay(int sk, bool on)
> +{
> + int val = on ? 1 : 0;
> + setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val));
> +}
> --
> 1.9.3
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list