[CRIU] [PATCH 2/2] tcp: restore the boundary between sent and unsent data
Andrew Vagin
avagin at parallels.com
Wed Nov 13 13:09:16 PST 2013
ps: Thanks Pavel for the idea of disabling repair mode for restoring
unsent data.
On Thu, Nov 14, 2013 at 01:01:57AM +0400, Andrey Vagin wrote:
> All data in a write buffer can be divided into two parts: sent but not yet
> acknowledged data, and unsent data.
>
> Currently the boundary between sent and unsent data is not dumped and
> all the data are restored as if they have already been sent.
> This method can provoke long delays in a TCP connection, because the kernel
> can wait before retransmitting data.
> https://bugzilla.openvz.org/show_bug.cgi?id=2808
>
> The TCP stack must know which data have been sent, because
> acknowledgment can be received for them. These data must be restored in
> repair mode.
>
> The second part of data have never been sent out, so they can be
> restored without any tricks. These data can be sent into socket as
> usual.
>
> For restoring unsent data the repair mode is disabled for socket,
> but it is enabled back after restoring data. It will be disabled
> after unlocking the network. In this case a window probe is sent, which is
> required for waking up the connection.
>
> This patch fixes long delays in tcp connections after dumping and
> restoring.
>
> https://bugzilla.openvz.org/show_bug.cgi?id=2808
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> sk-tcp.c | 50 ++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 38 insertions(+), 12 deletions(-)
>
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 0db83a3..baf3221 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -452,19 +452,12 @@ static int restore_tcp_seqs(int sk, TcpStreamEntry *tse)
> return 0;
> }
>
> -static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> +static int __send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> {
> int ret, err = -1;
> int off, max;
> char *buf;
>
> - pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
> -
> - if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
> - pr_perror("Can't set repair queue");
> - return -1;
> - }
> -
> buf = xmalloc(len);
> if (!buf)
> return -1;
> @@ -494,16 +487,49 @@ err:
> return err;
> }
>
> +static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> +{
> + pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
> +
> + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
> + pr_perror("Can't set repair queue");
> + return -1;
> + }
> +
> + return __send_tcp_queue(sk, queue, len, imgfd);
> +}
> +
> static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd)
> {
> + u32 len;
> +
> if (restore_prepare_socket(sk))
> return -1;
>
> - if (tse->inq_len &&
> - send_tcp_queue(sk, TCP_RECV_QUEUE, tse->inq_len, fd))
> + len = tse->inq_len;
> + if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, fd))
> + return -1;
> +
> + /*
> + * All data in a write buffer can be divided on two parts sent
> + * but not yet acknowledged data and unsent data.
> + * The TCP stack must know which data have been sent, because
> + * acknowledgment can be received for them. These data must be
> + * restored in repair mode.
> + */
> + len = tse->outq_len - tse->unsq_len;
> + if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
> + return -1;
> +
> + /*
> + * The second part of data have never been sent to outside, so
> + * they can be restored without any tricks.
> + */
> + len = tse->unsq_len;
> + tcp_repair_off(sk);
> + if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
> return -1;
> - if (tse->outq_len &&
> - send_tcp_queue(sk, TCP_SEND_QUEUE, tse->outq_len, fd))
> + if (tcp_repair_on(sk))
> return -1;
>
> return 0;
> --
> 1.8.3.1
>
More information about the CRIU
mailing list