[CRIU] [PATCH 2/2] tcp: restore the boundary between sent and unsent data

Andrew Vagin avagin at parallels.com
Wed Nov 13 13:09:16 PST 2013


ps: Thanks Pavel for the idea of disabling repair mode for restoring
    unsent data.

On Thu, Nov 14, 2013 at 01:01:57AM +0400, Andrey Vagin wrote:
> All data in a write buffer can be divided into two parts: sent but not
> yet acknowledged data, and unsent data.
> 
> Currently the boundary between sent and unsent data is not dumped and
> all the data are restored as if they have already been sent.
> This method can provoke long delays in a TCP connection, because the
> kernel can wait before retransmitting data.
> https://bugzilla.openvz.org/show_bug.cgi?id=2808
> 
> The TCP stack must know which data have been sent, because
> acknowledgment can be received for them. These data must be restored in
> repair mode.
> 
> The second part of data have never been sent out, so they can be
> restored without any tricks. These data can be sent into socket as
> usual.
> 
> To restore unsent data, repair mode is disabled for the socket,
> but it is enabled again after the data are restored. It will be disabled
> after unlocking the network. In this case a window probe is sent, which
> is required for waking up the connection.
> 
> This patch fixes long delays in tcp connections after dumping and
> restoring.
> 
> https://bugzilla.openvz.org/show_bug.cgi?id=2808
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  sk-tcp.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 38 insertions(+), 12 deletions(-)
> 
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 0db83a3..baf3221 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -452,19 +452,12 @@ static int restore_tcp_seqs(int sk, TcpStreamEntry *tse)
>  	return 0;
>  }
>  
> -static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> +static int __send_tcp_queue(int sk, int queue, u32 len, int imgfd)
>  {
>  	int ret, err = -1;
>  	int off, max;
>  	char *buf;
>  
> -	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
> -
> -	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
> -		pr_perror("Can't set repair queue");
> -		return -1;
> -	}
> -
>  	buf = xmalloc(len);
>  	if (!buf)
>  		return -1;
> @@ -494,16 +487,49 @@ err:
>  	return err;
>  }
>  
> +static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
> +{
> +	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
> +
> +	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
> +		pr_perror("Can't set repair queue");
> +		return -1;
> +	}
> +
> +	return __send_tcp_queue(sk, queue, len, imgfd);
> +}
> +
>  static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd)
>  {
> +	u32 len;
> +
>  	if (restore_prepare_socket(sk))
>  		return -1;
>  
> -	if (tse->inq_len &&
> -			send_tcp_queue(sk, TCP_RECV_QUEUE, tse->inq_len, fd))
> +	len = tse->inq_len;
> +	if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, fd))
> +		return -1;
> +
> +	/*
> +	 * All data in a write buffer can be divided into two parts: sent
> +	 * but not yet acknowledged data, and unsent data.
> +	 * The TCP stack must know which data have been sent, because
> +	 * acknowledgment can be received for them. These data must be
> +	 * restored in repair mode.
> +	 */
> +	len = tse->outq_len - tse->unsq_len;
> +	if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
> +		return -1;
> +
> +	/*
> +	 * The second part of data have never been sent to outside, so
> +	 * they can be restored without any tricks.
> +	 */
> +	len = tse->unsq_len;
> +	tcp_repair_off(sk);
> +	if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
>  		return -1;
> -	if (tse->outq_len &&
> -			send_tcp_queue(sk, TCP_SEND_QUEUE, tse->outq_len, fd))
> +	if (tcp_repair_on(sk))
>  		return -1;
>  
>  	return 0;
> -- 
> 1.8.3.1
> 


More information about the CRIU mailing list